# Mining testimonial fragments of the Holocaust

**Experience domain:** whip

### Load the necessary libraries

In [1]:
import sys; sys.path.insert(0, '..')
import itertools

In [2]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
mongo = db.get_db()

In [3]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [4]:
def create_contextual_query(lemmas,context_length=50):
    """Build a corpus query matching the given lemmas in any order.

    Every ordering of ``lemmas`` becomes one parenthesised alternative in
    which consecutive lemmas may be separated by up to ``context_length``
    arbitrary tokens; the alternatives are joined with ``|``.

    Args:
        lemmas: Sequence of lemma strings that must all occur.
        context_length: Maximum number of tokens allowed between two
            consecutive lemmas.

    Returns:
        The query string, e.g. for ``["a", "b"]`` and a context of 10:
        ``([lemma="a"][]{0,10}[lemma="b"])|([lemma="b"][]{0,10}[lemma="a"])``.
    """
    # Wildcard run placed between two consecutive lemma constraints.
    gap = '[]{0,' + str(context_length) + '}'
    alternatives = (
        '(' + gap.join('[lemma="' + lemma + '"]' for lemma in ordering) + ')'
        for ordering in itertools.permutations(lemmas, len(lemmas))
    )
    return '|'.join(alternatives)
        
        
            

In [5]:
from utils import blacklab, db, text
import requests
import json
# Substrings marking tokens (punctuation, clitics) that are matched by a
# single wildcard token instead of their literal text.
_SINGLE_WILDCARD_MARKERS = ("n't", "'re", "?", ".", "'s", ",")


def _build_token_query(tokens):
    """Translate tokenizer output into a BlackLab token-sequence query."""
    last_index = len(tokens) - 1
    parts = []
    for i, token in enumerate(tokens):
        word = token['word']
        if '...' in word:
            # An ellipsis at either edge of the passage is dropped; inside
            # the passage it stands for a gap of up to 50 tokens.
            if i not in (0, last_index):
                parts.append('[]{0,50}')
        elif '-' in word:
            # Dashes and hyphenated words may be split differently in the
            # index, so allow up to three arbitrary tokens.
            parts.append('[]{0,3}')
        elif any(marker in word for marker in _SINGLE_WILDCARD_MARKERS):
            parts.append('[]')
        else:
            parts.append('["' + word + '"]')
    return ' '.join(parts)


def find_sentence_id(label):
    """Locate a passage in the corpus and return the sentences it spans.

    The passage is tokenized by the tokenizer service at
    ``http://localhost:9000/`` (request properties ``annotators: tokenize``;
    presumably a Stanford CoreNLP server -- confirm), turned into a
    token-level BlackLab query and searched. The first hit is mapped to
    sentence indices through the ``tokens`` collection of the database.

    Args:
        label: Text of the passage to look up.

    Returns:
        ``(start_sentence_index, end_sentence_index, testimony_id)`` for the
        first hit, or ``None`` when the search or the lookup fails (the
        failing query is printed in that case).
    """
    props = {'annotators': 'tokenize'}

    r = requests.post('http://localhost:9000/',
                      params={'properties': json.dumps(props)},
                      data=label.encode('utf-8'))
    # Decode the response body as UTF-8 (the request body is sent as UTF-8).
    r.encoding = 'utf-8'
    # json.loads() no longer accepts an ``encoding`` argument on Python 3.9+.
    result = json.loads(r.text)

    query = _build_token_query(result['tokens'])
    try:
        sentence = blacklab.search_blacklab(query, window=0,
                                            lemma=False,
                                            include_match=True)
        hit = sentence[0]
        token_start = hit['token_start']
        token_end = hit['token_end']
        print (hit)
        mongo = db.get_db()
        document = mongo.tokens.find_one({'testimony_id':
                                          hit['testimony_id']},
                                         {'_id': 0})
        tokens = document['tokens']
        # token_end is exclusive (token_end - token_start equals the number
        # of matched tokens in the hits printed by this notebook), so the
        # last token of the match sits at token_end - 1. Indexing with
        # token_end itself would read the token after the match, which may
        # belong to the next sentence or lie past the end of the document.
        last_token = max(token_end - 1, token_start)
        sentenceStart = tokens[token_start]['sentence_index']
        sentenceEnd = tokens[last_token]['sentence_index']
        return (sentenceStart, sentenceEnd, hit['testimony_id'])
    except Exception as err:
        # Best-effort lookup: report the failing query and its cause, and
        # signal the failure with None. Unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        print("The following query returned a null result")
        print(query)
        print(repr(err))
        return None
        
            


In [6]:
def create_parent_node(label):
    """Build the document for a new main node.

    Args:
        label: Label given both to the document and to the root of its tree.

    Returns:
        A dict with the keys ``label``, ``essay_id`` (a random integer from
        1 to 20) and ``tree`` (a parent node produced by ``get_node`` with
        an empty ``children`` list).
    """
    # get_node() ignores its first argument; the draw is kept so the sequence
    # of random numbers consumed stays the same.
    placeholder_id = random.randint(1, 20)
    essay_id = random.randint(1, 20)
    tree = get_node(placeholder_id, {'label': label}, is_parent=True)
    tree['label'] = label
    return {'label': label, 'essay_id': essay_id, 'tree': tree}

In [7]:
def get_node(testimony_id, node, is_parent=False):
    """Build a parent or leaf node dict for the fragment tree.

    Args:
        testimony_id: Not used by this function.
        node: Source dict. A parent node only needs ``label``; a leaf also
            needs ``testimony_id``, ``media_index``, ``media_offset``,
            ``start_sentence_index`` and ``end_sentence_index``.
        is_parent: When true, the identifier and position fields are filled
            with random integers from 1 to 20 instead of being read from
            ``node``.

    Returns:
        A new dict with the label, the identifier and position fields, and
        an empty ``children`` list.
    """
    if not is_parent:
        leaf = {'label': node['label'],
                'testimony_id': node['testimony_id']}
        # Position fields of a leaf are stored as floats.
        for field in ('media_index', 'media_offset',
                      'start_sentence_index', 'end_sentence_index'):
            leaf[field] = float(node[field])
        leaf['children'] = []
        return leaf

    parent = {'label': node['label']}
    for field in ('testimony_id', 'media_index', 'media_offset',
                  'start_sentence_index', 'end_sentence_index'):
        parent[field] = random.randint(1, 20)
    parent['children'] = []
    return parent

In [8]:
def check_if_main_node_exist(node):
    """Report whether a main node with the given label is stored.

    Args:
        node: Label of the main node to look for in ``mongo.fragments``.

    Returns:
        True if a document with that label exists, False otherwise.
    """
    # find_one() returns None when nothing matches. Indexing the cursor
    # returned by find() raised IndexError in that case, so a missing node
    # could never be reported as False.
    return mongo.fragments.find_one({'label': node}, {'_id': 0}) is not None

In [9]:
def add_main_node(label):
    """Insert a new main-node document for ``label`` into ``mongo.fragments``.

    Args:
        label: Label of the main node to create.
    """
    # Collection.insert() was deprecated in PyMongo 3 and removed in
    # PyMongo 4; insert_one() is the single-document replacement and matches
    # the delete_one()/replace_one() calls used elsewhere in this notebook.
    mongo.fragments.insert_one(create_parent_node(label))

In [10]:
def delete_main_node(label):
    """Delete one document with the given label from ``mongo.fragments``.

    Args:
        label: Label of the main node to remove.
    """
    label_filter = {'label': label}
    mongo.fragments.delete_one(label_filter)

In [11]:
def add_testimonial_fragments(fragments):
    """Attach a group of fragments as a new mid node under a main node.

    Args:
        fragments: Dict with the keys ``main_node`` (label of an existing
            main node), ``mid_node`` (label of the mid node to create) and
            ``fragments`` (list of leaf dicts accepted by ``get_node``).

    Nothing is written, and a message is printed, when the main node does
    not exist or already has a child labelled ``mid_node``.
    """
    main_label = fragments['main_node']
    # A single lookup both checks that the main node exists and fetches it.
    document = mongo.fragments.find_one({'label': main_label}, {'_id': 0})
    if document is None:
        print ("main node does not exist, nothing added")
        return

    siblings = document['tree']['children']
    if any(child['label'] == fragments['mid_node'] for child in siblings):
        print ("mid node exists cannot be added")
        return

    # get_node() ignores its first argument, so 'r' is only a placeholder.
    mid_node = get_node('r', {'label': fragments['mid_node']}, is_parent=True)
    mid_node['children'] = [get_node(fragment['testimony_id'], fragment)
                            for fragment in fragments['fragments']]
    siblings.append(mid_node)
    mongo.fragments.replace_one({'label': main_label}, document)

### Add the main node

In [12]:
# Term used as the label of the main node for every section below.
main_node = "whip"
# Remove one stored main node with this label (if any), then insert a fresh
# one whose tree has no children yet.
delete_main_node(main_node)
add_main_node(main_node)

  


### Set up the query

In [13]:
# Query matching every token whose lemma is "whip".
query = '[lemma="whip"]'

# Run the topic-model concordancer on the query; result['topic_documents']
# is what the cells below read.
# NOTE(review): window and topicn presumably set the context size and the
# number of topics -- confirm against get_topic_model_concordance.
result = topic_concordancer.main(query,window=10,topicn=25)

### Print the key topics

# For every entry in result['topic_documents'], print its position followed
# by the second item of its 'topic_words' field.
for i,element in enumerate(result['topic_documents']):
    print (i)
    # NOTE(review): index 1 presumably holds the topic's word list -- confirm.
    topic_words =  element['topic_words'][1]
    print (topic_words)
    print ('\n')

### Analyze documents

# Print the matched words of the first 25 texts of the topic at position i.
i=0
# The loop variable is deliberately not called `text`: that name would
# shadow the `text` module imported from utils for the rest of the notebook.
for topic_text in result['topic_documents'][i]['texts'][0:25]:
    print (topic_text['matched_text_words'])
    print ('\n')

## Testimonial fragments

### 1. Whip and carry

In [14]:
# Lemmas that must co-occur; passed to create_contextual_query in the next cell.
lemmas = ["whip","carry"]

In [15]:
# Build the query matching the lemmas in either order, at most 10 tokens
# apart, and display it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="whip"][]{0,10}[lemma="carry"])|([lemma="carry"][]{0,10}[lemma="whip"])


In [16]:
# Label of the mid node under which this section's fragments are stored.
domain_term = "carry"

In [17]:
# Container handed to add_testimonial_fragments: the main node to extend,
# the mid node to create, and the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [18]:
# Passage 1 of this group: the text searched in the corpus and the label shown for it.
fragment_1 = {
    'original_sentence': "So we had to go down, 400, 500 people, and carry that cable on their-- and they was going with whips, leather whips, eh!",
    'label': "So we had to go down, 400, 500 people, and carry that cable on their-- and they was going with whips, leather whips, eh! ",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22we%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22go%22%5D+%5B%22down%22%5D+%5B%5D+%5B%22400%22%5D+%5B%5D+%5B%22500%22%5D+%5B%22people%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22carry%22%5D+%5B%22that%22%5D+%5B%22cable%22%5D+%5B%22on%22%5D+%5B%22their%22%5D+%5B%5D%7B0%2C3%7D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22was%22%5D+%5B%22going%22%5D+%5B%22with%22%5D+%5B%22whips%22%5D+%5B%5D+%5B%22leather%22%5D+%5B%22whips%22%5D+%5B%5D+%5B%22eh%22%5D+%5B%22%21%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So we had to go down , 400 , 500 people , and carry that cable on their -- and they was going with whips , leather whips , eh ! ', 'right': '', 'complete_match': 'So we had to go down , 400 , 500 people , and carry that cable on their -- and they was going with whips , leather whips , eh ! ', 'testimony_id': 'usc_shoah_6009', 'shelfmark': ['USC Shoah Foundation 6009'], 'tok

In [19]:
# Passage 2 of this group: the text searched in the corpus and the label shown for it.
fragment_2 = {
    'original_sentence': "We had to carry small bombs. And the SS were standing with the whips, and if you didn't pick it up and take it on the train, you were whipped.",
    'label': "We had to carry small bombs. And the SS were standing with the whips, and if you didn't pick it up (..), you were whipped.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22carry%22%5D+%5B%22small%22%5D+%5B%22bombs%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22the%22%5D+%5B%22SS%22%5D+%5B%22were%22%5D+%5B%22standing%22%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22whips%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22if%22%5D+%5B%22you%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22pick%22%5D+%5B%22it%22%5D+%5B%22up%22%5D+%5B%22and%22%5D+%5B%22take%22%5D+%5B%22it%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22train%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22were%22%5D+%5B%22whipped%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "We had to carry small bombs . And the SS were standing with the whips , and if you did n't pick it up and take it on the train , you were whipped . ", 'right': '', 'complete_match': "We had to carry small bombs . And the SS were standing with the whips , and if you did n't pick it up and take it on the train , you

In [20]:
# Passage 3 of this group: the text searched in the corpus and the label shown for it.
fragment_3 = {
    'original_sentence': "And whatever they had left-- some people took their suitcases. They were kicked and whipped.",
    'label': "And whatever they had left-- some people took their suitcases. They were kicked and whipped.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22whatever%22%5D+%5B%22they%22%5D+%5B%22had%22%5D+%5B%22left%22%5D+%5B%5D%7B0%2C3%7D+%5B%22some%22%5D+%5B%22people%22%5D+%5B%22took%22%5D+%5B%22their%22%5D+%5B%22suitcases%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22were%22%5D+%5B%22kicked%22%5D+%5B%22and%22%5D+%5B%22whipped%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And whatever they had left -- some people took their suitcases . They were kicked and whipped . ', 'right': '', 'complete_match': 'And whatever they had left -- some people took their suitcases . They were kicked and whipped . ', 'testimony_id': 'usc_shoah_9262', 'shelfmark': ['USC Shoah Foundation 9262'], 'token_start': 23194, 'token_end': 23212}


In [21]:
# Store the collected fragments as a new mid node under the main node.
add_testimonial_fragments(fragments)

### 2. Whip and kill

In [22]:
# Lemmas that must co-occur; passed to create_contextual_query in the next cell.
lemmas = ["kill","whip"]

In [23]:
# Build the query matching the lemmas in either order, at most 10 tokens
# apart, and display it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="kill"][]{0,10}[lemma="whip"])|([lemma="whip"][]{0,10}[lemma="kill"])


In [24]:
# Label of the mid node under which this section's fragments are stored.
domain_term = "kill"

In [25]:
# Container handed to add_testimonial_fragments: the main node to extend,
# the mid node to create, and the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [26]:
# Passage 1 of this group: the text searched in the corpus and the label shown for it.
fragment_1 = {
    'original_sentence': "Except that girl that had a – one leg—she, ah, they – for some reason – they beat her with twenty-five whips",
    'label': "Except that girl that had a – one leg—she,(..) they beat her with twenty-five whips(..)",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Except%22%5D+%5B%22that%22%5D+%5B%22girl%22%5D+%5B%22that%22%5D+%5B%22had%22%5D+%5B%22a%22%5D+%5B%5D%7B0%2C3%7D+%5B%22one%22%5D+%5B%22leg%22%5D+%5B%5D%7B0%2C3%7D+%5B%22she%22%5D+%5B%5D+%5B%22ah%22%5D+%5B%5D+%5B%22they%22%5D+%5B%5D%7B0%2C3%7D+%5B%22for%22%5D+%5B%22some%22%5D+%5B%22reason%22%5D+%5B%5D%7B0%2C3%7D+%5B%22they%22%5D+%5B%22beat%22%5D+%5B%22her%22%5D+%5B%22with%22%5D+%5B%5D%7B0%2C3%7D+%5B%22whips%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Except that girl that had a – one leg — she , ah , they – for some reason – they beat her with twenty-five whips ', 'right': '', 'complete_match': 'Except that girl that had a – one leg — she , ah , they – for some reason – they beat her with twenty-five whips ', 'testimony_id': 'irn510737', 'shelfmark': ['USHMM RG-50.154*0026'], 'token_start': 10109, 'token_end': 10135}


In [27]:
# Passage 2 of this group: the text searched in the corpus and the label shown for it.
fragment_2 = {
    'original_sentence': "one of the SSs called, uh, told once that he could kill, he had a very big whip made, that he could kill a Jew in 10 whips",
    'label': "(..) one of the SSs called, uh, told once that he could kill, he had a very big whip made, that he could kill a Jew in 10 whips (..)",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22one%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22SSs%22%5D+%5B%22called%22%5D+%5B%5D+%5B%22uh%22%5D+%5B%5D+%5B%22told%22%5D+%5B%22once%22%5D+%5B%22that%22%5D+%5B%22he%22%5D+%5B%22could%22%5D+%5B%22kill%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22had%22%5D+%5B%22a%22%5D+%5B%22very%22%5D+%5B%22big%22%5D+%5B%22whip%22%5D+%5B%22made%22%5D+%5B%5D+%5B%22that%22%5D+%5B%22he%22%5D+%5B%22could%22%5D+%5B%22kill%22%5D+%5B%22a%22%5D+%5B%22Jew%22%5D+%5B%22in%22%5D+%5B%2210%22%5D+%5B%22whips%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'one of the SSs called , uh , told once that he could kill , he had a very big whip made , that he could kill a Jew in 10 whips ', 'right': '', 'complete_match': 'one of the SSs called , uh , told once that he could kill , he had a very big whip made , that he could kill a Jew in 10 whips ', 'testimony_id': 'irn505564', 'shelfmark': ['USHMM RG-50.042*0010'], 'to

In [28]:
# Passage 3 of this group: the text searched in the corpus and the label shown for it.
fragment_3 = {
    'original_sentence': "he humiliated people by lashing-- beating a person with a whip-- in front of all the workers. And on one occasion, as I mentioned before, he had killed a person not too far from myself.",
    'label': "he humiliated people by lashing-- beating a person with a whip-- (..) And on one occasion (..) he had killed a person not too far from myself.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22he%22%5D+%5B%22humiliated%22%5D+%5B%22people%22%5D+%5B%22by%22%5D+%5B%22lashing%22%5D+%5B%5D%7B0%2C3%7D+%5B%22beating%22%5D+%5B%22a%22%5D+%5B%22person%22%5D+%5B%22with%22%5D+%5B%22a%22%5D+%5B%22whip%22%5D+%5B%5D%7B0%2C3%7D+%5B%22in%22%5D+%5B%22front%22%5D+%5B%22of%22%5D+%5B%22all%22%5D+%5B%22the%22%5D+%5B%22workers%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22on%22%5D+%5B%22one%22%5D+%5B%22occasion%22%5D+%5B%5D+%5B%22as%22%5D+%5B%22I%22%5D+%5B%22mentioned%22%5D+%5B%22before%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22had%22%5D+%5B%22killed%22%5D+%5B%22a%22%5D+%5B%22person%22%5D+%5B%22not%22%5D+%5B%22too%22%5D+%5B%22far%22%5D+%5B%22from%22%5D+%5B%22myself%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'he humiliated people by lashing -- beating a person with a whip -- in front of all the workers . And on one occasion , as I mentioned before , he had killed a person not too far from m

In [29]:
# Passage 4 of this group: the text searched in the corpus and the label shown for it.
fragment_4 = {
    'original_sentence': "so he says to him that he just killed a Jew with 12 hits with his whip.",
    'label': "(..) so he says to him that he just killed a Jew with 12 hits with his whip.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22so%22%5D+%5B%22he%22%5D+%5B%22says%22%5D+%5B%22to%22%5D+%5B%22him%22%5D+%5B%22that%22%5D+%5B%22he%22%5D+%5B%22just%22%5D+%5B%22killed%22%5D+%5B%22a%22%5D+%5B%22Jew%22%5D+%5B%22with%22%5D+%5B%2212%22%5D+%5B%22hits%22%5D+%5B%22with%22%5D+%5B%22his%22%5D+%5B%22whip%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'so he says to him that he just killed a Jew with 12 hits with his whip . ', 'right': '', 'complete_match': 'so he says to him that he just killed a Jew with 12 hits with his whip . ', 'testimony_id': 'irn504562', 'shelfmark': ['USHMM RG-50.030*0066'], 'token_start': 6707, 'token_end': 6725}


In [30]:
# Store the collected fragments as a new mid node under the main node.
add_testimonial_fragments(fragments)

### 3. Whip and march

In [31]:
# Lemmas that must co-occur; passed to create_contextual_query in the next cell.
lemmas = ["march","woman"]

In [32]:
# Build the query matching the lemmas in either order, at most 10 tokens
# apart, and display it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="march"][]{0,10}[lemma="woman"])|([lemma="woman"][]{0,10}[lemma="march"])


In [33]:
# Label of the mid node under which this section's fragments are stored.
domain_term = "march"

In [34]:
# Container handed to add_testimonial_fragments: the main node to extend,
# the mid node to create, and the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [35]:
# Passage 1 of this group: the text searched in the corpus and the label shown for it.
fragment_1 = {
    'original_sentence': "And the SS lifted their whips. It was forward march. And we started to march. [PAUSES FOR 5 SECONDS] We left a bloody trail in, in the snow.",
    'label': "And the SS lifted their whips. It was forward march. And we started to march. We left a bloody trail in, in the snow.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22the%22%5D+%5B%22SS%22%5D+%5B%22lifted%22%5D+%5B%22their%22%5D+%5B%22whips%22%5D+%5B%5D+%5B%22It%22%5D+%5B%22was%22%5D+%5B%22forward%22%5D+%5B%22march%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22we%22%5D+%5B%22started%22%5D+%5B%22to%22%5D+%5B%22march%22%5D+%5B%5D+%5B%5D%7B0%2C3%7D+%5B%22PAUSES%22%5D+%5B%22FOR%22%5D+%5B%225%22%5D+%5B%22SECONDS%22%5D+%5B%5D%7B0%2C3%7D+%5B%22We%22%5D+%5B%22left%22%5D+%5B%22a%22%5D+%5B%22bloody%22%5D+%5B%22trail%22%5D+%5B%22in%22%5D+%5B%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22snow%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And the SS lifted their whips . It was forward march . And we started to march . [ PAUSES FOR 5 SECONDS ] We left a bloody trail in , in the snow . ', 'right': '', 'complete_match': 'And the SS lifted their whips . It was forward march . And we started to march . [ PAUSES FOR 5 SECONDS ] We left a bloody

In [36]:
# Passage 2 of this group: the text searched in the corpus and the label shown for it.
fragment_2 = {
    'original_sentence': "We were marched through the town under the whips of the SS uh to a certain collection",
    'label': "We were marched through the town under the whips of the SS uh to a certain collection (..).",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22were%22%5D+%5B%22marched%22%5D+%5B%22through%22%5D+%5B%22the%22%5D+%5B%22town%22%5D+%5B%22under%22%5D+%5B%22the%22%5D+%5B%22whips%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22SS%22%5D+%5B%22uh%22%5D+%5B%22to%22%5D+%5B%22a%22%5D+%5B%22certain%22%5D+%5B%22collection%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We were marched through the town under the whips of the SS uh to a certain collection ', 'right': '', 'complete_match': 'We were marched through the town under the whips of the SS uh to a certain collection ', 'testimony_id': 'irn504599', 'shelfmark': ['USHMM RG-50.030*0105'], 'token_start': 6838, 'token_end': 6855}


In [37]:
# Passage 3 of this group: the text searched in the corpus and the label shown for it.
fragment_3 = {
    'original_sentence': 'And on the side were the SS men and the SS women, and they lifted their whips and they said',
    'label': 'And on the side were the SS men and the SS women, and they lifted their whips and they said, "Forward march."',
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22side%22%5D+%5B%22were%22%5D+%5B%22the%22%5D+%5B%22SS%22%5D+%5B%22men%22%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22SS%22%5D+%5B%22women%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22lifted%22%5D+%5B%22their%22%5D+%5B%22whips%22%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22said%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And on the side were the SS men and the SS women , and they lifted their whips and they said ', 'right': '', 'complete_match': 'And on the side were the SS men and the SS women , and they lifted their whips and they said ', 'testimony_id': 'irn507376', 'shelfmark': ['USHMM RG-50.042*0001'], 'token_start': 13993, 'token_end': 14014}


In [38]:
# Passage 4 of this group: the text searched in the corpus and the label shown for it.
fragment_4 = {
    'original_sentence': "We were marched every morning into fields. And there was always a whip above your head.",
    'label': "We were marched every morning into fields. And there was always a whip above your head.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22were%22%5D+%5B%22marched%22%5D+%5B%22every%22%5D+%5B%22morning%22%5D+%5B%22into%22%5D+%5B%22fields%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22there%22%5D+%5B%22was%22%5D+%5B%22always%22%5D+%5B%22a%22%5D+%5B%22whip%22%5D+%5B%22above%22%5D+%5B%22your%22%5D+%5B%22head%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We were marched every morning into fields . And there was always a whip above your head . ', 'right': '', 'complete_match': 'We were marched every morning into fields . And there was always a whip above your head . ', 'testimony_id': 'usc_shoah_27443', 'shelfmark': ['USC Shoah Foundation 27443'], 'token_start': 9844, 'token_end': 9862}


In [39]:
# Passage 5 of this group: the text searched in the corpus and the label shown for it.
fragment_5 = {
    'original_sentence': " And some of the whips hit us. And so we were marching quickly.",
    'label': " And some of the whips hit us. And so we were marching quickly. ",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22some%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22whips%22%5D+%5B%22hit%22%5D+%5B%22us%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22so%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22marching%22%5D+%5B%22quickly%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And some of the whips hit us . And so we were marching quickly . ', 'right': '', 'complete_match': 'And some of the whips hit us . And so we were marching quickly . ', 'testimony_id': 'usc_shoah_2916', 'shelfmark': ['USC Shoah Foundation 2916'], 'token_start': 11302, 'token_end': 11317}


In [40]:
# Store the collected fragments as a new mid node under the main node.
add_testimonial_fragments(fragments)

### 4. Whip and run

In [41]:
# Lemmas that must co-occur; passed to create_contextual_query in the next cell.
lemmas = ["whip","run"]

In [42]:
# Build the query matching the lemmas in either order, at most 10 tokens
# apart, and display it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="whip"][]{0,10}[lemma="run"])|([lemma="run"][]{0,10}[lemma="whip"])


In [43]:
# Label of the mid node under which this section's fragments are stored.
domain_term = "run"

In [44]:
# Container handed to add_testimonial_fragments: the main node to extend,
# the mid node to create, and the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [45]:
# Passage 1 of this group: the text searched in the corpus and the label shown for it.
fragment_1 = {
    'original_sentence': 'They had whips and were yelling',
    'label': 'They had whips and were yelling,"Run, run, fast, fast."',
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22had%22%5D+%5B%22whips%22%5D+%5B%22and%22%5D+%5B%22were%22%5D+%5B%22yelling%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They had whips and were yelling ', 'right': '', 'complete_match': 'They had whips and were yelling ', 'testimony_id': 'irn503624', 'shelfmark': ['USHMM RG-50.005*0028'], 'token_start': 6481, 'token_end': 6487}


In [46]:
# Passage 2 of this group: the text searched in the corpus and the label shown for it.
fragment_2 = {
    'original_sentence': "Just whip and run. Whip you and run. Always run, run, run, run.",
    'label': "Just whip and run. Whip you and run. Always run, run, run, run.",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Just%22%5D+%5B%22whip%22%5D+%5B%22and%22%5D+%5B%22run%22%5D+%5B%5D+%5B%22Whip%22%5D+%5B%22you%22%5D+%5B%22and%22%5D+%5B%22run%22%5D+%5B%5D+%5B%22Always%22%5D+%5B%22run%22%5D+%5B%5D+%5B%22run%22%5D+%5B%5D+%5B%22run%22%5D+%5B%5D+%5B%22run%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Just whip and run . Whip you and run . Always run , run , run , run . ', 'right': '', 'complete_match': 'Just whip and run . Whip you and run . Always run , run , run , run . ', 'testimony_id': 'HVT-158', 'shelfmark': ['Fortunoff Archive HVT-158'], 'token_start': 9438, 'token_end': 9457}


In [47]:
# Passage 3 of this group: the text searched in the corpus and the label shown for it.
fragment_3 = {
    'original_sentence': "So they whipped preferably those with loads, and so they had to run.",
    'label': "So they whipped preferably those with loads, and so they had to run. ",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22they%22%5D+%5B%22whipped%22%5D+%5B%22preferably%22%5D+%5B%22those%22%5D+%5B%22with%22%5D+%5B%22loads%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22so%22%5D+%5B%22they%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22run%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So they whipped preferably those with loads , and so they had to run . ', 'right': '', 'complete_match': 'So they whipped preferably those with loads , and so they had to run . ', 'testimony_id': 'usc_shoah_15694', 'shelfmark': ['USC Shoah Foundation 15694'], 'token_start': 25120, 'token_end': 25135}


In [48]:
# Passage 4 of this group: the text searched in the corpus and the label shown for it.
fragment_4 = {
    'original_sentence': "once we were running, he made us run in a circle on the ice and whipped us",
    'label': "(..)once we were running, he made us run in a circle on the ice and whipped us (..).",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22once%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22running%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22made%22%5D+%5B%22us%22%5D+%5B%22run%22%5D+%5B%22in%22%5D+%5B%22a%22%5D+%5B%22circle%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22ice%22%5D+%5B%22and%22%5D+%5B%22whipped%22%5D+%5B%22us%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'once we were running , he made us run in a circle on the ice and whipped us ', 'right': '', 'complete_match': 'once we were running , he made us run in a circle on the ice and whipped us ', 'testimony_id': 'usc_shoah_21582', 'shelfmark': ['USC Shoah Foundation 21582'], 'token_start': 26674, 'token_end': 26692}


In [49]:
# Passage 5 of this group: the text searched in the corpus and the label shown for it.
fragment_5 = {
    'original_sentence': "while we were running out of the houses, they were beating with the whips",
    'label': "(..)while we were running out of the houses, they were beating with the whips (..)",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22while%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22running%22%5D+%5B%22out%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22houses%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22beating%22%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22whips%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'while we were running out of the houses , they were beating with the whips ', 'right': '', 'complete_match': 'while we were running out of the houses , they were beating with the whips ', 'testimony_id': 'HVT-91', 'shelfmark': ['Fortunoff Archive HVT-91'], 'token_start': 730, 'token_end': 745}


In [50]:
# Store the collected fragments as a new mid node under the main node.
add_testimonial_fragments(fragments)

### 5. Whip and walk

In [51]:
# Lemmas that must co-occur; passed to create_contextual_query in the next cell.
lemmas = ["walk","whip"]

In [52]:
# Build the query matching the lemmas in either order, at most 10 tokens
# apart, and display it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="walk"][]{0,10}[lemma="whip"])|([lemma="whip"][]{0,10}[lemma="walk"])


In [53]:
# Label of the mid node under which this section's fragments are stored.
domain_term = "walk"

In [54]:
# Container handed to add_testimonial_fragments: the main node to extend,
# the mid node to create, and the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [55]:
# Passage 1 of this group: the text searched in the corpus and the label shown for it.
fragment_1 = {
    'original_sentence': "We were horse whipped all the way to the camp, walking 3 or 4, 2 or 3 miles to the camp",
    'label': "We were horse whipped all the way to the camp, walking 3 or 4, 2 or 3 miles to the camp (..).",
}
# find_sentence_id returns (start sentence, end sentence, testimony id).
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22were%22%5D+%5B%22horse%22%5D+%5B%22whipped%22%5D+%5B%22all%22%5D+%5B%22the%22%5D+%5B%22way%22%5D+%5B%22to%22%5D+%5B%22the%22%5D+%5B%22camp%22%5D+%5B%5D+%5B%22walking%22%5D+%5B%223%22%5D+%5B%22or%22%5D+%5B%224%22%5D+%5B%5D+%5B%222%22%5D+%5B%22or%22%5D+%5B%223%22%5D+%5B%22miles%22%5D+%5B%22to%22%5D+%5B%22the%22%5D+%5B%22camp%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We were horse whipped all the way to the camp , walking 3 or 4 , 2 or 3 miles to the camp ', 'right': '', 'complete_match': 'We were horse whipped all the way to the camp , walking 3 or 4 , 2 or 3 miles to the camp ', 'testimony_id': 'irn505558', 'shelfmark': ['USHMM RG-50.042*0004'], 'token_start': 10226, 'token_end': 10249}


In [56]:
# Second "walk" fragment. Only the shorter 'original_sentence' is passed to
# find_sentence_id; the label carries additional trailing text.
fragment_2 = {
    'original_sentence': "We were walking in and they were stand with the long whips",
    'label': "We were walking in and they were stand with the long whips and they’re beating on us (..).",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22were%22%5D+%5B%22walking%22%5D+%5B%22in%22%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22stand%22%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22long%22%5D+%5B%22whips%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We were walking in and they were stand with the long whips ', 'right': '', 'complete_match': 'We were walking in and they were stand with the long whips ', 'testimony_id': 'irn506639', 'shelfmark': ['USHMM RG-50.106*0128'], 'token_start': 8626, 'token_end': 8638}


In [57]:
# Third "walk" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_3 = {
    'original_sentence': "SS men hitting them with whips and urging them to walk faster.",
    'label': "(..) SS men hitting them with whips and urging them to walk faster.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22SS%22%5D+%5B%22men%22%5D+%5B%22hitting%22%5D+%5B%22them%22%5D+%5B%22with%22%5D+%5B%22whips%22%5D+%5B%22and%22%5D+%5B%22urging%22%5D+%5B%22them%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22faster%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'SS men hitting them with whips and urging them to walk faster . ', 'right': '', 'complete_match': 'SS men hitting them with whips and urging them to walk faster . ', 'testimony_id': 'usc_shoah_9701', 'shelfmark': ['USC Shoah Foundation 9701'], 'token_start': 5072, 'token_end': 5085}


In [58]:
# Fourth "walk" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_4 = {
    'original_sentence': "on each between the walking back and forth work and whip and whip and whip --- it’s just like, like a --- something on the air, sometime get you.",
    'label': "(..) on each between the walking back and forth work and whip and whip and whip --- it’s just like, like a --- something on the air, sometime get you.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22on%22%5D+%5B%22each%22%5D+%5B%22between%22%5D+%5B%22the%22%5D+%5B%22walking%22%5D+%5B%22back%22%5D+%5B%22and%22%5D+%5B%22forth%22%5D+%5B%22work%22%5D+%5B%22and%22%5D+%5B%22whip%22%5D+%5B%22and%22%5D+%5B%22whip%22%5D+%5B%22and%22%5D+%5B%22whip%22%5D+%5B%5D%7B0%2C3%7D+%5B%22it%22%5D+%5B%5D+%5B%22just%22%5D+%5B%22like%22%5D+%5B%5D+%5B%22like%22%5D+%5B%22a%22%5D+%5B%5D%7B0%2C3%7D+%5B%22something%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22air%22%5D+%5B%5D+%5B%22sometime%22%5D+%5B%22get%22%5D+%5B%22you%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'on each between the walking back and forth work and whip and whip and whip --- it ’s just like , like a --- something on the air , sometime get you . ', 'right': '', 'complete_match': 'on each between the walking back and forth work and whip and whip and whip --- it ’s just like , like a --- something on the air , sometime 

In [59]:
# Fifth "walk" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_5 = {
    'original_sentence': "Sundays, where we didn't work, and we walked around outside, the went around with whips, and they whip up, just walking there.",
    'label': "Sundays, where we didn't work, and we walked around outside, the went around with whips, and they whip up, just walking there.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Sundays%22%5D+%5B%5D+%5B%22where%22%5D+%5B%22we%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22work%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22walked%22%5D+%5B%22around%22%5D+%5B%22outside%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22went%22%5D+%5B%22around%22%5D+%5B%22with%22%5D+%5B%22whips%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22whip%22%5D+%5B%22up%22%5D+%5B%5D+%5B%22just%22%5D+%5B%22walking%22%5D+%5B%22there%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "Sundays , where we did n't work , and we walked around outside , the went around with whips , and they whip up , just walking there . ", 'right': '', 'complete_match': "Sundays , where we did n't work , and we walked around outside , the went around with whips , and they whip up , just walking there . ", 'testimony_id': 'usc_shoah_2684', 'shelfmark': ['USC Shoah Foundation 2684'], 'token_start': 15592, 'token_en

In [60]:
# Pass the "walk" fragment collection to add_testimonial_fragments
# (defined outside this excerpt; presumably it stores them -- TODO confirm).
add_testimonial_fragments(fragments)

### 6.  

In [61]:
# Lemma pair fed to create_contextual_query in the next cell.
lemmas = ["count","whip"]

In [62]:
# Build a pattern that matches the lemmas in either order with at most
# 10 arbitrary tokens between them, and print it for inspection.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="count"][]{0,10}[lemma="whip"])|([lemma="whip"][]{0,10}[lemma="count"])


In [63]:
# Term stored as the 'mid_node' of this group's fragment collection.
domain_term = "count"

In [64]:
# Start a new, empty fragment collection for this group, keyed by the main
# node and the current domain term.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [65]:
# First "count" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_1 = {
    'original_sentence': "But I got twenty- five and you had to count. You didn’t count you got the whip.",
    'label': "But I got twenty- five and you had to count. You didn’t count you got the whip.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22But%22%5D+%5B%22I%22%5D+%5B%22got%22%5D+%5B%22twenty%22%5D+%5B%5D%7B0%2C3%7D+%5B%22five%22%5D+%5B%22and%22%5D+%5B%22you%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22count%22%5D+%5B%5D+%5B%22You%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22count%22%5D+%5B%22you%22%5D+%5B%22got%22%5D+%5B%22the%22%5D+%5B%22whip%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'But I got twenty - five and you had to count . You did n’t count you got the whip . ', 'right': '', 'complete_match': 'But I got twenty - five and you had to count . You did n’t count you got the whip . ', 'testimony_id': 'irn509193', 'shelfmark': ['USHMM RG-50.233*0111'], 'token_start': 15136, 'token_end': 15157}


In [66]:
# Second "count" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_2 = {
    'original_sentence': "they told him that he would get now a hundred whips and he has to count.",
    'label': "(..) they told him that he would get now a hundred whips and he has to count.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22they%22%5D+%5B%22told%22%5D+%5B%22him%22%5D+%5B%22that%22%5D+%5B%22he%22%5D+%5B%22would%22%5D+%5B%22get%22%5D+%5B%22now%22%5D+%5B%22a%22%5D+%5B%22hundred%22%5D+%5B%22whips%22%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22has%22%5D+%5B%22to%22%5D+%5B%22count%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'they told him that he would get now a hundred whips and he has to count . ', 'right': '', 'complete_match': 'they told him that he would get now a hundred whips and he has to count . ', 'testimony_id': 'irn508479', 'shelfmark': ['USHMM RG-50.030*0411'], 'token_start': 31655, 'token_end': 31672}


In [67]:
# Third "count" fragment. The sentence text is kept verbatim (including
# "neat--") because it is what find_sentence_id is asked to match.
fragment_3 = {
    'original_sentence': "They neat-- they had to pull out their pants, and they beat them, whipped them, put down 25. They had to count it.",
    'label': "They neat-- they had to pull out their pants, and they beat them, whipped them, put down 25. They had to count it.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22neat%22%5D+%5B%5D%7B0%2C3%7D+%5B%22they%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22pull%22%5D+%5B%22out%22%5D+%5B%22their%22%5D+%5B%22pants%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22beat%22%5D+%5B%22them%22%5D+%5B%5D+%5B%22whipped%22%5D+%5B%22them%22%5D+%5B%5D+%5B%22put%22%5D+%5B%22down%22%5D+%5B%2225%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22count%22%5D+%5B%22it%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They neat -- they had to pull out their pants , and they beat them , whipped them , put down 25 . They had to count it . ', 'right': '', 'complete_match': 'They neat -- they had to pull out their pants , and they beat them , whipped them , put down 25 . They had to count it . ', 'testimony_id': 'usc_shoah_1491', 'shelfmark': ['USC Shoah Foundation 1491'], 'token_start': 8115, 'token_end': 8144}


In [68]:
# Fourth "count" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_4 = {
    'original_sentence': " And they used to have to count them 25. They used to get 25 whips.",
    'label': " And they used to have to count them 25. They used to get 25 whips.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22they%22%5D+%5B%22used%22%5D+%5B%22to%22%5D+%5B%22have%22%5D+%5B%22to%22%5D+%5B%22count%22%5D+%5B%22them%22%5D+%5B%2225%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22used%22%5D+%5B%22to%22%5D+%5B%22get%22%5D+%5B%2225%22%5D+%5B%22whips%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And they used to have to count them 25 . They used to get 25 whips . ', 'right': '', 'complete_match': 'And they used to have to count them 25 . They used to get 25 whips . ', 'testimony_id': 'usc_shoah_1496', 'shelfmark': ['USC Shoah Foundation 1496'], 'token_start': 5046, 'token_end': 5063}


In [69]:
# Fifth "count" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_5 = {
    'original_sentence': "They used to get 25 whips. And if they missed it, they had to count all over again. The whips would cut the flesh.",
    'label': "They used to get 25 whips. And if they missed it, they had to count all over again. The whips would cut the flesh.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22used%22%5D+%5B%22to%22%5D+%5B%22get%22%5D+%5B%2225%22%5D+%5B%22whips%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22if%22%5D+%5B%22they%22%5D+%5B%22missed%22%5D+%5B%22it%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22count%22%5D+%5B%22all%22%5D+%5B%22over%22%5D+%5B%22again%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22whips%22%5D+%5B%22would%22%5D+%5B%22cut%22%5D+%5B%22the%22%5D+%5B%22flesh%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They used to get 25 whips . And if they missed it , they had to count all over again . The whips would cut the flesh . ', 'right': '', 'complete_match': 'They used to get 25 whips . And if they missed it , they had to count all over again . The whips would cut the flesh . ', 'testimony_id': 'usc_shoah_1496', 'shelfmark': ['USC Shoah Foundation 1496'], 'token_start': 5056, 'token_end': 5084}


In [70]:
# Pass the "count" fragment collection to add_testimonial_fragments
# (defined outside this excerpt; presumably it stores them -- TODO confirm).
add_testimonial_fragments(fragments)

### 7.  

In [71]:
# Lemma pair fed to create_contextual_query in the next cell.
lemmas = ["whip","work"]

In [72]:
# Build a pattern that matches the lemmas in either order with at most
# 10 arbitrary tokens between them, and print it for inspection.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="whip"][]{0,10}[lemma="work"])|([lemma="work"][]{0,10}[lemma="whip"])


In [73]:
# Term stored as the 'mid_node' of this group's fragment collection.
domain_term = "work"

In [74]:
# Start a new, empty fragment collection for this group, keyed by the main
# node and the current domain term.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [75]:
# First "work" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_1 = {
    'original_sentence': "And if there were any complaints about anybody during the work of any kind, the person was whipped",
    'label': "And if there were any complaints about anybody during the work of any kind, the person was whipped (..).",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22if%22%5D+%5B%22there%22%5D+%5B%22were%22%5D+%5B%22any%22%5D+%5B%22complaints%22%5D+%5B%22about%22%5D+%5B%22anybody%22%5D+%5B%22during%22%5D+%5B%22the%22%5D+%5B%22work%22%5D+%5B%22of%22%5D+%5B%22any%22%5D+%5B%22kind%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22person%22%5D+%5B%22was%22%5D+%5B%22whipped%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And if there were any complaints about anybody during the work of any kind , the person was whipped ', 'right': '', 'complete_match': 'And if there were any complaints about anybody during the work of any kind , the person was whipped ', 'testimony_id': 'irn504840', 'shelfmark': ['USHMM RG-50.030*0346'], 'token_start': 16270, 'token_end': 16289}


In [76]:
# Second "work" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_2 = {
    'original_sentence': "They went behind us and if we didn’t work hard enough to suit them we were whipped.",
    'label': "They went behind us and if we didn’t work hard enough to suit them we were whipped.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22went%22%5D+%5B%22behind%22%5D+%5B%22us%22%5D+%5B%22and%22%5D+%5B%22if%22%5D+%5B%22we%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22work%22%5D+%5B%22hard%22%5D+%5B%22enough%22%5D+%5B%22to%22%5D+%5B%22suit%22%5D+%5B%22them%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22whipped%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They went behind us and if we did n’t work hard enough to suit them we were whipped . ', 'right': '', 'complete_match': 'They went behind us and if we did n’t work hard enough to suit them we were whipped . ', 'testimony_id': 'irn510486', 'shelfmark': ['USHMM RG-50.322*0032'], 'token_start': 10858, 'token_end': 10877}


In [77]:
# Third "work" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_3 = {
    'original_sentence': "You were not allowed to pick up your head from the work that did. If you did, you had a whip around your neck.",
    'label': "You were not allowed to pick up your head from the work that did. If you did, you had a whip around your neck.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22were%22%5D+%5B%22not%22%5D+%5B%22allowed%22%5D+%5B%22to%22%5D+%5B%22pick%22%5D+%5B%22up%22%5D+%5B%22your%22%5D+%5B%22head%22%5D+%5B%22from%22%5D+%5B%22the%22%5D+%5B%22work%22%5D+%5B%22that%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22If%22%5D+%5B%22you%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22had%22%5D+%5B%22a%22%5D+%5B%22whip%22%5D+%5B%22around%22%5D+%5B%22your%22%5D+%5B%22neck%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'You were not allowed to pick up your head from the work that did . If you did , you had a whip around your neck . ', 'right': '', 'complete_match': 'You were not allowed to pick up your head from the work that did . If you did , you had a whip around your neck . ', 'testimony_id': 'usc_shoah_27443', 'shelfmark': ['USC Shoah Foundation 27443'], 'token_start': 9862, 'token_end': 9889}


In [78]:
# Fourth "work" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_4 = {
    'original_sentence': "But they were like-- they used like those whips-- work, work, work, work.",
    'label': "But they were like-- they used like those whips-- work, work, work, work.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22But%22%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22like%22%5D+%5B%5D%7B0%2C3%7D+%5B%22they%22%5D+%5B%22used%22%5D+%5B%22like%22%5D+%5B%22those%22%5D+%5B%22whips%22%5D+%5B%5D%7B0%2C3%7D+%5B%22work%22%5D+%5B%5D+%5B%22work%22%5D+%5B%5D+%5B%22work%22%5D+%5B%5D+%5B%22work%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'But they were like -- they used like those whips -- work , work , work , work . ', 'right': '', 'complete_match': 'But they were like -- they used like those whips -- work , work , work , work . ', 'testimony_id': 'usc_shoah_3474', 'shelfmark': ['USC Shoah Foundation 3474'], 'token_start': 3769, 'token_end': 3788}


In [79]:
# Fifth "work" fragment. The first three items returned by find_sentence_id
# are used as start index, end index and testimony id; media offset/index
# are fixed at 0.
fragment_5 = {
    'original_sentence': "you didn't work fast enough, they come with a whip or something like that.",
    'label': "(..)you didn't work fast enough, they come with a whip or something like that.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22you%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22work%22%5D+%5B%22fast%22%5D+%5B%22enough%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22come%22%5D+%5B%22with%22%5D+%5B%22a%22%5D+%5B%22whip%22%5D+%5B%22or%22%5D+%5B%22something%22%5D+%5B%22like%22%5D+%5B%22that%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "you did n't work fast enough , they come with a whip or something like that . ", 'right': '', 'complete_match': "you did n't work fast enough , they come with a whip or something like that . ", 'testimony_id': 'usc_shoah_8072', 'shelfmark': ['USC Shoah Foundation 8072'], 'token_start': 18671, 'token_end': 18688}


In [80]:
# Pass the "work" fragment collection to add_testimonial_fragments
# (defined outside this excerpt; presumably it stores them -- TODO confirm).
add_testimonial_fragments(fragments)