# Mining testimonial fragments of the Holocaust

**Experience domain:**

### Load the necessary libraries

In [1]:
import sys; sys.path.insert(0, '..')
import itertools

In [2]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
mongo = db.get_db()

In [3]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [4]:
def create_contextual_query(lemmas, context_length=50):
    """Build a corpus query matching the given lemmas in any order.

    Each ordering of ``lemmas`` becomes one parenthesised alternative in
    which consecutive ``[lemma="..."]`` tokens are separated by a gap of
    zero to ``context_length`` arbitrary tokens. The alternatives are
    joined with ``|`` and returned as a single string.
    """
    gap = '[]{0,' + str(context_length) + '}'
    alternatives = [
        '(' + gap.join('[lemma="' + lemma + '"]' for lemma in ordering) + ')'
        for ordering in itertools.permutations(lemmas, len(lemmas))
    ]
    return '|'.join(alternatives)
        
        
            

In [5]:
from utils import blacklab, db, text
import requests
import json
def find_sentence_id(label):
    """Locate a quoted fragment in the corpus and return its sentence span.

    ``label`` is tokenized by the server listening on localhost:9000
    (presumably Stanford CoreNLP -- confirm), the tokens are turned into a
    token-level query, and the first BlackLab hit is mapped to sentence
    indices through the ``tokens`` collection in MongoDB.

    Returns a tuple ``(start_sentence_index, end_sentence_index,
    testimony_id)`` for the first hit. When the lookup fails for any reason
    the query is printed and ``None`` is returned.
    """
    props = {'annotators': 'tokenize'}
    r = requests.post('http://localhost:9000/',
                      params={'properties': json.dumps(props)},
                      data=label.encode('utf-8'))
    # Decode the reply as UTF-8. Setting ``requests.encoding`` on the module
    # has no effect; the attribute on the response controls ``r.text``.
    r.encoding = 'utf-8'
    # json.loads() no longer accepts an ``encoding`` keyword (removed in 3.9).
    result = json.loads(r.text)

    parsed_tokens = result['tokens']
    last_index = len(parsed_tokens) - 1
    # Tokens containing any of these are matched by one arbitrary token
    # instead of their literal text.
    single_wildcards = ("n't", "'re", "?", ".", "'s", ",")

    query = []
    for i, token in enumerate(parsed_tokens):
        word = token['word']
        if '...' in word:
            # An ellipsis at either edge is dropped; inside the fragment it
            # stands for a gap of up to 50 tokens.
            if i == 0 or i == last_index:
                continue
            query.append('[]{0,50}')
        elif '-' in word:
            query.append('[]{0,3}')
        elif any(marker in word for marker in single_wildcards):
            query.append('[]')
        else:
            query.append('["' + word + '"]')
    query = ' '.join(query)

    try:
        sentence = blacklab.search_blacklab(query, window=0,
                                            lemma=False,
                                            include_match=True)
        hit = sentence[0]
        token_end = hit['token_end']
        token_start = hit['token_start']
        print(hit)
        mongo = db.get_db()
        # Only the first matching document is needed.
        document = mongo.tokens.find_one({'testimony_id': hit['testimony_id']},
                                         {'_id': 0})
        corpus_tokens = document['tokens']
        sentence_start = corpus_tokens[token_start]['sentence_index']
        # NOTE(review): the hits printed in this notebook suggest token_end
        # is exclusive (token_end - token_start equals the match length), so
        # this reads the token after the match -- confirm whether
        # token_end - 1 is intended.
        sentence_end = corpus_tokens[token_end]['sentence_index']
        return (sentence_start, sentence_end, hit['testimony_id'])
    except Exception:
        # Best effort: report the failing query instead of aborting the cell.
        # Unlike a bare ``except``, this lets KeyboardInterrupt through.
        print("The following query returned a null result")
        print(query)
        return None
        
            


In [6]:
def create_parent_node(label):
    """Return a new root fragment document for ``label``.

    The document carries the label, a random ``essay_id`` between 1 and 20,
    and a childless parent node (built by ``get_node``) under ``'tree'``.
    """
    # Random draws happen in the same order as the values are used below.
    testimony_id = random.randint(1, 20)
    essay_id = random.randint(1, 20)
    tree = get_node(testimony_id, {'label': label}, is_parent=True)
    tree['label'] = label
    return {'label': label, 'essay_id': essay_id, 'tree': tree}

In [7]:
def get_node(testimony_id, node, is_parent=False):
    """Return a tree node dict with an empty ``children`` list.

    For a parent node only ``node['label']`` is read; the testimony id and
    the four positional fields are random integers between 1 and 20. For a
    leaf the testimony id is copied from ``node`` and the positional fields
    are converted to ``float``. The ``testimony_id`` argument is not used.
    """
    positional = ('media_index', 'media_offset',
                  'start_sentence_index', 'end_sentence_index')
    built = {'label': node['label']}
    if is_parent:
        built['testimony_id'] = random.randint(1, 20)
        for field in positional:
            built[field] = random.randint(1, 20)
    else:
        built['testimony_id'] = node['testimony_id']
        for field in positional:
            built[field] = float(node[field])
    built['children'] = []
    return built

In [8]:
def check_if_main_node_exist(node):
    """Return True when a fragment document labelled ``node`` is stored.

    ``node`` is the label to look for in the ``fragments`` collection.
    """
    # find_one() returns None when nothing matches. Indexing the cursor
    # returned by find() raises IndexError in that case, so the previous
    # check could never report False.
    return mongo.fragments.find_one({'label': node}, {'_id': 0}) is not None

In [9]:
def add_main_node(label):
    """Insert a freshly generated root fragment document for ``label``."""
    # insert_one() matches the delete_one()/replace_one() calls used
    # elsewhere in this notebook; Collection.insert() is gone in PyMongo 4.
    mongo.fragments.insert_one(create_parent_node(label))

In [10]:
def delete_main_node(label):
    """Delete one stored fragment document whose label equals ``label``."""
    selector = {'label': label}
    mongo.fragments.delete_one(selector)

In [11]:
def add_testimonial_fragments(fragments):
    """Attach a mid-level node and its leaf fragments to a stored main node.

    ``fragments`` is a dict with three keys: ``'main_node'`` (label of the
    stored tree), ``'mid_node'`` (label of the node to add) and
    ``'fragments'`` (list of dicts that ``get_node`` accepts as leaves).

    Nothing is written, and a message is printed, when the main node is not
    stored or already has a child labelled like the mid node.
    """
    main_label = fragments['main_node']
    # One lookup both checks for existence and fetches the document.
    results = mongo.fragments.find_one({'label': main_label}, {'_id': 0})
    if results is None:
        print("main node does not exist cannot be added")
        return

    children = results['tree']['children']
    if any(child['label'] == fragments['mid_node'] for child in children):
        print ("mid node exists cannot be added")
        return

    mid_node = get_node('r', {'label': fragments['mid_node']}, is_parent=True)
    mid_node['children'] = [get_node(fragment['testimony_id'], fragment)
                            for fragment in fragments['fragments']]
    children.append(mid_node)
    mongo.fragments.replace_one({'label': main_label}, results)

### Add the main node

In [218]:
# Start from a clean root: delete one stored document labelled "walk"
# (if any), then insert a freshly generated parent node for it.
main_node = "walk"
delete_main_node(main_node)
add_main_node(main_node)

  


### Set up the query

In [13]:
# Match every token whose lemma is "walk".
query = '[lemma="walk"]'

# window and topicn are forwarded to get_topic_model_concordance.main;
# presumably the context width per hit and the number of topics -- TODO confirm.
result = topic_concordancer.main(query,window=20,topicn=25)

### Print the key topics

# List each topic's index followed by the second entry of its 'topic_words',
# leaving blank lines between topics.
for i, element in enumerate(result['topic_documents']):
    print(i)
    topic_words = element['topic_words'][1]
    print(topic_words, end='\n\n\n')

### Analyze documents

# Index of the topic whose documents are printed; change it to browse
# another topic.
i = 0
# The loop variable is called `document` so that it does not rebind the
# `text` module imported from utils.
for document in result['topic_documents'][i]['texts'][0:25]:
    print(document['matched_text_words'])
    print('\n')

## Testimonial fragments

### 1.  

In [219]:
# Lemmas that must co-occur; passed to create_contextual_query below.
lemmas = ["walk","kill"]

In [220]:
# Combine the lemmas in either order with at most 10 tokens between them,
# and show the resulting pattern.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="walk"][]{0,10}[lemma="kill"])|([lemma="kill"][]{0,10}[lemma="walk"])


In [221]:
# Label of this section's mid-level node (stored as fragments['mid_node']).
domain_term = "kill"

In [222]:
# Fresh payload for add_testimonial_fragments: target tree, mid-node label,
# and the list that the following cells fill with leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [223]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_1 = {'original_sentence': "Whoever couldn't walk was killed wherever they stood by the roadside, on the road.",
              'label': "Whoever couldn't walk was killed wherever they stood by the roadside, on the road."}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Whoever%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22was%22%5D+%5B%22killed%22%5D+%5B%22wherever%22%5D+%5B%22they%22%5D+%5B%22stood%22%5D+%5B%22by%22%5D+%5B%22the%22%5D+%5B%22roadside%22%5D+%5B%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22road%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "Whoever could n't walk was killed wherever they stood by the roadside , on the road . ", 'right': '', 'complete_match': "Whoever could n't walk was killed wherever they stood by the roadside , on the road . ", 'testimony_id': 'HVT-108', 'shelfmark': ['Fortunoff HVT-108'], 'token_start': 8131, 'token_end': 8148}


In [224]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_3 = {'original_sentence': "I walked away from other people that were killed behind me who couldn't walk all day long.",
              'label': "I walked away from other people that were killed behind me who couldn't walk all day long."}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22walked%22%5D+%5B%22away%22%5D+%5B%22from%22%5D+%5B%22other%22%5D+%5B%22people%22%5D+%5B%22that%22%5D+%5B%22were%22%5D+%5B%22killed%22%5D+%5B%22behind%22%5D+%5B%22me%22%5D+%5B%22who%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22all%22%5D+%5B%22day%22%5D+%5B%22long%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "I walked away from other people that were killed behind me who could n't walk all day long . ", 'right': '', 'complete_match': "I walked away from other people that were killed behind me who could n't walk all day long . ", 'testimony_id': 'HVT-22', 'shelfmark': ['Fortunoff HVT-22'], 'token_start': 18715, 'token_end': 18734}


In [225]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_4 = {'original_sentence': " We thought we are not strong enough, we cannot walk. They will kill us if we can’t walk.",
              'label': " We thought we are not strong enough, we cannot walk. They will kill us if we can’t walk."}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22thought%22%5D+%5B%22we%22%5D+%5B%22are%22%5D+%5B%22not%22%5D+%5B%22strong%22%5D+%5B%22enough%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22can%22%5D+%5B%22not%22%5D+%5B%22walk%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22will%22%5D+%5B%22kill%22%5D+%5B%22us%22%5D+%5B%22if%22%5D+%5B%22we%22%5D+%5B%22ca%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We thought we are not strong enough , we can not walk . They will kill us if we ca n’t walk . ', 'right': '', 'complete_match': 'We thought we are not strong enough , we can not walk . They will kill us if we ca n’t walk . ', 'testimony_id': 'irn509199', 'shelfmark': ['USHMM RG-50.233*0117'], 'token_start': 13471, 'token_end': 13494}


In [226]:
# Leaf for this quote; the label marks the omitted opening with "(..)".
# find_sentence_id returns (start sentence index, end sentence index, testimony id).
fragment_5 = {'original_sentence': "a girl, she was walking there. And they kill her.",
              'label': "(..) a girl, she was walking there. And they kill her."}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22a%22%5D+%5B%22girl%22%5D+%5B%5D+%5B%22she%22%5D+%5B%22was%22%5D+%5B%22walking%22%5D+%5B%22there%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22they%22%5D+%5B%22kill%22%5D+%5B%22her%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'a girl , she was walking there . And they kill her . ', 'right': '', 'complete_match': 'a girl , she was walking there . And they kill her . ', 'testimony_id': 'usc_shoah_16922', 'shelfmark': ['USC 16922'], 'token_start': 10285, 'token_end': 10298}


In [227]:
# Store this section's mid node and its collected fragments under the main node.
add_testimonial_fragments(fragments)

### 2.  

In [228]:
# Lemmas that must co-occur; passed to create_contextual_query below.
lemmas = ["walk","allow"]

In [229]:
# Combine the lemmas in either order with at most 10 tokens between them,
# and show the resulting pattern.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="walk"][]{0,10}[lemma="allow"])|([lemma="allow"][]{0,10}[lemma="walk"])


In [230]:
# Label of this section's mid-level node (stored as fragments['mid_node']).
domain_term = "not allowed"

In [231]:
# Fresh payload for add_testimonial_fragments: target tree, mid-node label,
# and the list that the following cells fill with leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [232]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_1 = {'original_sentence': "And we were not allowed to walk down the street anymore.",
              'label': "And we were not allowed to walk down the street anymore."}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22not%22%5D+%5B%22allowed%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22down%22%5D+%5B%22the%22%5D+%5B%22street%22%5D+%5B%22anymore%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And we were not allowed to walk down the street anymore . ', 'right': '', 'complete_match': 'And we were not allowed to walk down the street anymore . ', 'testimony_id': 'irn504571', 'shelfmark': ['USHMM RG-50.030*0075'], 'token_start': 2336, 'token_end': 2348}


In [233]:
# Leaf for this quote; the label marks the omitted opening with "(..)".
# find_sentence_id returns (start sentence index, end sentence index, testimony id).
fragment_2 = {'original_sentence': "we couldn't walk on the pavement. A Jew wasn't allowed any such.",
              'label': "(..) we couldn't walk on the pavement. A Jew wasn't allowed any such."}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22we%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22pavement%22%5D+%5B%5D+%5B%22A%22%5D+%5B%22Jew%22%5D+%5B%22was%22%5D+%5B%5D+%5B%22allowed%22%5D+%5B%22any%22%5D+%5B%22such%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "we could n't walk on the pavement . A Jew was n't allowed any such . ", 'right': '', 'complete_match': "we could n't walk on the pavement . A Jew was n't allowed any such . ", 'testimony_id': 'irn508737', 'shelfmark': ['USHMM RG-50.462*0096'], 'token_start': 10724, 'token_end': 10740}


In [234]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_3 = {'original_sentence': "Because we are Jews and we are not allowed to walk on the sidewalk.",
              'label': "Because we are Jews and we are not allowed to walk on the sidewalk."}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Because%22%5D+%5B%22we%22%5D+%5B%22are%22%5D+%5B%22Jews%22%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22are%22%5D+%5B%22not%22%5D+%5B%22allowed%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22sidewalk%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Because we are Jews and we are not allowed to walk on the sidewalk . ', 'right': '', 'complete_match': 'Because we are Jews and we are not allowed to walk on the sidewalk . ', 'testimony_id': 'irn509148', 'shelfmark': ['USHMM RG-50.233*0065'], 'token_start': 3018, 'token_end': 3033}


In [235]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_4 = {'original_sentence': "And the Jews are not allowed to walk on the street until at least they-- if they have jobs.",
              'label': "And the Jews are not allowed to walk on the street until at least they-- if they have jobs."}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22the%22%5D+%5B%22Jews%22%5D+%5B%22are%22%5D+%5B%22not%22%5D+%5B%22allowed%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22street%22%5D+%5B%22until%22%5D+%5B%22at%22%5D+%5B%22least%22%5D+%5B%22they%22%5D+%5B%5D%7B0%2C3%7D+%5B%22if%22%5D+%5B%22they%22%5D+%5B%22have%22%5D+%5B%22jobs%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And the Jews are not allowed to walk on the street until at least they -- if they have jobs . ', 'right': '', 'complete_match': 'And the Jews are not allowed to walk on the street until at least they -- if they have jobs . ', 'testimony_id': 'usc_shoah_10960', 'shelfmark': ['USC 10960'], 'token_start': 4225, 'token_end': 4246}


In [236]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_5 = {'original_sentence': "They wouldn't allow any Jews walking at all in the streets.",
              'label': "They wouldn't allow any Jews walking at all in the streets."}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22would%22%5D+%5B%5D+%5B%22allow%22%5D+%5B%22any%22%5D+%5B%22Jews%22%5D+%5B%22walking%22%5D+%5B%22at%22%5D+%5B%22all%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22streets%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "They would n't allow any Jews walking at all in the streets . ", 'right': '', 'complete_match': "They would n't allow any Jews walking at all in the streets . ", 'testimony_id': 'usc_shoah_19111', 'shelfmark': ['USC 19111'], 'token_start': 8083, 'token_end': 8096}


In [237]:
# Store this section's mid node and its collected fragments under the main node.
add_testimonial_fragments(fragments)

## Testimonial fragments

### 3.  

In [238]:
# Hand-written pattern: "day" followed within 0-4 tokens by "night", with
# "walk" at most 10 tokens after that pair or at most 10 tokens before it.
query = '([lemma="day"] []{0,4} [lemma="night"] []{0,10} [lemma="walk"] | [lemma="walk"] []{0,10} [lemma="day"] []{0,4} [lemma="night"])'

In [239]:
# Label of this section's mid-level node (stored as fragments['mid_node']).
domain_term = "day and night"

In [240]:
# Fresh payload for add_testimonial_fragments: target tree, mid-node label,
# and the list that the following cells fill with leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [241]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_1 = {'original_sentence': "We was walking day and night.",
              'label': "We was walking day and night."}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22was%22%5D+%5B%22walking%22%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We was walking day and night . ', 'right': '', 'complete_match': 'We was walking day and night . ', 'testimony_id': 'HVT-12', 'shelfmark': ['Fortunoff HVT-12'], 'token_start': 5084, 'token_end': 5091}


In [242]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_2 = {'original_sentence': "And I was sorry that I brought them people on the other side because we were walking two days and two nights.",
              'label': "And I was sorry that I brought them people on the other side because we were walking two days and two nights."}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22sorry%22%5D+%5B%22that%22%5D+%5B%22I%22%5D+%5B%22brought%22%5D+%5B%22them%22%5D+%5B%22people%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22other%22%5D+%5B%22side%22%5D+%5B%22because%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22walking%22%5D+%5B%22two%22%5D+%5B%22days%22%5D+%5B%22and%22%5D+%5B%22two%22%5D+%5B%22nights%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And I was sorry that I brought them people on the other side because we were walking two days and two nights . ', 'right': '', 'complete_match': 'And I was sorry that I brought them people on the other side because we were walking two days and two nights . ', 'testimony_id': 'irn504582', 'shelfmark': ['USHMM RG-50.030*0087'], 'token_start': 18407, 'token_end': 18430}


In [243]:
# Leaf for this quote; the searched sentence keeps its trailing space, the
# label does not. find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_3 = {'original_sentence': "So we were walking and walking and days and nights and walking and walking and at night, you know...uh...we stopped. ",
              'label': "So we were walking and walking and days and nights and walking and walking and at night, you know...uh...we stopped."}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22walking%22%5D+%5B%22and%22%5D+%5B%22walking%22%5D+%5B%22and%22%5D+%5B%22days%22%5D+%5B%22and%22%5D+%5B%22nights%22%5D+%5B%22and%22%5D+%5B%22walking%22%5D+%5B%22and%22%5D+%5B%22walking%22%5D+%5B%22and%22%5D+%5B%22at%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D%7B0%2C50%7D+%5B%22uh%22%5D+%5B%5D%7B0%2C50%7D+%5B%22we%22%5D+%5B%22stopped%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So we were walking and walking and days and nights and walking and walking and at night , you know ... uh ... we stopped . ', 'right': '', 'complete_match': 'So we were walking and walking and days and nights and walking and walking and at night , you know ... uh ... we stopped . ', 'testimony_id': 'irn504592', 'shelfmark': ['USHMM RG-50.030*0098'], 'token_start': 7695, 'token_end': 7721}


In [244]:
# Leaf for this quote; the label marks the omitted ending with "(..)".
# find_sentence_id returns (start sentence index, end sentence index, testimony id).
fragment_4 = {'original_sentence': " And he walked and walked and walked, for days and days and days. He walked nights, ",
              'label': " And he walked and walked and walked, for days and days and days. He walked nights (..)."}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22he%22%5D+%5B%22walked%22%5D+%5B%22and%22%5D+%5B%22walked%22%5D+%5B%22and%22%5D+%5B%22walked%22%5D+%5B%5D+%5B%22for%22%5D+%5B%22days%22%5D+%5B%22and%22%5D+%5B%22days%22%5D+%5B%22and%22%5D+%5B%22days%22%5D+%5B%5D+%5B%22He%22%5D+%5B%22walked%22%5D+%5B%22nights%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And he walked and walked and walked , for days and days and days . He walked nights , ', 'right': '', 'complete_match': 'And he walked and walked and walked , for days and days and days . He walked nights , ', 'testimony_id': 'usc_shoah_1765', 'shelfmark': ['USC 1765'], 'token_start': 4930, 'token_end': 4949}


In [245]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_5 = {'original_sentence': "And we walked, and we walked for days and nights.",
              'label': "And we walked, and we walked for days and nights."}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22walked%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22walked%22%5D+%5B%22for%22%5D+%5B%22days%22%5D+%5B%22and%22%5D+%5B%22nights%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And we walked , and we walked for days and nights . ', 'right': '', 'complete_match': 'And we walked , and we walked for days and nights . ', 'testimony_id': 'usc_shoah_4967', 'shelfmark': ['USC 4967'], 'token_start': 21632, 'token_end': 21644}


In [246]:
# Store this section's mid node and its collected fragments under the main node.
add_testimonial_fragments(fragments)

## Testimonial fragments

### 4.  

In [247]:
# Lemmas that must co-occur; passed to create_contextual_query below.
lemmas = ["snow","walk"]

In [248]:
# Combine the lemmas in either order with at most 10 tokens between them,
# and show the resulting pattern.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="snow"][]{0,10}[lemma="walk"])|([lemma="walk"][]{0,10}[lemma="snow"])


In [249]:
# Label of this section's mid-level node (stored as fragments['mid_node']).
domain_term = "snow"

In [250]:
# Fresh payload for add_testimonial_fragments: target tree, mid-node label,
# and the list that the following cells fill with leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [251]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_1 = {'original_sentence': "I walked barefoot in the snow, and it was terribly cold.",
              'label': "I walked barefoot in the snow, and it was terribly cold."}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22walked%22%5D+%5B%22barefoot%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22snow%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22terribly%22%5D+%5B%22cold%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I walked barefoot in the snow , and it was terribly cold . ', 'right': '', 'complete_match': 'I walked barefoot in the snow , and it was terribly cold . ', 'testimony_id': 'irn504408', 'shelfmark': ['USHMM RG-50.030*0203'], 'token_start': 25741, 'token_end': 25754}


In [252]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_2 = {'original_sentence': "We were so tired of walking. I think it will start snowing or something later and there was a real night, I felt half, half dying.",
              'label': "We were so tired of walking. I think it will start snowing or something later and there was a real night, I felt half, half dying."}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22were%22%5D+%5B%22so%22%5D+%5B%22tired%22%5D+%5B%22of%22%5D+%5B%22walking%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22think%22%5D+%5B%22it%22%5D+%5B%22will%22%5D+%5B%22start%22%5D+%5B%22snowing%22%5D+%5B%22or%22%5D+%5B%22something%22%5D+%5B%22later%22%5D+%5B%22and%22%5D+%5B%22there%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22real%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22felt%22%5D+%5B%22half%22%5D+%5B%5D+%5B%22half%22%5D+%5B%22dying%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We were so tired of walking . I think it will start snowing or something later and there was a real night , I felt half , half dying . ', 'right': '', 'complete_match': 'We were so tired of walking . I think it will start snowing or something later and there was a real night , I felt half , half dying . ', 'testimony_id': 'irn504582', 'shelfmark': ['USHMM RG-50.030*0087'], 'to

In [253]:
# Leaf for this quote; the label marks the omitted ending with "(..)".
# find_sentence_id returns (start sentence index, end sentence index, testimony id).
fragment_3 = {'original_sentence': "We had to walk in the snow. The winter's are very difficult and heavy",
              'label': "We had to walk in the snow. The winter's are very difficult and heavy (..)."}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22snow%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22winter%22%5D+%5B%5D+%5B%22are%22%5D+%5B%22very%22%5D+%5B%22difficult%22%5D+%5B%22and%22%5D+%5B%22heavy%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "We had to walk in the snow . The winter 's are very difficult and heavy ", 'right': '', 'complete_match': "We had to walk in the snow . The winter 's are very difficult and heavy ", 'testimony_id': 'irn504526', 'shelfmark': ['USHMM RG-50.030*0008'], 'token_start': 5350, 'token_end': 5366}


In [254]:
# Leaf for this quote; find_sentence_id returns
# (start sentence index, end sentence index, testimony id).
fragment_4 = {'original_sentence': "And we're going to a big mountain, and on top the mountain, snow. Just to walk to the place, working place, was a effort.",
              'label': "And we're going to a big mountain, and on top the mountain, snow. Just to walk to the place, working place, was a effort."}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%5D+%5B%22going%22%5D+%5B%22to%22%5D+%5B%22a%22%5D+%5B%22big%22%5D+%5B%22mountain%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22on%22%5D+%5B%22top%22%5D+%5B%22the%22%5D+%5B%22mountain%22%5D+%5B%5D+%5B%22snow%22%5D+%5B%5D+%5B%22Just%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22to%22%5D+%5B%22the%22%5D+%5B%22place%22%5D+%5B%5D+%5B%22working%22%5D+%5B%22place%22%5D+%5B%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22effort%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And we 're going to a big mountain , and on top the mountain , snow . Just to walk to the place , working place , was a effort . ", 'right': '', 'complete_match': "And we 're going to a big mountain , and on top the mountain , snow . Just to walk to the place , working place , was a effort . ", 'testimony_id': 'irn504639', 'shelfmark': ['USHMM RG-50.030*0145'], 'token_start': 13872, 'token_end

In [255]:
# Leaf for this quote; the label marks the omitted opening with "(..)".
# find_sentence_id returns (start sentence index, end sentence index, testimony id).
fragment_5 = {'original_sentence': "every morning we had to walk at work in the snow to work.",
              'label': "(..) every morning we had to walk at work in the snow to work."}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(start_sentence_index=indices[0], end_sentence_index=indices[1],
                  media_offset=0, media_index=0, testimony_id=indices[2])
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22every%22%5D+%5B%22morning%22%5D+%5B%22we%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22at%22%5D+%5B%22work%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22snow%22%5D+%5B%22to%22%5D+%5B%22work%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'every morning we had to walk at work in the snow to work . ', 'right': '', 'complete_match': 'every morning we had to walk at work in the snow to work . ', 'testimony_id': 'irn503624', 'shelfmark': ['USHMM RG-50.005*0028'], 'token_start': 8984, 'token_end': 8998}


In [256]:
# Hand the assembled `fragments` collection to add_testimonial_fragments
# (helper defined outside this section of the notebook).
add_testimonial_fragments(fragments)

## Testimonial fragments

### 5.  

In [257]:
# Lemma pair that the next cell turns into a contextual query.
lemmas = ["walk", "shoe"]

In [258]:
# Build the query matching the lemmas in either order with at most 10 tokens
# between them, and print it for inspection.
query = create_contextual_query(lemmas, context_length=10)
print(query)

([lemma="walk"][]{0,10}[lemma="shoe"])|([lemma="shoe"][]{0,10}[lemma="walk"])


In [259]:
# Term recorded as the `mid_node` of the fragment group created in the next cell.
domain_term = "shoes"

In [260]:
# Start a fresh fragment group: `main_node` comes from outside this section,
# `mid_node` is the current domain term, and the fragment list starts empty.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [261]:
# Fragment 1: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_1 = {
    'original_sentence': "they gave me wooden shoes and they make me wear those shoes, I couldn’t walk in the shoes.",
    'label': "(..) they gave me wooden shoes and they make me wear those shoes, I couldn’t walk in the shoes.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22they%22%5D+%5B%22gave%22%5D+%5B%22me%22%5D+%5B%22wooden%22%5D+%5B%22shoes%22%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22make%22%5D+%5B%22me%22%5D+%5B%22wear%22%5D+%5B%22those%22%5D+%5B%22shoes%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22shoes%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'they gave me wooden shoes and they make me wear those shoes , I could n’t walk in the shoes . ', 'right': '', 'complete_match': 'they gave me wooden shoes and they make me wear those shoes , I could n’t walk in the shoes . ', 'testimony_id': 'irn504888', 'shelfmark': ['USHMM RG-50.549.05*0002'], 'token_start': 13068, 'token_end': 13089}


In [262]:
# Fragment 2: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_2 = {
    'original_sentence': "it was a difficult walk. I mean we didn't have shoes, just wooden type ---, and men were just falling down.",
    'label': "it was a difficult walk. (..) we didn't have shoes, just wooden type ---, and men were just falling down.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22it%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22difficult%22%5D+%5B%22walk%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22mean%22%5D+%5B%22we%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22have%22%5D+%5B%22shoes%22%5D+%5B%5D+%5B%22just%22%5D+%5B%22wooden%22%5D+%5B%22type%22%5D+%5B%5D%7B0%2C3%7D+%5B%5D+%5B%22and%22%5D+%5B%22men%22%5D+%5B%22were%22%5D+%5B%22just%22%5D+%5B%22falling%22%5D+%5B%22down%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "it was a difficult walk . I mean we did n't have shoes , just wooden type --- , and men were just falling down . ", 'right': '', 'complete_match': "it was a difficult walk . I mean we did n't have shoes , just wooden type --- , and men were just falling down . ", 'testimony_id': 'irn504773', 'shelfmark': ['USHMM RG-50.030*0289'], 'token_start': 20363, 'token_end': 20389}


In [263]:
# Fragment 3: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_3 = {
    'original_sentence': "I couldn't walk in the wooden shoes. They-- they made holes in my feet and it was just terrible,",
    'label': "I couldn't walk in the wooden shoes. (..) they made holes in my feet(..).",
}
indices = find_sentence_id(fragment_3['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22wooden%22%5D+%5B%22shoes%22%5D+%5B%5D+%5B%22They%22%5D+%5B%5D%7B0%2C3%7D+%5B%22they%22%5D+%5B%22made%22%5D+%5B%22holes%22%5D+%5B%22in%22%5D+%5B%22my%22%5D+%5B%22feet%22%5D+%5B%22and%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22just%22%5D+%5B%22terrible%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "I could n't walk in the wooden shoes . They -- they made holes in my feet and it was just terrible , ", 'right': '', 'complete_match': "I could n't walk in the wooden shoes . They -- they made holes in my feet and it was just terrible , ", 'testimony_id': 'HVT-134', 'shelfmark': ['Fortunoff HVT-134'], 'token_start': 15471, 'token_end': 15494}


In [264]:
# Fragment 4: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_4 = {
    'original_sentence': "Twice daily I watched thousands marching by with a deathly expression, painfully walking in their wooden shoes.",
    'label': "Twice daily I watched thousands marching by with a deathly expression, painfully walking in their wooden shoes.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Twice%22%5D+%5B%22daily%22%5D+%5B%22I%22%5D+%5B%22watched%22%5D+%5B%22thousands%22%5D+%5B%22marching%22%5D+%5B%22by%22%5D+%5B%22with%22%5D+%5B%22a%22%5D+%5B%22deathly%22%5D+%5B%22expression%22%5D+%5B%5D+%5B%22painfully%22%5D+%5B%22walking%22%5D+%5B%22in%22%5D+%5B%22their%22%5D+%5B%22wooden%22%5D+%5B%22shoes%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Twice daily I watched thousands marching by with a deathly expression , painfully walking in their wooden shoes . ', 'right': '', 'complete_match': 'Twice daily I watched thousands marching by with a deathly expression , painfully walking in their wooden shoes . ', 'testimony_id': 'irn508623', 'shelfmark': ['USHMM RG-50.462*0002'], 'token_start': 1795, 'token_end': 1814}


In [265]:
# Fragment 5: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_5 = {
    'original_sentence': "You were assigned shoes you couldn’t walk into and there was no way to get better shoes.",
    'label': "You were assigned shoes you couldn’t walk into and there was no way to get better shoes.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22were%22%5D+%5B%22assigned%22%5D+%5B%22shoes%22%5D+%5B%22you%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22into%22%5D+%5B%22and%22%5D+%5B%22there%22%5D+%5B%22was%22%5D+%5B%22no%22%5D+%5B%22way%22%5D+%5B%22to%22%5D+%5B%22get%22%5D+%5B%22better%22%5D+%5B%22shoes%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'You were assigned shoes you could n’t walk into and there was no way to get better shoes . ', 'right': '', 'complete_match': 'You were assigned shoes you could n’t walk into and there was no way to get better shoes . ', 'testimony_id': 'irn510618', 'shelfmark': ['USHMM RG-50.150*0015'], 'token_start': 16069, 'token_end': 16088}


In [266]:
# Hand the fragment group collected for the current domain term to
# add_testimonial_fragments (helper defined outside this section of the notebook).
add_testimonial_fragments(fragments)

## Testimonial fragments

### 6.  

In [267]:
# Lemma pair that the next cell turns into a contextual query.
lemmas = ["walk", "die"]

In [268]:
# Build the query matching the lemmas in either order with at most 15 tokens
# between them, and print it for inspection.
query = create_contextual_query(lemmas, context_length=15)
print(query)

([lemma="walk"][]{0,15}[lemma="die"])|([lemma="die"][]{0,15}[lemma="walk"])


In [269]:
# Term recorded as the `mid_node` of the fragment group created in the next cell.
domain_term = "die"

In [270]:
# Start a fresh fragment group: `main_node` comes from outside this section,
# `mid_node` is the current domain term, and the fragment list starts empty.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [271]:
# Fragment 1: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_1 = {
    'original_sentence': "They had not the strength to walk out. Both grandmothers after a couple of weeks died.",
    'label': "They had not the strength to walk out. Both grandmothers after a couple of weeks died.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22had%22%5D+%5B%22not%22%5D+%5B%22the%22%5D+%5B%22strength%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22out%22%5D+%5B%5D+%5B%22Both%22%5D+%5B%22grandmothers%22%5D+%5B%22after%22%5D+%5B%22a%22%5D+%5B%22couple%22%5D+%5B%22of%22%5D+%5B%22weeks%22%5D+%5B%22died%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They had not the strength to walk out . Both grandmothers after a couple of weeks died . ', 'right': '', 'complete_match': 'They had not the strength to walk out . Both grandmothers after a couple of weeks died . ', 'testimony_id': 'usc_shoah_107', 'shelfmark': ['USC 107'], 'token_start': 6582, 'token_end': 6600}


In [272]:
# Fragment 2: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_2 = {
    'original_sentence': "How many die a day. Half of them die walking.",
    'label': "How many die a day. Half of them die walking.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22How%22%5D+%5B%22many%22%5D+%5B%22die%22%5D+%5B%22a%22%5D+%5B%22day%22%5D+%5B%5D+%5B%22Half%22%5D+%5B%22of%22%5D+%5B%22them%22%5D+%5B%22die%22%5D+%5B%22walking%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'How many die a day . Half of them die walking . ', 'right': '', 'complete_match': 'How many die a day . Half of them die walking . ', 'testimony_id': 'irn504669', 'shelfmark': ['USHMM RG-50.030*0166'], 'token_start': 11168, 'token_end': 11180}


In [273]:
# Fragment 3: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_3 = {
    'original_sentence': "Skeletons were walking. There was nothing – black lips, with – with dying",
    'label': "Skeletons were walking. There was nothing – black lips, with – with dying (..).",
}
indices = find_sentence_id(fragment_3['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Skeletons%22%5D+%5B%22were%22%5D+%5B%22walking%22%5D+%5B%5D+%5B%22There%22%5D+%5B%22was%22%5D+%5B%22nothing%22%5D+%5B%5D%7B0%2C3%7D+%5B%22black%22%5D+%5B%22lips%22%5D+%5B%5D+%5B%22with%22%5D+%5B%5D%7B0%2C3%7D+%5B%22with%22%5D+%5B%22dying%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Skeletons were walking . There was nothing – black lips , with – with dying ', 'right': '', 'complete_match': 'Skeletons were walking . There was nothing – black lips , with – with dying ', 'testimony_id': 'irn506634', 'shelfmark': ['USHMM RG-50.106*0122'], 'token_start': 12602, 'token_end': 12617}


In [274]:
# Fragment 4: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_4 = {
    'original_sentence': "Who can walk, walked, who couldn’t walk, died there. It was terrible.",
    'label': "Who can walk, walked, who couldn’t walk, died there. It was terrible.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Who%22%5D+%5B%22can%22%5D+%5B%22walk%22%5D+%5B%5D+%5B%22walked%22%5D+%5B%5D+%5B%22who%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%5D+%5B%22died%22%5D+%5B%22there%22%5D+%5B%5D+%5B%22It%22%5D+%5B%22was%22%5D+%5B%22terrible%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Who can walk , walked , who could n’t walk , died there . It was terrible . ', 'right': '', 'complete_match': 'Who can walk , walked , who could n’t walk , died there . It was terrible . ', 'testimony_id': 'irn506705', 'shelfmark': ['USHMM RG-50.549.02*0039'], 'token_start': 3080, 'token_end': 3098}


In [275]:
# Fragment 5: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_5 = {
    'original_sentence': "And we were walking, and walking, and falling, and dying.",
    'label': "And we were walking, and walking, and falling, and dying.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22walking%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22walking%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22falling%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22dying%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And we were walking , and walking , and falling , and dying . ', 'right': '', 'complete_match': 'And we were walking , and walking , and falling , and dying . ', 'testimony_id': 'irn508669', 'shelfmark': ['USHMM RG-50.462*0049'], 'token_start': 4806, 'token_end': 4820}


In [276]:
# Hand the fragment group collected for the current domain term to
# add_testimonial_fragments (helper defined outside this section of the notebook).
add_testimonial_fragments(fragments)

### 7.  

In [277]:
# Lemma pair that the next cell turns into a contextual query.
lemmas = ["throw", "walk"]

In [278]:
# Build the query matching the lemmas in either order with at most 20 tokens
# between them, and print it for inspection.
query = create_contextual_query(lemmas, context_length=20)
print(query)

([lemma="throw"][]{0,20}[lemma="walk"])|([lemma="walk"][]{0,20}[lemma="throw"])


In [279]:
# Term recorded as the `mid_node` of the fragment group created in the next cell.
domain_term = "throw"

In [280]:
# Start a fresh fragment group: `main_node` comes from outside this section,
# `mid_node` is the current domain term, and the fragment list starts empty.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [281]:
# Fragment 1: resolve the sentence via find_sentence_id and queue the record on `fragments`.
# NOTE(review): the label below begins with a space — confirm that is intended.
fragment_1 = {
    'original_sentence': "And we saw him throwing in old people and old people that couldn't walk, they throw them in.",
    'label': " And we saw him throwing in old people and old people that couldn't walk, they throw them in.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22saw%22%5D+%5B%22him%22%5D+%5B%22throwing%22%5D+%5B%22in%22%5D+%5B%22old%22%5D+%5B%22people%22%5D+%5B%22and%22%5D+%5B%22old%22%5D+%5B%22people%22%5D+%5B%22that%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22throw%22%5D+%5B%22them%22%5D+%5B%22in%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And we saw him throwing in old people and old people that could n't walk , they throw them in . ", 'right': '', 'complete_match': "And we saw him throwing in old people and old people that could n't walk , they throw them in . ", 'testimony_id': 'HVT-42', 'shelfmark': ['Fortunoff HVT-42'], 'token_start': 7623, 'token_end': 7644}


In [282]:
# Fragment 2: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_2 = {
    'original_sentence': "And whoever couldn't walk fast enough or couldn't walk at all was just thrown in the side of the street",
    'label': "And whoever couldn't walk fast enough or couldn't walk at all was just thrown in the side of the street (..).",
}
indices = find_sentence_id(fragment_2['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22whoever%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22fast%22%5D+%5B%22enough%22%5D+%5B%22or%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22at%22%5D+%5B%22all%22%5D+%5B%22was%22%5D+%5B%22just%22%5D+%5B%22thrown%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22side%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22street%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And whoever could n't walk fast enough or could n't walk at all was just thrown in the side of the street ", 'right': '', 'complete_match': "And whoever could n't walk fast enough or could n't walk at all was just thrown in the side of the street ", 'testimony_id': 'HVT-10', 'shelfmark': ['Fortunoff HVT-10'], 'token_start': 7879, 'token_end': 7901}


In [283]:
# Fragment 3: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_3 = {
    'original_sentence': "And you can now see they were throwing people that couldn't walk fast enough.",
    'label': "And you can now see they were throwing people that couldn't walk fast enough.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22you%22%5D+%5B%22can%22%5D+%5B%22now%22%5D+%5B%22see%22%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22throwing%22%5D+%5B%22people%22%5D+%5B%22that%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%22fast%22%5D+%5B%22enough%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And you can now see they were throwing people that could n't walk fast enough . ", 'right': '', 'complete_match': "And you can now see they were throwing people that could n't walk fast enough . ", 'testimony_id': 'usc_shoah_23090', 'shelfmark': ['USC 23090'], 'token_start': 14207, 'token_end': 14223}


In [284]:
# Fragment 4: resolve the sentence via find_sentence_id and queue the record on `fragments`.
# NOTE(review): the original sentence below begins with a space — confirm that is intended.
fragment_4 = {
    'original_sentence': " whoever couldn't walk, they shot him and threw him in the ditch on the side of the road.",
    'label': "(..) whoever couldn't walk, they shot him and threw him in the ditch on the side of the road.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22whoever%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22shot%22%5D+%5B%22him%22%5D+%5B%22and%22%5D+%5B%22threw%22%5D+%5B%22him%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22ditch%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22side%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22road%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "whoever could n't walk , they shot him and threw him in the ditch on the side of the road . ", 'right': '', 'complete_match': "whoever could n't walk , they shot him and threw him in the ditch on the side of the road . ", 'testimony_id': 'usc_shoah_3212', 'shelfmark': ['USC 3212'], 'token_start': 20370, 'token_end': 20391}


In [285]:
# Fragment 5: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_5 = {
    'original_sentence': "the other side was a ditch with people-- they threw them-- who couldn't walk.",
    'label': "(..) the other side was a ditch with people-- they threw them-- who couldn't walk.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22the%22%5D+%5B%22other%22%5D+%5B%22side%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22ditch%22%5D+%5B%22with%22%5D+%5B%22people%22%5D+%5B%5D%7B0%2C3%7D+%5B%22they%22%5D+%5B%22threw%22%5D+%5B%22them%22%5D+%5B%5D%7B0%2C3%7D+%5B%22who%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22walk%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "the other side was a ditch with people -- they threw them -- who could n't walk . ", 'right': '', 'complete_match': "the other side was a ditch with people -- they threw them -- who could n't walk . ", 'testimony_id': 'usc_shoah_3474', 'shelfmark': ['USC 3474'], 'token_start': 9598, 'token_end': 9616}


In [286]:
# Hand the fragment group collected for the current domain term to
# add_testimonial_fragments (helper defined outside this section of the notebook).
add_testimonial_fragments(fragments)

### 8.  

In [287]:
# Lemma pair that the next cell turns into a contextual query.
lemmas = ["walk", "work"]

In [288]:
# Build the query matching the lemmas in either order with at most 15 tokens
# between them, and print it for inspection.
query = create_contextual_query(lemmas, context_length=15)
print(query)

([lemma="walk"][]{0,15}[lemma="work"])|([lemma="work"][]{0,15}[lemma="walk"])


In [289]:
# Term recorded as the `mid_node` of the fragment group created in the next cell.
domain_term = "work"

In [290]:
# Start a fresh fragment group: `main_node` comes from outside this section,
# `mid_node` is the current domain term, and the fragment list starts empty.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [291]:
# Fragment 1: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_1 = {
    'original_sentence': "we have to walk about six, seven miles. We have to walk, you know, like to the working place, you know.",
    'label': "we have to walk about six, seven miles. (..) like to the working place (..).",
}
indices = find_sentence_id(fragment_1['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22we%22%5D+%5B%22have%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22about%22%5D+%5B%22six%22%5D+%5B%5D+%5B%22seven%22%5D+%5B%22miles%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22have%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22like%22%5D+%5B%22to%22%5D+%5B%22the%22%5D+%5B%22working%22%5D+%5B%22place%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'we have to walk about six , seven miles . We have to walk , you know , like to the working place , you know . ', 'right': '', 'complete_match': 'we have to walk about six , seven miles . We have to walk , you know , like to the working place , you know . ', 'testimony_id': 'HVT-163', 'shelfmark': ['Fortunoff HVT-163'], 'token_start': 18196, 'token_end': 18223}


In [292]:
# Fragment 2: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_2 = {
    'original_sentence': "Winter, they took us every day to work. You had to walk at least... you walked 30, 35 kilometers a day.",
    'label': "Winter, they took us every day to work. You had to walk at least... you walked 30, 35 kilometers a day.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Winter%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22took%22%5D+%5B%22us%22%5D+%5B%22every%22%5D+%5B%22day%22%5D+%5B%22to%22%5D+%5B%22work%22%5D+%5B%5D+%5B%22You%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22at%22%5D+%5B%22least%22%5D+%5B%5D%7B0%2C50%7D+%5B%22you%22%5D+%5B%22walked%22%5D+%5B%2230%22%5D+%5B%5D+%5B%2235%22%5D+%5B%22kilometers%22%5D+%5B%22a%22%5D+%5B%22day%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Winter , they took us every day to work . You had to walk at least ... you walked 30 , 35 kilometers a day . ', 'right': '', 'complete_match': 'Winter , they took us every day to work . You had to walk at least ... you walked 30 , 35 kilometers a day . ', 'testimony_id': 'irn504680', 'shelfmark': ['USHMM RG-50.030*0184'], 'token_start': 3338, 'token_end': 3364}


In [293]:
# Fragment 3: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_3 = {
    'original_sentence': "We have to walk to work every day. Hungry.",
    'label': "We have to walk to work every day. Hungry.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22have%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22to%22%5D+%5B%22work%22%5D+%5B%22every%22%5D+%5B%22day%22%5D+%5B%5D+%5B%22Hungry%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We have to walk to work every day . Hungry . ', 'right': '', 'complete_match': 'We have to walk to work every day . Hungry . ', 'testimony_id': 'irn504669', 'shelfmark': ['USHMM RG-50.030*0166'], 'token_start': 2224, 'token_end': 2235}


In [294]:
# Fragment 4: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_4 = {
    'original_sentence': "they were taken out on walks to go to the work site every day, there were skeletons",
    'label': "(..)they were taken out on walks to go to the work site every day, there were skeletons (..)",
}
indices = find_sentence_id(fragment_4['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22they%22%5D+%5B%22were%22%5D+%5B%22taken%22%5D+%5B%22out%22%5D+%5B%22on%22%5D+%5B%22walks%22%5D+%5B%22to%22%5D+%5B%22go%22%5D+%5B%22to%22%5D+%5B%22the%22%5D+%5B%22work%22%5D+%5B%22site%22%5D+%5B%22every%22%5D+%5B%22day%22%5D+%5B%5D+%5B%22there%22%5D+%5B%22were%22%5D+%5B%22skeletons%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'they were taken out on walks to go to the work site every day , there were skeletons ', 'right': '', 'complete_match': 'they were taken out on walks to go to the work site every day , there were skeletons ', 'testimony_id': 'irn504867', 'shelfmark': ['USHMM RG-50.030*0374'], 'token_start': 37115, 'token_end': 37133}


In [295]:
# Fragment 5: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_5 = {
    'original_sentence': "every morning when we walked there, which was seven days a week and sometimes working 12 - 14 - 15 hours a day, that’s including in walking",
    'label': "(..)every morning when we walked there, which was seven days a week and sometimes working 12 - 14 - 15 hours a day, that’s including in walking(..)",
}
indices = find_sentence_id(fragment_5['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22every%22%5D+%5B%22morning%22%5D+%5B%22when%22%5D+%5B%22we%22%5D+%5B%22walked%22%5D+%5B%22there%22%5D+%5B%5D+%5B%22which%22%5D+%5B%22was%22%5D+%5B%22seven%22%5D+%5B%22days%22%5D+%5B%22a%22%5D+%5B%22week%22%5D+%5B%22and%22%5D+%5B%22sometimes%22%5D+%5B%22working%22%5D+%5B%2212%22%5D+%5B%5D%7B0%2C3%7D+%5B%2214%22%5D+%5B%5D%7B0%2C3%7D+%5B%2215%22%5D+%5B%22hours%22%5D+%5B%22a%22%5D+%5B%22day%22%5D+%5B%5D+%5B%22that%22%5D+%5B%5D+%5B%22including%22%5D+%5B%22in%22%5D+%5B%22walking%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'every morning when we walked there , which was seven days a week and sometimes working 12 - 14 - 15 hours a day , that ’s including in walking ', 'right': '', 'complete_match': 'every morning when we walked there , which was seven days a week and sometimes working 12 - 14 - 15 hours a day , that ’s including in walking ', 'testimony_id': 'irn506643', 'she

In [296]:
# Hand the fragment group collected for the current domain term to
# add_testimonial_fragments (helper defined outside this section of the notebook).
add_testimonial_fragments(fragments)

### 9.  

In [297]:
# Lemma pair that the next cell turns into a contextual query.
lemmas = ["barefoot", "walk"]

In [298]:
# Build the query matching the lemmas in either order with at most 10 tokens
# between them, and print it for inspection.
query = create_contextual_query(lemmas, context_length=10)
print(query)

([lemma="barefoot"][]{0,10}[lemma="walk"])|([lemma="walk"][]{0,10}[lemma="barefoot"])


In [299]:
# Term recorded as the `mid_node` of the fragment group created in the next cell.
domain_term = "barefoot"

In [300]:
# Start a fresh fragment group: `main_node` comes from outside this section,
# `mid_node` is the current domain term, and the fragment list starts empty.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [301]:
# Fragment 1: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_1 = {
    'original_sentence': "So you had to walk almost barefoot in the snow.",
    'label': "So you had to walk almost barefoot in the snow.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22you%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22almost%22%5D+%5B%22barefoot%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22snow%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So you had to walk almost barefoot in the snow . ', 'right': '', 'complete_match': 'So you had to walk almost barefoot in the snow . ', 'testimony_id': 'HVT-102', 'shelfmark': ['Fortunoff HVT-102'], 'token_start': 11864, 'token_end': 11875}


In [302]:
# Fragment 2: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_2 = {
    'original_sentence': "already half-dead from the same difficulty of walking barefoot.",
    'label': "(..)already half-dead from the same difficulty of walking barefoot",
}
indices = find_sentence_id(fragment_2['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22already%22%5D+%5B%5D%7B0%2C3%7D+%5B%22from%22%5D+%5B%22the%22%5D+%5B%22same%22%5D+%5B%22difficulty%22%5D+%5B%22of%22%5D+%5B%22walking%22%5D+%5B%22barefoot%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'already half-dead from the same difficulty of walking barefoot . ', 'right': '', 'complete_match': 'already half-dead from the same difficulty of walking barefoot . ', 'testimony_id': 'irn504703', 'shelfmark': ['USHMM RG-50.030*0211'], 'token_start': 6548, 'token_end': 6558}


In [303]:
# Fragment 3: resolve the sentence via find_sentence_id and queue the record on `fragments`.
fragment_3 = {
    'original_sentence': "I had no shoes. I was walking barefoot. And I caught-- I tore off a piece of my blanket.",
    'label': "I had no shoes. I was walking barefoot. And I caught-- I tore off a piece of my blanket.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
# indices[0] / indices[1] delimit the sentence span; indices[2] is the testimony id.
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22had%22%5D+%5B%22no%22%5D+%5B%22shoes%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22walking%22%5D+%5B%22barefoot%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22I%22%5D+%5B%22caught%22%5D+%5B%5D%7B0%2C3%7D+%5B%22I%22%5D+%5B%22tore%22%5D+%5B%22off%22%5D+%5B%22a%22%5D+%5B%22piece%22%5D+%5B%22of%22%5D+%5B%22my%22%5D+%5B%22blanket%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I had no shoes . I was walking barefoot . And I caught -- I tore off a piece of my blanket . ', 'right': '', 'complete_match': 'I had no shoes . I was walking barefoot . And I caught -- I tore off a piece of my blanket . ', 'testimony_id': 'HVT-4', 'shelfmark': ['Fortunoff HVT-4'], 'token_start': 21230, 'token_end': 21253}


In [304]:
# Testimonial fragment 4: the displayed label is the full original sentence.
fragment_4 = {
    'original_sentence': "And sometimes the fields were cut, the wheat was cut, and it was rough. And some of us were walking barefoot.",
    'label': "And sometimes the fields were cut, the wheat was cut, and it was rough. And some of us were walking barefoot.",
}
# find_sentence_id's result is read positionally: [0] and [1] give the
# start/end sentence indices, [2] gives the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,  # media position is fixed at 0 for this fragment
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22sometimes%22%5D+%5B%22the%22%5D+%5B%22fields%22%5D+%5B%22were%22%5D+%5B%22cut%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22wheat%22%5D+%5B%22was%22%5D+%5B%22cut%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22rough%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22some%22%5D+%5B%22of%22%5D+%5B%22us%22%5D+%5B%22were%22%5D+%5B%22walking%22%5D+%5B%22barefoot%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And sometimes the fields were cut , the wheat was cut , and it was rough . And some of us were walking barefoot . ', 'right': '', 'complete_match': 'And sometimes the fields were cut , the wheat was cut , and it was rough . And some of us were walking barefoot . ', 'testimony_id': 'HVT-5', 'shelfmark': ['Fortunoff HVT-5'], 'token_start': 9770, 'token_end': 9795}


In [305]:
# Testimonial fragment 5: the displayed label is the full original sentence.
fragment_5 = {
    'original_sentence': "And I have my feet barefooted-- nothing on the feet. And I had to walk through the fields, bleeding.",
    'label': "And I have my feet barefooted-- nothing on the feet. And I had to walk through the fields, bleeding.",
}
# find_sentence_id's result is read positionally: [0] and [1] give the
# start/end sentence indices, [2] gives the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,  # media position is fixed at 0 for this fragment
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22I%22%5D+%5B%22have%22%5D+%5B%22my%22%5D+%5B%22feet%22%5D+%5B%22barefooted%22%5D+%5B%5D%7B0%2C3%7D+%5B%22nothing%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22feet%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22I%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22walk%22%5D+%5B%22through%22%5D+%5B%22the%22%5D+%5B%22fields%22%5D+%5B%5D+%5B%22bleeding%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And I have my feet barefooted -- nothing on the feet . And I had to walk through the fields , bleeding . ', 'right': '', 'complete_match': 'And I have my feet barefooted -- nothing on the feet . And I had to walk through the fields , bleeding . ', 'testimony_id': 'usc_shoah_9878', 'shelfmark': ['USC 9878'], 'token_start': 14499, 'token_end': 14522}


In [306]:
# Pass the accumulated `fragments` container (its 'fragments' list was filled
# by the cells above) to add_testimonial_fragments.
# NOTE(review): the helper is defined outside this section; presumably it
# persists the fragments -- confirm against its definition.
add_testimonial_fragments(fragments)