# Mining testimonial fragments of the Holocaust

**Experience domain:**

### Load the necessary libraries

In [93]:
import sys; sys.path.insert(0, '..')
import itertools

In [94]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
mongo = db.get_db()

In [95]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [96]:
def create_contextual_query(lemmas,context_length=50):
    permutations = itertools.permutations(lemmas,len(lemmas))
    final_result = []
    for element in list(permutations):
        temp_result = []
        for el in element:
            temp_result.append('[lemma="'+el+'"]')
        temp_result = '('+('[]{0,'+str(context_length)+'}').join(temp_result)+')'
        final_result.append(temp_result)
    final_result = '|'.join(final_result)
    return final_result
        
        
            

In [97]:
from utils import blacklab, db, text
import requests
import json
def find_sentence_id(label):
    props = {'annotators': 'tokenize'}

    # set the encoding of the annotator
    requests.encoding = 'utf-8'
    # make a request
    r = requests.post('http://localhost:9000/', params={'properties':
                      json.dumps(props)},
                      data=label.encode('utf-8'))
    result = json.loads(r.text, encoding='utf-8')
    query = []
    for i, token in enumerate(result['tokens']):

        if ('...'in token['word'] and ((i == 0) or
           i == len(result['tokens']) - 1)):
            continue
        elif ('...'in token['word']):
            query.append('[]{0,50}')
        elif ('-'in token['word']):
            query.append('[]{0,3}')
        elif ("n't"in token['word']):
            query.append('[]')
        elif ("'re"in token['word']):
            query.append('[]')
        elif ("?"in token['word']):
            query.append('[]')
        elif ("."in token['word']):
            query.append('[]')
        elif ("'s"in token['word']):
            query.append('[]')
        elif (","in token['word']):
            query.append('[]')
        else:
            query.append('["' + token['word'] + '"]')

    query = ' '.join(query)
    try:
        sentence = blacklab.search_blacklab(query, window=0,
                                            lemma=False,
                                            include_match=True)
        token_end = sentence[0]['token_end']
        token_start = sentence[0]['token_start']
        print (sentence[0])
        mongo = db.get_db()
        results = mongo.tokens.find({'testimony_id':
                                    sentence[0]['testimony_id']},
                                    {'_id': 0})
        tokens = list(results)[0]['tokens']
        sentenceStart = tokens[token_start]['sentence_index']
        sentenceEnd = tokens[token_end]['sentence_index']
        originalsentence = sentence[0]['complete_match']
        return (sentenceStart,sentenceEnd,sentence[0]['testimony_id'])
    except:
        print("The following query returned a null result")
        print(query)
        
            


In [98]:
def create_parent_node(label):
    """Generate a root node for a tree structure."""
    testimony_id = random.randint(1, 20)
    node = {}
    node['label'] = label
    fragment = {'label': label,
                'essay_id': random.randint(1, 20),
                'tree': get_node(testimony_id, node, is_parent=True)}
    fragment['tree']['label'] = label

    return fragment

In [99]:
def get_node(testimony_id, node, is_parent=False):
    """Generate a parent or leaf node for a tree structure."""
    if is_parent:
        return {
            'label': node['label'],
            'testimony_id': random.randint(1, 20),
            'media_index': random.randint(1, 20),
            'media_offset': random.randint(1, 20),
            'start_sentence_index': random.randint(1, 20),
            'end_sentence_index': random.randint(1, 20),
            'children': [], }
    else:
        return {'label': node['label'],
                'testimony_id': node['testimony_id'],
                'media_index': float(node['media_index']),
                'media_offset': float(node['media_offset']),
                'start_sentence_index': float(node['start_sentence_index']),
                'end_sentence_index': float(node['end_sentence_index']),
                'children': [], }

In [100]:
def check_if_main_node_exist(node):
    results = mongo.fragments.find({'label':node},{'_id': 0})
    if len(results[0])==0:
        return False
    else:
        return True

In [101]:
def add_main_node(label):
    mongo.fragments.insert(create_parent_node(label))

In [102]:
def delete_main_node(label):
    mongo.fragments.delete_one({'label':label})

In [103]:
def add_testimonial_fragments(fragments):
    if check_if_main_node_exist(fragments['main_node']):
        results = mongo.fragments.find({'label':fragments['main_node']},{'_id':0})[0]
        mid_nodes = [element['label'] for element in results['tree']['children']]
        if fragments['mid_node'] in mid_nodes:
            print ("mid node exists cannot be added")
        else:
            
            mid_node = get_node('r',{'label':fragments['mid_node']},is_parent=True)
            for fragment in fragments['fragments']:
                leaf = get_node(fragment['testimony_id'],fragment)
                mid_node['children'].append(leaf)
            results['tree']['children'].append(mid_node)
            mongo.fragments.replace_one({'label':fragments['main_node']},results)

### Add the main node

In [104]:
main_node = "sit"
delete_main_node("numbness")
add_main_node(main_node)

  


### Set up the query

query = '[lemma="sit"]'

result = topic_concordancer.main(query,window=25,topicn=25)

### Print the key topics

for i,element in enumerate(result['topic_documents']):
    print (i)
    topic_words =  element['topic_words'][1]
    print (topic_words)
    print ('\n')

### Analyze documents

i=0
for text in result['topic_documents'][i]['texts'][0:25]:
    print (text['matched_text_words'])
    print ('\n')

## Testimonial fragments

### 1.  

In [105]:
lemmas = ["sit","truck"]

In [106]:
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="sit"][]{0,25}[lemma="truck"])|([lemma="truck"][]{0,25}[lemma="sit"])


In [107]:
domain_term = "truck"

In [108]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [109]:
fragment_1 = {}
fragment_1['original_sentence'] = "They put us on the truck. If you sit, you straight up your feet straight and on your feet sit another person."
fragment_1['label']="They put us on the truck. If you sit, you straight up your feet straight and on your feet sit another person."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22put%22%5D+%5B%22us%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22truck%22%5D+%5B%5D+%5B%22If%22%5D+%5B%22you%22%5D+%5B%22sit%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22straight%22%5D+%5B%22up%22%5D+%5B%22your%22%5D+%5B%22feet%22%5D+%5B%22straight%22%5D+%5B%22and%22%5D+%5B%22on%22%5D+%5B%22your%22%5D+%5B%22feet%22%5D+%5B%22sit%22%5D+%5B%22another%22%5D+%5B%22person%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They put us on the truck . If you sit , you straight up your feet straight and on your feet sit another person . ', 'right': '', 'complete_match': 'They put us on the truck . If you sit , you straight up your feet straight and on your feet sit another person . ', 'testimony_id': 'irn509173', 'shelfmark': ['USHMM RG-50.233*0090'], 'token_start': 6456, 'token_end': 6481}


In [110]:
fragment_2 = {}
fragment_2['original_sentence'] = " And we had to sit still there, you know, in these trucks, you know, and no-- no breathing or anything."
fragment_2['label']=" And we had to sit still there, you know, in these trucks, you know, and no-- no breathing or anything."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22sit%22%5D+%5B%22still%22%5D+%5B%22there%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22in%22%5D+%5B%22these%22%5D+%5B%22trucks%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22no%22%5D+%5B%5D%7B0%2C3%7D+%5B%22no%22%5D+%5B%22breathing%22%5D+%5B%22or%22%5D+%5B%22anything%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And we had to sit still there , you know , in these trucks , you know , and no -- no breathing or anything . ', 'right': '', 'complete_match': 'And we had to sit still there , you know , in these trucks , you know , and no -- no breathing or anything . ', 'testimony_id': 'usc_shoah_5371', 'shelfmark': ['USC 5371'], 'token_start': 20908, 'token_end': 20934}


In [111]:
fragment_3 = {}
fragment_3['original_sentence'] = "Some couldn't sit down, others could."
fragment_3['label']="There were, I would say, 70 to 80 people to-- to a-- a-- a cattle truck. Some couldn't sit down, others could."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Some%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22sit%22%5D+%5B%22down%22%5D+%5B%5D+%5B%22others%22%5D+%5B%22could%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "Some could n't sit down , others could . ", 'right': '', 'complete_match': "Some could n't sit down , others could . ", 'testimony_id': 'usc_shoah_4187', 'shelfmark': ['USC 4187'], 'token_start': 9494, 'token_end': 9503}


In [112]:
fragment_4 = {}
fragment_4['original_sentence'] = "Anyhow, the following day, they load us up on trucks. And we were going, we don't know where. "
fragment_4['label']= "Anyhow, the following day, they load us up on trucks. And we were going, we don't know where. "
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Anyhow%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22following%22%5D+%5B%22day%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22load%22%5D+%5B%22us%22%5D+%5B%22up%22%5D+%5B%22on%22%5D+%5B%22trucks%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22going%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22do%22%5D+%5B%5D+%5B%22know%22%5D+%5B%22where%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "Anyhow , the following day , they load us up on trucks . And we were going , we do n't know where . ", 'right': '', 'complete_match': "Anyhow , the following day , they load us up on trucks . And we were going , we do n't know where . ", 'testimony_id': 'usc_shoah_20084', 'shelfmark': ['USC 20084'], 'token_start': 8402, 'token_end': 8426}


In [113]:
fragment_5 = {}
fragment_5['original_sentence'] = "You know, if it was a week or two weeks, to sit in the cattle trucks."
fragment_5['label']= "You know, if it was a week or two weeks, to sit in the cattle trucks."
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22if%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22week%22%5D+%5B%22or%22%5D+%5B%22two%22%5D+%5B%22weeks%22%5D+%5B%5D+%5B%22to%22%5D+%5B%22sit%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22cattle%22%5D+%5B%22trucks%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'You know , if it was a week or two weeks , to sit in the cattle trucks . ', 'right': '', 'complete_match': 'You know , if it was a week or two weeks , to sit in the cattle trucks . ', 'testimony_id': 'usc_shoah_13524', 'shelfmark': ['USC 13524'], 'token_start': 25195, 'token_end': 25214}


In [114]:
add_testimonial_fragments(fragments)

mid node exists cannot be added


### 2.  

In [115]:
lemmas = ["sit","dead"]

In [116]:
query = create_contextual_query(lemmas,context_length=4)
print (query)

([lemma="sit"][]{0,4}[lemma="dead"])|([lemma="dead"][]{0,4}[lemma="sit"])


In [117]:
domain_term = "on the dead"

In [118]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [119]:
fragment_1 = {}
fragment_1['original_sentence'] = "And the irony of it was when one it was like a lunch time, we sit on a dead body."
fragment_1['label']="And the irony of it was when one it was like a lunch time, we sit on a dead body."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22the%22%5D+%5B%22irony%22%5D+%5B%22of%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22when%22%5D+%5B%22one%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22like%22%5D+%5B%22a%22%5D+%5B%22lunch%22%5D+%5B%22time%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22sit%22%5D+%5B%22on%22%5D+%5B%22a%22%5D+%5B%22dead%22%5D+%5B%22body%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And the irony of it was when one it was like a lunch time , we sit on a dead body . ', 'right': '', 'complete_match': 'And the irony of it was when one it was like a lunch time , we sit on a dead body . ', 'testimony_id': 'irn504542', 'shelfmark': ['USHMM RG-50.030*0040'], 'token_start': 10912, 'token_end': 10934}


In [120]:
fragment_2 = {}
fragment_2['original_sentence'] = "Used to huddle together to keep warm, or we sat down on the dead people."
fragment_2['label']="Used to huddle together to keep warm, or we sat down on the dead people."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Used%22%5D+%5B%22to%22%5D+%5B%22huddle%22%5D+%5B%22together%22%5D+%5B%22to%22%5D+%5B%22keep%22%5D+%5B%22warm%22%5D+%5B%5D+%5B%22or%22%5D+%5B%22we%22%5D+%5B%22sat%22%5D+%5B%22down%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22dead%22%5D+%5B%22people%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Used to huddle together to keep warm , or we sat down on the dead people . ', 'right': '', 'complete_match': 'Used to huddle together to keep warm , or we sat down on the dead people . ', 'testimony_id': 'irn510728', 'shelfmark': ['USHMM RG-50.154*0008'], 'token_start': 10540, 'token_end': 10557}


In [121]:
fragment_3 = {}
fragment_3['original_sentence'] = "It took me many years to understand that I was sitting surrounded by dead bodies. I was sitting on dead bodies."
fragment_3['label']="It took me many years to understand that I was sitting surrounded by dead bodies. I was sitting on dead bodies."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22It%22%5D+%5B%22took%22%5D+%5B%22me%22%5D+%5B%22many%22%5D+%5B%22years%22%5D+%5B%22to%22%5D+%5B%22understand%22%5D+%5B%22that%22%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22sitting%22%5D+%5B%22surrounded%22%5D+%5B%22by%22%5D+%5B%22dead%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22sitting%22%5D+%5B%22on%22%5D+%5B%22dead%22%5D+%5B%22bodies%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'It took me many years to understand that I was sitting surrounded by dead bodies . I was sitting on dead bodies . ', 'right': '', 'complete_match': 'It took me many years to understand that I was sitting surrounded by dead bodies . I was sitting on dead bodies . ', 'testimony_id': 'usc_shoah_19518', 'shelfmark': ['USC 19518'], 'token_start': 19858, 'token_end': 19881}


In [122]:
fragment_5 = {}
fragment_5['original_sentence'] = "In these train rides, I'd be sitting on top of dead people who died of typhus, tuberculosis, or other serious ailments"
fragment_5['label']= "In these train rides, I'd be sitting on top of dead people who died of typhus, tuberculosis, or other serious ailments"
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22In%22%5D+%5B%22these%22%5D+%5B%22train%22%5D+%5B%22rides%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22%27d%22%5D+%5B%22be%22%5D+%5B%22sitting%22%5D+%5B%22on%22%5D+%5B%22top%22%5D+%5B%22of%22%5D+%5B%22dead%22%5D+%5B%22people%22%5D+%5B%22who%22%5D+%5B%22died%22%5D+%5B%22of%22%5D+%5B%22typhus%22%5D+%5B%5D+%5B%22tuberculosis%22%5D+%5B%5D+%5B%22or%22%5D+%5B%22other%22%5D+%5B%22serious%22%5D+%5B%22ailments%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "In these train rides , I 'd be sitting on top of dead people who died of typhus , tuberculosis , or other serious ailments ", 'right': '', 'complete_match': "In these train rides , I 'd be sitting on top of dead people who died of typhus , tuberculosis , or other serious ailments ", 'testimony_id': 'usc_shoah_25639', 'shelfmark': ['USC 25639'], 'token_start': 30345, 'token_end': 30370}


In [123]:
add_testimonial_fragments(fragments)

mid node exists cannot be added


### 3.  

In [124]:
lemmas = ["sit","die"]

In [125]:
query = create_contextual_query(lemmas,context_length=3)
print (query)

([lemma="sit"][]{0,3}[lemma="die"])|([lemma="die"][]{0,3}[lemma="sit"])


In [126]:
domain_term = "die"

In [127]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [128]:
fragment_1 = {}
fragment_1['original_sentence'] = "People died, they sat down and they couldn’t get up anymore."
fragment_1['label']="People died, they sat down and they couldn’t get up anymore."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22People%22%5D+%5B%22died%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22sat%22%5D+%5B%22down%22%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22get%22%5D+%5B%22up%22%5D+%5B%22anymore%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'People died , they sat down and they could n’t get up anymore . ', 'right': '', 'complete_match': 'People died , they sat down and they could n’t get up anymore . ', 'testimony_id': 'irn508670', 'shelfmark': ['USHMM RG-50.462*0050'], 'token_start': 13479, 'token_end': 13493}


In [129]:
fragment_2 = {}
fragment_2['original_sentence'] = "He just sat down, he leaned against the wall, and he died of malnutrition and also dysentery."
fragment_2['label']="He just sat down, he leaned against the wall, and he died of malnutrition and also dysentery."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22He%22%5D+%5B%22just%22%5D+%5B%22sat%22%5D+%5B%22down%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22leaned%22%5D+%5B%22against%22%5D+%5B%22the%22%5D+%5B%22wall%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22died%22%5D+%5B%22of%22%5D+%5B%22malnutrition%22%5D+%5B%22and%22%5D+%5B%22also%22%5D+%5B%22dysentery%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'He just sat down , he leaned against the wall , and he died of malnutrition and also dysentery . ', 'right': '', 'complete_match': 'He just sat down , he leaned against the wall , and he died of malnutrition and also dysentery . ', 'testimony_id': 'usc_shoah_14463', 'shelfmark': ['USC 14463'], 'token_start': 34438, 'token_end': 34458}


In [130]:
fragment_4 = {}
fragment_4['original_sentence'] = "the man that was sitting under the tree, he looked so real and so alive and he was dead. Maybe, he just sat down to die."
fragment_4['label']= "(..) the man that was sitting under the tree, he looked so real and so alive and he was dead. Maybe, he just sat down to die."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22the%22%5D+%5B%22man%22%5D+%5B%22that%22%5D+%5B%22was%22%5D+%5B%22sitting%22%5D+%5B%22under%22%5D+%5B%22the%22%5D+%5B%22tree%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22looked%22%5D+%5B%22so%22%5D+%5B%22real%22%5D+%5B%22and%22%5D+%5B%22so%22%5D+%5B%22alive%22%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22was%22%5D+%5B%22dead%22%5D+%5B%5D+%5B%22Maybe%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22just%22%5D+%5B%22sat%22%5D+%5B%22down%22%5D+%5B%22to%22%5D+%5B%22die%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'the man that was sitting under the tree , he looked so real and so alive and he was dead . Maybe , he just sat down to die . ', 'right': '', 'complete_match': 'the man that was sitting under the tree , he looked so real and so alive and he was dead . Maybe , he just sat down to die . ', 'testimony_id': 'irn508479', 'shelfmark': ['USHMM RG-50.030*0411'], 'token_start': 54805, 'token_end'

In [131]:
fragment_5 = {}
fragment_5['original_sentence'] = "You could just sit down and become a muselmann and you just die."
fragment_5['label']= "You could just sit down and become a muselmann and you just die."
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22could%22%5D+%5B%22just%22%5D+%5B%22sit%22%5D+%5B%22down%22%5D+%5B%22and%22%5D+%5B%22become%22%5D+%5B%22a%22%5D+%5B%22muselmann%22%5D+%5B%22and%22%5D+%5B%22you%22%5D+%5B%22just%22%5D+%5B%22die%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'You could just sit down and become a muselmann and you just die . ', 'right': '', 'complete_match': 'You could just sit down and become a muselmann and you just die . ', 'testimony_id': 'usc_shoah_1581', 'shelfmark': ['USC 1581'], 'token_start': 18089, 'token_end': 18103}


In [132]:
add_testimonial_fragments(fragments)

mid node exists cannot be added


### 4.  

In [133]:
lemmas = ["sit","shoot"]

In [134]:
query = create_contextual_query(lemmas,context_length=50)
print (query)

([lemma="sit"][]{0,50}[lemma="shoot"])|([lemma="shoot"][]{0,50}[lemma="sit"])


In [135]:
domain_term = "shoot"

In [136]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [137]:
fragment_1 = {}
fragment_1['original_sentence'] = "And you sat down, and they shot you?"
fragment_1['label']="And you sat down, and they shot you?"
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22you%22%5D+%5B%22sat%22%5D+%5B%22down%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22shot%22%5D+%5B%22you%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And you sat down , and they shot you ? ', 'right': '', 'complete_match': 'And you sat down , and they shot you ? ', 'testimony_id': 'usc_shoah_16', 'shelfmark': ['USC 16'], 'token_start': 8697, 'token_end': 8707}


In [138]:
fragment_2 = {}
fragment_2['original_sentence'] = "whoever sits down or slows down will shoot and all these things."
fragment_2['label']="(..) whoever sits down or slows down will shoot and all these things."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22whoever%22%5D+%5B%22sits%22%5D+%5B%22down%22%5D+%5B%22or%22%5D+%5B%22slows%22%5D+%5B%22down%22%5D+%5B%22will%22%5D+%5B%22shoot%22%5D+%5B%22and%22%5D+%5B%22all%22%5D+%5B%22these%22%5D+%5B%22things%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'whoever sits down or slows down will shoot and all these things . ', 'right': '', 'complete_match': 'whoever sits down or slows down will shoot and all these things . ', 'testimony_id': 'usc_shoah_10162', 'shelfmark': ['USC 10162'], 'token_start': 15525, 'token_end': 15538}


In [139]:
fragment_3 = {}
fragment_3['original_sentence'] = "And everybody who sit on the road, they get shot. So you were practically walking between dead people."
fragment_3['label']="And everybody who sit on the road, they get shot. So you were practically walking between dead people."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22everybody%22%5D+%5B%22who%22%5D+%5B%22sit%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22road%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22get%22%5D+%5B%22shot%22%5D+%5B%5D+%5B%22So%22%5D+%5B%22you%22%5D+%5B%22were%22%5D+%5B%22practically%22%5D+%5B%22walking%22%5D+%5B%22between%22%5D+%5B%22dead%22%5D+%5B%22people%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And everybody who sit on the road , they get shot . So you were practically walking between dead people . ', 'right': '', 'complete_match': 'And everybody who sit on the road , they get shot . So you were practically walking between dead people . ', 'testimony_id': 'usc_shoah_543', 'shelfmark': ['USC 543'], 'token_start': 17966, 'token_end': 17987}


In [140]:
fragment_4 = {}
fragment_4['original_sentence'] = "People who were sitting, who were too weak to get up were shot immediately."
fragment_4['label']= "People who were sitting, who were too weak to get up were shot immediately."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22People%22%5D+%5B%22who%22%5D+%5B%22were%22%5D+%5B%22sitting%22%5D+%5B%5D+%5B%22who%22%5D+%5B%22were%22%5D+%5B%22too%22%5D+%5B%22weak%22%5D+%5B%22to%22%5D+%5B%22get%22%5D+%5B%22up%22%5D+%5B%22were%22%5D+%5B%22shot%22%5D+%5B%22immediately%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'People who were sitting , who were too weak to get up were shot immediately . ', 'right': '', 'complete_match': 'People who were sitting , who were too weak to get up were shot immediately . ', 'testimony_id': 'irn516732', 'shelfmark': ['USHMM RG-50.030*0493'], 'token_start': 35179, 'token_end': 35195}


In [141]:
fragment_5 = {}
fragment_5['original_sentence'] = "If you sat down, they shot you on the spot."
fragment_5['label']= "If you sat down, they shot you on the spot."
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22If%22%5D+%5B%22you%22%5D+%5B%22sat%22%5D+%5B%22down%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22shot%22%5D+%5B%22you%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22spot%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'If you sat down , they shot you on the spot . ', 'right': '', 'complete_match': 'If you sat down , they shot you on the spot . ', 'testimony_id': 'usc_shoah_27', 'shelfmark': ['USC 27'], 'token_start': 11696, 'token_end': 11708}


In [142]:
add_testimonial_fragments(fragments)

mid node exists cannot be added
