# Mining testimonial fragments of the Holocaust

**Experience domain:** smell

### Load the necessary libraries

In [1]:
import sys; sys.path.insert(0, '..')
import itertools

In [2]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
mongo = db.get_db()

In [3]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [4]:
def create_contextual_query(lemmas, context_length=50):
    """Build a query pattern that matches the lemmas in any order.

    Each ordering of ``lemmas`` becomes one parenthesised alternative in
    which consecutive ``[lemma="..."]`` tokens are separated by a gap of 0 to
    ``context_length`` arbitrary tokens (``[]{0,N}``). The alternatives are
    joined with ``|``.

    Args:
        lemmas: Sequence of lemma strings that must all occur.
        context_length: Maximum number of tokens allowed between two
            neighbouring lemmas.

    Returns:
        The assembled pattern as a single string.
    """
    gap = '[]{0,' + str(context_length) + '}'
    alternatives = (
        '(' + gap.join('[lemma="' + lemma + '"]' for lemma in ordering) + ')'
        for ordering in itertools.permutations(lemmas, len(lemmas))
    )
    return '|'.join(alternatives)
        
        
            

In [5]:
from utils import blacklab, db, text
import requests
import json
def find_sentence_id(label):
    """Locate a quoted passage in the corpus and return its sentence span.

    The passage is sent to the annotation server at
    ``http://localhost:9000/`` (``tokenize`` annotator), the returned tokens
    are turned into a token-level query, and the query is run through
    ``blacklab.search_blacklab``. The first hit is mapped to sentence indices
    using the token records of that testimony in ``mongo.tokens``.

    Args:
        label: Text of the passage to look up.

    Returns:
        A ``(start_sentence_index, end_sentence_index, testimony_id)`` tuple
        for the first hit, or ``None`` when the search or the index lookup
        fails (the failing query is printed in that case).
    """
    props = {'annotators': 'tokenize'}

    r = requests.post('http://localhost:9000/',
                      params={'properties': json.dumps(props)},
                      data=label.encode('utf-8'))
    # The encoding has to be set on the response; assigning it on the
    # ``requests`` module has no effect on how ``r.text`` is decoded.
    r.encoding = 'utf-8'
    # json.loads() lost its ``encoding`` keyword in Python 3.9; passing it
    # raises TypeError there.
    result = json.loads(r.text)

    tokens = result['tokens']
    last_index = len(tokens) - 1
    # Tokens containing any of these are replaced by a single wildcard token.
    single_wildcard_markers = ("n't", "'re", "?", ".", "'s", ",")

    query = []
    for i, token in enumerate(tokens):
        word = token['word']
        if '...' in word:
            # An ellipsis at either end of the passage is dropped; inside the
            # passage it stands for a gap of up to 50 tokens.
            if i == 0 or i == last_index:
                continue
            query.append('[]{0,50}')
        elif '-' in word:
            # Hyphenated material may span up to three tokens.
            query.append('[]{0,3}')
        elif any(marker in word for marker in single_wildcard_markers):
            query.append('[]')
        else:
            query.append('["' + word + '"]')

    query = ' '.join(query)
    try:
        sentence = blacklab.search_blacklab(query, window=0,
                                            lemma=False,
                                            include_match=True)
        first_hit = sentence[0]
        token_end = first_hit['token_end']
        token_start = first_hit['token_start']
        print (first_hit)
        mongo = db.get_db()
        results = mongo.tokens.find({'testimony_id':
                                    first_hit['testimony_id']},
                                    {'_id': 0})
        tokens = list(results)[0]['tokens']
        sentenceStart = tokens[token_start]['sentence_index']
        sentenceEnd = tokens[token_end]['sentence_index']
        return (sentenceStart, sentenceEnd, first_hit['testimony_id'])
    except Exception:
        # Best-effort lookup: report the query that failed and signal the
        # failure with None. KeyboardInterrupt/SystemExit are not swallowed.
        print("The following query returned a null result")
        print(query)
        return None
        
            


In [6]:
def create_parent_node(label):
    """Build a new top-level fragment document for ``label``.

    The document carries the label, a random ``essay_id`` between 1 and 20,
    and a childless parent node produced by ``get_node`` as its ``tree``.
    """
    placeholder_id = random.randint(1, 20)
    essay_id = random.randint(1, 20)
    tree = get_node(placeholder_id, {'label': label}, is_parent=True)
    tree['label'] = label
    return {'label': label, 'essay_id': essay_id, 'tree': tree}

In [7]:
def get_node(testimony_id, node, is_parent=False):
    """Build a tree node dict from ``node``.

    A parent node copies only ``node['label']`` and fills every other field
    with a random integer between 1 and 20. A leaf node copies the label and
    ``testimony_id`` from ``node`` and converts its media and sentence fields
    to ``float``. Both kinds start with an empty ``children`` list.

    The ``testimony_id`` argument is accepted but not read.
    """
    positional_keys = ('media_index', 'media_offset',
                       'start_sentence_index', 'end_sentence_index')

    if not is_parent:
        leaf = {'label': node['label'],
                'testimony_id': node['testimony_id']}
        for key in positional_keys:
            leaf[key] = float(node[key])
        leaf['children'] = []
        return leaf

    parent = {'label': node['label']}
    for key in ('testimony_id',) + positional_keys:
        parent[key] = random.randint(1, 20)
    parent['children'] = []
    return parent

In [8]:
def check_if_main_node_exist(node):
    """Report whether a fragment document labelled ``node`` is stored.

    Args:
        node: Label of the main node to look for in ``mongo.fragments``.

    Returns:
        ``True`` if a document with that label exists, ``False`` otherwise.
    """
    # find_one() returns None for a missing label. Indexing an empty find()
    # cursor raises IndexError instead, and the length of a found document
    # says nothing about whether the label exists.
    return mongo.fragments.find_one({'label': node}, {'_id': 0}) is not None

In [9]:
def add_main_node(label):
    """Insert a freshly generated main-node document for ``label``.

    Args:
        label: Label of the main node; passed to ``create_parent_node``.
    """
    # Collection.insert() was deprecated in PyMongo 3 and removed in
    # PyMongo 4. insert_one() is its single-document replacement and matches
    # the delete_one()/replace_one() calls used by the other helpers.
    mongo.fragments.insert_one(create_parent_node(label))

In [10]:
def delete_main_node(label):
    """Delete one document from ``mongo.fragments`` whose label is ``label``."""
    selector = {'label': label}
    mongo.fragments.delete_one(selector)

In [11]:
def add_testimonial_fragments(fragments):
    """Attach a mid node and its leaf fragments to a stored main node.

    Args:
        fragments: Dict with three keys: ``main_node`` (label of the stored
            document), ``mid_node`` (label of the node to add) and
            ``fragments`` (list of leaf dicts in the form ``get_node``
            accepts for non-parent nodes).

    Nothing is written, and a message is printed, when the main node is not
    stored or when it already has a child labelled ``mid_node``.
    """
    selector = {'label': fragments['main_node']}

    # One lookup answers both "does the main node exist?" and "what does it
    # contain?"; find_one() returns None instead of raising when it is absent.
    document = mongo.fragments.find_one(selector, {'_id': 0})
    if document is None:
        print ("main node does not exist cannot add fragments")
        return

    children = document['tree']['children']
    if any(child['label'] == fragments['mid_node'] for child in children):
        print ("mid node exists cannot be added")
        return

    mid_node = get_node('r', {'label': fragments['mid_node']}, is_parent=True)
    mid_node['children'].extend(
        get_node(fragment['testimony_id'], fragment)
        for fragment in fragments['fragments']
    )
    children.append(mid_node)
    mongo.fragments.replace_one(selector, document)

### Add the main node

In [12]:
# Rebuild the main node from scratch: delete a stored document with this
# label, then insert a newly generated one.
main_node = "smell"
delete_main_node(main_node)
add_main_node(main_node)

  


### Set up the query

# Pattern selecting tokens whose lemma is "smell".
query = '[lemma="smell"]'

# Run the topic-model concordance on the query (window=15, topicn=25).
result = topic_concordancer.main(query,window=15,topicn=25)

### Print the key topics

# Print each topic's position followed by the second entry of its
# ``topic_words`` field, with blank lines between topics.
for i, element in enumerate(result['topic_documents']):
    print(i)
    topic_words = element['topic_words'][1]
    print(topic_words, '\n', sep='\n')

### Analyze documents

# Position of the topic whose documents are inspected.
i = 0
# Show the matched words of the first 25 documents of that topic. The loop
# variable is named ``document`` so it does not rebind the ``text`` module
# imported from ``utils``.
for document in result['topic_documents'][i]['texts'][0:25]:
    print(document['matched_text_words'])
    print('\n')

## Testimonial fragments

### 1.  

In [13]:
# Lemmas that the contextual query requires to occur near each other.
lemmas = ["body","smell"]

In [14]:
# Build the any-order pattern for the lemmas (gap of at most 10 tokens) and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="body"][]{0,10}[lemma="smell"])|([lemma="smell"][]{0,10}[lemma="body"])


In [15]:
# Label of the mid node under which this section's fragments are grouped.
domain_term = "dead bodies"

In [16]:
# Payload for add_testimonial_fragments: parent label, mid-node label and
# the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [17]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_1 = {
    'original_sentence': "That was supposed to be food but they didn't feed us anymore and since it was a very warm spring they decomposed and smelled so horrible and with the rest of the dead bodies, the stench was terrible. You could almost choke, breathing that air there.",
    'label': "(..) smelled so horrible and with the rest of the dead bodies, the stench was terrible. You could almost choke, breathing that air there.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22That%22%5D+%5B%22was%22%5D+%5B%22supposed%22%5D+%5B%22to%22%5D+%5B%22be%22%5D+%5B%22food%22%5D+%5B%22but%22%5D+%5B%22they%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22feed%22%5D+%5B%22us%22%5D+%5B%22anymore%22%5D+%5B%22and%22%5D+%5B%22since%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22very%22%5D+%5B%22warm%22%5D+%5B%22spring%22%5D+%5B%22they%22%5D+%5B%22decomposed%22%5D+%5B%22and%22%5D+%5B%22smelled%22%5D+%5B%22so%22%5D+%5B%22horrible%22%5D+%5B%22and%22%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22rest%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22dead%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22stench%22%5D+%5B%22was%22%5D+%5B%22terrible%22%5D+%5B%5D+%5B%22You%22%5D+%5B%22could%22%5D+%5B%22almost%22%5D+%5B%22choke%22%5D+%5B%5D+%5B%22breathing%22%5D+%5B%22that%22%5D+%5B%22air%22%5D+%5B%22there%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "That was suppose

In [18]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_2 = {
    'original_sentence': "The the meat was rotting away from their bodies. The smell...it was horrific.",
    'label': "The the meat was rotting away from their bodies. The smell...it was horrific.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22the%22%5D+%5B%22meat%22%5D+%5B%22was%22%5D+%5B%22rotting%22%5D+%5B%22away%22%5D+%5B%22from%22%5D+%5B%22their%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22smell%22%5D+%5B%5D%7B0%2C50%7D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22horrific%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The the meat was rotting away from their bodies . The smell ... it was horrific . ', 'right': '', 'complete_match': 'The the meat was rotting away from their bodies . The smell ... it was horrific . ', 'testimony_id': 'irn504690', 'shelfmark': ['USHMM RG-50.030*0195'], 'token_start': 7165, 'token_end': 7182}


In [19]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_3 = {
    'original_sentence': "Yeah, well I smelled dead bodies and stench and I mean, the place smelled.",
    'label': "Yeah, well I smelled dead bodies and stench and I mean, the place smelled.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Yeah%22%5D+%5B%5D+%5B%22well%22%5D+%5B%22I%22%5D+%5B%22smelled%22%5D+%5B%22dead%22%5D+%5B%22bodies%22%5D+%5B%22and%22%5D+%5B%22stench%22%5D+%5B%22and%22%5D+%5B%22I%22%5D+%5B%22mean%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22place%22%5D+%5B%22smelled%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Yeah , well I smelled dead bodies and stench and I mean , the place smelled . ', 'right': '', 'complete_match': 'Yeah , well I smelled dead bodies and stench and I mean , the place smelled . ', 'testimony_id': 'usc_shoah_13483', 'shelfmark': ['USC Shoah Foundation 13483'], 'token_start': 13846, 'token_end': 13863}


In [20]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_4 = {
    'original_sentence': "It was a terrible smell, hair and bodies. You could smell. And was very scary.",
    'label': "It was a terrible smell, hair and bodies. You could smell. And was very scary.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22It%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22terrible%22%5D+%5B%22smell%22%5D+%5B%5D+%5B%22hair%22%5D+%5B%22and%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22You%22%5D+%5B%22could%22%5D+%5B%22smell%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22was%22%5D+%5B%22very%22%5D+%5B%22scary%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'It was a terrible smell , hair and bodies . You could smell . And was very scary . ', 'right': '', 'complete_match': 'It was a terrible smell , hair and bodies . You could smell . And was very scary . ', 'testimony_id': 'usc_shoah_15610', 'shelfmark': ['USC Shoah Foundation 15610'], 'token_start': 7753, 'token_end': 7772}


In [21]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_5 = {
    'original_sentence': "These thousands of dead bodies piled up and the smell, the stink",
    'label': "These thousands of dead bodies piled up and the smell, the stink (..).",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22These%22%5D+%5B%22thousands%22%5D+%5B%22of%22%5D+%5B%22dead%22%5D+%5B%22bodies%22%5D+%5B%22piled%22%5D+%5B%22up%22%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22stink%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'These thousands of dead bodies piled up and the smell , the stink ', 'right': '', 'complete_match': 'These thousands of dead bodies piled up and the smell , the stink ', 'testimony_id': 'usc_shoah_942', 'shelfmark': ['USC Shoah Foundation 942'], 'token_start': 32193, 'token_end': 32206}


In [22]:
# Store the collected fragments as a new mid node of the main node.
add_testimonial_fragments(fragments)

### 2.  

In [23]:
# Lemmas that the contextual query requires to occur near each other.
lemmas = ["hair","smell"]

In [24]:
# Build the any-order pattern for the lemmas (gap of at most 4 tokens) and show it.
query = create_contextual_query(lemmas,context_length=4)
print (query)

([lemma="hair"][]{0,4}[lemma="smell"])|([lemma="smell"][]{0,4}[lemma="hair"])


In [25]:
# Label of the mid node under which this section's fragments are grouped.
domain_term = "hair"

In [26]:
# Payload for add_testimonial_fragments: parent label, mid-node label and
# the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [27]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_1 = {
    'original_sentence': "It smelled like burned hair. If you take hair and burn it, that’s what it smelled like.",
    'label': "It smelled like burned hair. If you take hair and burn it, that’s what it smelled like.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22It%22%5D+%5B%22smelled%22%5D+%5B%22like%22%5D+%5B%22burned%22%5D+%5B%22hair%22%5D+%5B%5D+%5B%22If%22%5D+%5B%22you%22%5D+%5B%22take%22%5D+%5B%22hair%22%5D+%5B%22and%22%5D+%5B%22burn%22%5D+%5B%22it%22%5D+%5B%5D+%5B%22that%22%5D+%5B%5D+%5B%22what%22%5D+%5B%22it%22%5D+%5B%22smelled%22%5D+%5B%22like%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'It smelled like burned hair . If you take hair and burn it , that ’s what it smelled like . ', 'right': '', 'complete_match': 'It smelled like burned hair . If you take hair and burn it , that ’s what it smelled like . ', 'testimony_id': 'irn509382', 'shelfmark': ['USHMM RG-50.544*0001'], 'token_start': 4422, 'token_end': 4443}


In [28]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_2 = {
    'original_sentence': "But the smell was terrible-- together, hair smelling, body smelling.",
    'label': "But the smell was terrible-- together, hair smelling, body smelling.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22But%22%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%22was%22%5D+%5B%22terrible%22%5D+%5B%5D%7B0%2C3%7D+%5B%22together%22%5D+%5B%5D+%5B%22hair%22%5D+%5B%22smelling%22%5D+%5B%5D+%5B%22body%22%5D+%5B%22smelling%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'But the smell was terrible -- together , hair smelling , body smelling . ', 'right': '', 'complete_match': 'But the smell was terrible -- together , hair smelling , body smelling . ', 'testimony_id': 'usc_shoah_25835', 'shelfmark': ['USC Shoah Foundation 25835'], 'token_start': 11645, 'token_end': 11659}


In [29]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_4 = {
    'original_sentence': " this is a crematorium and we felt the smell of hair, of bones, so I said no. This is impossible.",
    'label': "(..) this is a crematorium and we felt the smell of hair, of bones, so I said no. This is impossible.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22this%22%5D+%5B%22is%22%5D+%5B%22a%22%5D+%5B%22crematorium%22%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22felt%22%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%22of%22%5D+%5B%22hair%22%5D+%5B%5D+%5B%22of%22%5D+%5B%22bones%22%5D+%5B%5D+%5B%22so%22%5D+%5B%22I%22%5D+%5B%22said%22%5D+%5B%22no%22%5D+%5B%5D+%5B%22This%22%5D+%5B%22is%22%5D+%5B%22impossible%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'this is a crematorium and we felt the smell of hair , of bones , so I said no . This is impossible . ', 'right': '', 'complete_match': 'this is a crematorium and we felt the smell of hair , of bones , so I said no . This is impossible . ', 'testimony_id': 'usc_shoah_766', 'shelfmark': ['USC Shoah Foundation 766'], 'token_start': 27554, 'token_end': 27578}


In [30]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_5 = {
    'original_sentence': "And you smell the hair and the bone, and-- you-- you make-- you don't know what it was.",
    'label': "And you smell the hair and the bone, and-- you-- you make-- you don't know what it was.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22you%22%5D+%5B%22smell%22%5D+%5B%22the%22%5D+%5B%22hair%22%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22bone%22%5D+%5B%5D+%5B%22and%22%5D+%5B%5D%7B0%2C3%7D+%5B%22you%22%5D+%5B%5D%7B0%2C3%7D+%5B%22you%22%5D+%5B%22make%22%5D+%5B%5D%7B0%2C3%7D+%5B%22you%22%5D+%5B%22do%22%5D+%5B%5D+%5B%22know%22%5D+%5B%22what%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And you smell the hair and the bone , and -- you -- you make -- you do n't know what it was . ", 'right': '', 'complete_match': "And you smell the hair and the bone , and -- you -- you make -- you do n't know what it was . ", 'testimony_id': 'usc_shoah_7684', 'shelfmark': ['USC Shoah Foundation 7684'], 'token_start': 6227, 'token_end': 6251}


In [31]:
# Store the collected fragments as a new mid node of the main node.
add_testimonial_fragments(fragments)

### 3.  

In [32]:
# Lemmas that the contextual query requires to occur near each other.
lemmas = ["gas","smell"]

In [33]:
# Build the any-order pattern for the lemmas (gap of at most 10 tokens) and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="gas"][]{0,10}[lemma="smell"])|([lemma="smell"][]{0,10}[lemma="gas"])


In [34]:
# Label of the mid node under which this section's fragments are grouped.
domain_term = "gas"

In [35]:
# Payload for add_testimonial_fragments: parent label, mid-node label and
# the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [36]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_1 = {
    'original_sentence': "And we were all-- we all smelled the gas. Believe it or not, it was not true.",
    'label': "And we were all-- we all smelled the gas. Believe it or not, it was not true.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22all%22%5D+%5B%5D%7B0%2C3%7D+%5B%22we%22%5D+%5B%22all%22%5D+%5B%22smelled%22%5D+%5B%22the%22%5D+%5B%22gas%22%5D+%5B%5D+%5B%22Believe%22%5D+%5B%22it%22%5D+%5B%22or%22%5D+%5B%22not%22%5D+%5B%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22not%22%5D+%5B%22true%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And we were all -- we all smelled the gas . Believe it or not , it was not true . ', 'right': '', 'complete_match': 'And we were all -- we all smelled the gas . Believe it or not , it was not true . ', 'testimony_id': 'HVT-134', 'shelfmark': ['Fortunoff Archive HVT-134'], 'token_start': 1950, 'token_end': 1971}


In [37]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_2 = {
    'original_sentence': " So I knew that's where it happened. I could still smell the gas.",
    'label': " So I knew that's where it happened. I could still smell the gas.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22I%22%5D+%5B%22knew%22%5D+%5B%22that%22%5D+%5B%5D+%5B%22where%22%5D+%5B%22it%22%5D+%5B%22happened%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22could%22%5D+%5B%22still%22%5D+%5B%22smell%22%5D+%5B%22the%22%5D+%5B%22gas%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "So I knew that 's where it happened . I could still smell the gas . ", 'right': '', 'complete_match': "So I knew that 's where it happened . I could still smell the gas . ", 'testimony_id': 'usc_shoah_26140', 'shelfmark': ['USC Shoah Foundation 26140'], 'token_start': 8882, 'token_end': 8898}


In [38]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_3 = {
    'original_sentence': "Day by day, we suffered. People died. The gas smell.",
    'label': " Day by day, we suffered. People died. The gas smell.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Day%22%5D+%5B%22by%22%5D+%5B%22day%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22suffered%22%5D+%5B%5D+%5B%22People%22%5D+%5B%22died%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22gas%22%5D+%5B%22smell%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Day by day , we suffered . People died . The gas smell . ', 'right': '', 'complete_match': 'Day by day , we suffered . People died . The gas smell . ', 'testimony_id': 'usc_shoah_628', 'shelfmark': ['USC Shoah Foundation 628'], 'token_start': 19694, 'token_end': 19708}


In [39]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_4 = {
    'original_sentence': "The scene is in stony bunker, the last plea for breath, the smell of gas, the eerie silence, dead.",
    'label': "The scene is in stony bunker, the last plea for breath, the smell of gas, the eerie silence, dead.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22scene%22%5D+%5B%22is%22%5D+%5B%22in%22%5D+%5B%22stony%22%5D+%5B%22bunker%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22last%22%5D+%5B%22plea%22%5D+%5B%22for%22%5D+%5B%22breath%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%22of%22%5D+%5B%22gas%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22eerie%22%5D+%5B%22silence%22%5D+%5B%5D+%5B%22dead%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The scene is in stony bunker , the last plea for breath , the smell of gas , the eerie silence , dead . ', 'right': '', 'complete_match': 'The scene is in stony bunker , the last plea for breath , the smell of gas , the eerie silence , dead . ', 'testimony_id': 'usc_shoah_20686', 'shelfmark': ['USC Shoah Foundation 20686'], 'token_start': 13697, 'token_end': 13721}


In [40]:
# Store the collected fragments as a new mid node of the main node.
add_testimonial_fragments(fragments)

### 4.  

In [41]:
# Lemmas that the contextual query requires to occur near each other.
lemmas = ["body","smell"]

In [42]:
# Build the any-order pattern for the lemmas (gap of at most 10 tokens) and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="body"][]{0,10}[lemma="smell"])|([lemma="smell"][]{0,10}[lemma="body"])


In [43]:
# Label of the mid node under which this section's fragments are grouped.
domain_term = "burning bodies"

In [44]:
# Payload for add_testimonial_fragments: parent label, mid-node label and
# the (initially empty) list of leaf fragments.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [45]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_1 = {
    'original_sentence': "And the smell of the burning bodies. They gathered all the Jews in a barn at the end of this town and put a burning fire.",
    'label': "And the smell of the burning bodies. They gathered all the Jews in a barn at the end of this town and put a burning fire.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22burning%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22gathered%22%5D+%5B%22all%22%5D+%5B%22the%22%5D+%5B%22Jews%22%5D+%5B%22in%22%5D+%5B%22a%22%5D+%5B%22barn%22%5D+%5B%22at%22%5D+%5B%22the%22%5D+%5B%22end%22%5D+%5B%22of%22%5D+%5B%22this%22%5D+%5B%22town%22%5D+%5B%22and%22%5D+%5B%22put%22%5D+%5B%22a%22%5D+%5B%22burning%22%5D+%5B%22fire%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And the smell of the burning bodies . They gathered all the Jews in a barn at the end of this town and put a burning fire . ', 'right': '', 'complete_match': 'And the smell of the burning bodies . They gathered all the Jews in a barn at the end of this town and put a burning fire . ', 'testimony_id': 'HVT-172', 'shelfmark': ['Fortunoff Archive HVT-172'], 'token_start': 6543, 'token_end': 6571}


In [46]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_2 = {
    'original_sentence': "And then we could smell the burning of the bodies, the human flesh burning.",
    'label': "And then we could smell the burning of the bodies, the human flesh burning.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22then%22%5D+%5B%22we%22%5D+%5B%22could%22%5D+%5B%22smell%22%5D+%5B%22the%22%5D+%5B%22burning%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22human%22%5D+%5B%22flesh%22%5D+%5B%22burning%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And then we could smell the burning of the bodies , the human flesh burning . ', 'right': '', 'complete_match': 'And then we could smell the burning of the bodies , the human flesh burning . ', 'testimony_id': 'irn504659', 'shelfmark': ['USHMM RG-50.030*0161'], 'token_start': 4631, 'token_end': 4647}


In [47]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_3 = {
    'original_sentence': "Then we realized the dreadful smell, and we knew those were the burned bodies that we smelled.",
    'label': "Then we realized the dreadful smell, and we knew those were the burned bodies that we smelled.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Then%22%5D+%5B%22we%22%5D+%5B%22realized%22%5D+%5B%22the%22%5D+%5B%22dreadful%22%5D+%5B%22smell%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22knew%22%5D+%5B%22those%22%5D+%5B%22were%22%5D+%5B%22the%22%5D+%5B%22burned%22%5D+%5B%22bodies%22%5D+%5B%22that%22%5D+%5B%22we%22%5D+%5B%22smelled%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Then we realized the dreadful smell , and we knew those were the burned bodies that we smelled . ', 'right': '', 'complete_match': 'Then we realized the dreadful smell , and we knew those were the burned bodies that we smelled . ', 'testimony_id': 'usc_shoah_5496', 'shelfmark': ['USC Shoah Foundation 5496'], 'token_start': 5942, 'token_end': 5961}


In [48]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_4 = {
    'original_sentence': "You could smell the air from burnt bodies.",
    'label': "You could smell the air from burnt bodies.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22could%22%5D+%5B%22smell%22%5D+%5B%22the%22%5D+%5B%22air%22%5D+%5B%22from%22%5D+%5B%22burnt%22%5D+%5B%22bodies%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'You could smell the air from burnt bodies . ', 'right': '', 'complete_match': 'You could smell the air from burnt bodies . ', 'testimony_id': 'usc_shoah_8352', 'shelfmark': ['USC Shoah Foundation 8352'], 'token_start': 10063, 'token_end': 10072}


In [49]:
# Quoted passage and display label first; the sentence span and testimony id
# are then filled in from the corpus lookup.
fragment_5 = {
    'original_sentence': " We smelled burning bodies nonstop. And we looked at the tall chimneys, and there was smoke coming out.",
    'label': " We smelled burning bodies nonstop. And we looked at the tall chimneys, and there was smoke coming out.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22smelled%22%5D+%5B%22burning%22%5D+%5B%22bodies%22%5D+%5B%22nonstop%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22we%22%5D+%5B%22looked%22%5D+%5B%22at%22%5D+%5B%22the%22%5D+%5B%22tall%22%5D+%5B%22chimneys%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22there%22%5D+%5B%22was%22%5D+%5B%22smoke%22%5D+%5B%22coming%22%5D+%5B%22out%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We smelled burning bodies nonstop . And we looked at the tall chimneys , and there was smoke coming out . ', 'right': '', 'complete_match': 'We smelled burning bodies nonstop . And we looked at the tall chimneys , and there was smoke coming out . ', 'testimony_id': 'usc_shoah_13524', 'shelfmark': ['USC Shoah Foundation 13524'], 'token_start': 24871, 'token_end': 24892}


In [50]:
# Store the collected fragments as a new mid node of the main node.
add_testimonial_fragments(fragments)

### 5.  

In [51]:
# Lemmas that the contextual query requires to occur near each other.
lemmas = ["smell","toilet"]

In [52]:
# Build the any-order pattern for the lemmas (gap of at most 10 tokens) and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="smell"][]{0,10}[lemma="toilet"])|([lemma="toilet"][]{0,10}[lemma="smell"])


In [53]:
# Term stored as the 'mid_node' of the fragments container built below.
domain_term = "toilet"

In [54]:
# Fresh container for this domain term; fragment dicts are appended
# to the 'fragments' list by the cells that follow.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [55]:
# Fragment 1 (toilet): the label wraps the looked-up sentence in "(..)" elision markers.
fragment_1 = {
    'original_sentence': "naturally everybody had to use that toilet, very har -- it was -- it was terrible smell, terrible smell because of the toilet",
    'label': "(..) naturally everybody had to use that toilet, very har -- it was -- it was terrible smell, terrible smell because of the toilet (..).",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22naturally%22%5D+%5B%22everybody%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22use%22%5D+%5B%22that%22%5D+%5B%22toilet%22%5D+%5B%5D+%5B%22very%22%5D+%5B%22har%22%5D+%5B%5D%7B0%2C3%7D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%5D%7B0%2C3%7D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22terrible%22%5D+%5B%22smell%22%5D+%5B%5D+%5B%22terrible%22%5D+%5B%22smell%22%5D+%5B%22because%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22toilet%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'naturally everybody had to use that toilet , very har -- it was -- it was terrible smell , terrible smell because of the toilet ', 'right': '', 'complete_match': 'naturally everybody had to use that toilet , very har -- it was -- it was terrible smell , terrible smell because of the toilet ', 'testimony_id': 'irn509676', 'shelfmark': ['USHMM RG-50.030*0415'], 'token_start': 10173, 'token_end': 10198}


In [56]:
# Fragment 2 (toilet): sentence and display label are identical.
fragment_2 = {
    'original_sentence': "They put a few dishes, cans. This was the toilet. The terrible smell.",
    'label': "They put a few dishes, cans. This was the toilet. The terrible smell.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22put%22%5D+%5B%22a%22%5D+%5B%22few%22%5D+%5B%22dishes%22%5D+%5B%5D+%5B%22cans%22%5D+%5B%5D+%5B%22This%22%5D+%5B%22was%22%5D+%5B%22the%22%5D+%5B%22toilet%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22terrible%22%5D+%5B%22smell%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They put a few dishes , cans . This was the toilet . The terrible smell . ', 'right': '', 'complete_match': 'They put a few dishes , cans . This was the toilet . The terrible smell . ', 'testimony_id': 'usc_shoah_27770', 'shelfmark': ['USC Shoah Foundation 27770'], 'token_start': 9154, 'token_end': 9171}


In [57]:
# Fragment 3 (toilet): the label is a shortened form of the looked-up sentence.
fragment_3 = {
    'original_sentence': "The room was half the size of this room with a bucket where everybody was going to toilet, you know. Smelling-- the smell was unbelievable.",
    'label': "(..) everybody was going to toilet, you know. Smelling-- the smell was unbelievable.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22room%22%5D+%5B%22was%22%5D+%5B%22half%22%5D+%5B%22the%22%5D+%5B%22size%22%5D+%5B%22of%22%5D+%5B%22this%22%5D+%5B%22room%22%5D+%5B%22with%22%5D+%5B%22a%22%5D+%5B%22bucket%22%5D+%5B%22where%22%5D+%5B%22everybody%22%5D+%5B%22was%22%5D+%5B%22going%22%5D+%5B%22to%22%5D+%5B%22toilet%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22Smelling%22%5D+%5B%5D%7B0%2C3%7D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%22was%22%5D+%5B%22unbelievable%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The room was half the size of this room with a bucket where everybody was going to toilet , you know . Smelling -- the smell was unbelievable . ', 'right': '', 'complete_match': 'The room was half the size of this room with a bucket where everybody was going to toilet , you know . Smelling -- the smell was unbelievable . ', 'testimony_id': 'usc_shoah_5275', 'shelfmark':

In [58]:
# Fragment 4 (toilet): the label is a shortened form of the looked-up sentence.
fragment_4 = {
    'original_sentence': "You had to go-- when you had to do your-- your shit like on the toilet. There was a corner there. It was smelling.",
    'label': "(..) your shit like on the toilet. There was a corner there. It was smelling.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22go%22%5D+%5B%5D%7B0%2C3%7D+%5B%22when%22%5D+%5B%22you%22%5D+%5B%22had%22%5D+%5B%22to%22%5D+%5B%22do%22%5D+%5B%22your%22%5D+%5B%5D%7B0%2C3%7D+%5B%22your%22%5D+%5B%22shit%22%5D+%5B%22like%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22toilet%22%5D+%5B%5D+%5B%22There%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22corner%22%5D+%5B%22there%22%5D+%5B%5D+%5B%22It%22%5D+%5B%22was%22%5D+%5B%22smelling%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'You had to go -- when you had to do your -- your shit like on the toilet . There was a corner there . It was smelling . ', 'right': '', 'complete_match': 'You had to go -- when you had to do your -- your shit like on the toilet . There was a corner there . It was smelling . ', 'testimony_id': 'usc_shoah_628', 'shelfmark': ['USC Shoah Foundation 628'], 'token_start': 34834, 'token_end': 3486

In [59]:
# Fragment 5 (toilet): sentence and display label are identical.
fragment_5 = {
    'original_sentence': "In one of the huts in the far corner, some sort of toilet which smelled bloody horrible.",
    'label': "In one of the huts in the far corner, some sort of toilet which smelled bloody horrible.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22In%22%5D+%5B%22one%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22huts%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22far%22%5D+%5B%22corner%22%5D+%5B%5D+%5B%22some%22%5D+%5B%22sort%22%5D+%5B%22of%22%5D+%5B%22toilet%22%5D+%5B%22which%22%5D+%5B%22smelled%22%5D+%5B%22bloody%22%5D+%5B%22horrible%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'In one of the huts in the far corner , some sort of toilet which smelled bloody horrible . ', 'right': '', 'complete_match': 'In one of the huts in the far corner , some sort of toilet which smelled bloody horrible . ', 'testimony_id': 'usc_shoah_13483', 'shelfmark': ['USC Shoah Foundation 13483'], 'token_start': 13390, 'token_end': 13409}


In [60]:
# Hand the "toilet" fragments collected above to add_testimonial_fragments
# (defined outside this section of the notebook).
add_testimonial_fragments(fragments)

### 6.  

In [61]:
# Lemmas that the contextual query must find close to each other, in any order.
lemmas = ["smell","urine"]

In [62]:
# Build a query matching every ordering of the lemmas with at most
# 10 arbitrary tokens between them, then show it for inspection.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="smell"][]{0,10}[lemma="urine"])|([lemma="urine"][]{0,10}[lemma="smell"])


In [63]:
# Term stored as the 'mid_node' of the fragments container built below.
domain_term = "urine"

In [64]:
# Fresh container for this domain term; fragment dicts are appended
# to the 'fragments' list by the cells that follow.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [65]:
# Fragment 1 (urine): sentence and display label are identical.
fragment_1 = {
    'original_sentence': "And the smell, and and the urine, and people crowded in pretty nearly standing position, and all.",
    'label': "And the smell, and and the urine, and people crowded in pretty nearly standing position, and all.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22urine%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22people%22%5D+%5B%22crowded%22%5D+%5B%22in%22%5D+%5B%22pretty%22%5D+%5B%22nearly%22%5D+%5B%22standing%22%5D+%5B%22position%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22all%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And the smell , and and the urine , and people crowded in pretty nearly standing position , and all . ', 'right': '', 'complete_match': 'And the smell , and and the urine , and people crowded in pretty nearly standing position , and all . ', 'testimony_id': 'usc_shoah_7744', 'shelfmark': ['USC Shoah Foundation 7744'], 'token_start': 4992, 'token_end': 5013}


In [66]:
# Fragment 2 (urine): sentence and display label are identical.
fragment_2 = {
    'original_sentence': "Because the, the sanitary toilets was with urine, everything was running around, and it was smelling all around.",
    'label': "Because the, the sanitary toilets was with urine, everything was running around, and it was smelling all around.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Because%22%5D+%5B%22the%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22sanitary%22%5D+%5B%22toilets%22%5D+%5B%22was%22%5D+%5B%22with%22%5D+%5B%22urine%22%5D+%5B%5D+%5B%22everything%22%5D+%5B%22was%22%5D+%5B%22running%22%5D+%5B%22around%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22smelling%22%5D+%5B%22all%22%5D+%5B%22around%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Because the , the sanitary toilets was with urine , everything was running around , and it was smelling all around . ', 'right': '', 'complete_match': 'Because the , the sanitary toilets was with urine , everything was running around , and it was smelling all around . ', 'testimony_id': 'HVT-157', 'shelfmark': ['Fortunoff Archive HVT-157'], 'token_start': 10461, 'token_end': 10483}


In [67]:
# Fragment 3 (urine).
# NOTE(review): the label continues past the text of 'original_sentence', so the
# stored start/end indices describe only the first clause -- confirm this is intended.
fragment_3 = {
    'original_sentence': "you ended up getting some sprinkles of that urine",
    'label': "(..)you ended up getting some sprinkles of that urine (..) and that urine had a smell and it impregnated what our clothes (..) ",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22you%22%5D+%5B%22ended%22%5D+%5B%22up%22%5D+%5B%22getting%22%5D+%5B%22some%22%5D+%5B%22sprinkles%22%5D+%5B%22of%22%5D+%5B%22that%22%5D+%5B%22urine%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'you ended up getting some sprinkles of that urine ', 'right': '', 'complete_match': 'you ended up getting some sprinkles of that urine ', 'testimony_id': 'irn504453', 'shelfmark': ['USHMM RG-50.030*0021'], 'token_start': 11873, 'token_end': 11882}


In [68]:
# Fragment 4 (urine): the label is a shortened form of the looked-up sentence.
fragment_4 = {
    'original_sentence': "And that trip in the wagons, again-- urine, feces, smell, odor, sickness, vomiting, death experienced for about a day and a half,",
    'label': "(..) urine, feces, smell, odor, sickness, vomiting, death experienced for about a day and a half (..)",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22that%22%5D+%5B%22trip%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22wagons%22%5D+%5B%5D+%5B%22again%22%5D+%5B%5D%7B0%2C3%7D+%5B%22urine%22%5D+%5B%5D+%5B%22feces%22%5D+%5B%5D+%5B%22smell%22%5D+%5B%5D+%5B%22odor%22%5D+%5B%5D+%5B%22sickness%22%5D+%5B%5D+%5B%22vomiting%22%5D+%5B%5D+%5B%22death%22%5D+%5B%22experienced%22%5D+%5B%22for%22%5D+%5B%22about%22%5D+%5B%22a%22%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22a%22%5D+%5B%22half%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And that trip in the wagons , again -- urine , feces , smell , odor , sickness , vomiting , death experienced for about a day and a half , ', 'right': '', 'complete_match': 'And that trip in the wagons , again -- urine , feces , smell , odor , sickness , vomiting , death experienced for about a day and a half , ', 'testimony_id': 'usc_shoah_19895', 'shelfmark': ['USC Shoah Foundation 1

In [69]:
# Fragment 5 (urine): the label wraps the looked-up sentence in "(..)" elision markers.
fragment_5 = {
    'original_sentence': "these were all pretty modern barrack buildings, reeked of the odor of urine",
    'label': "(..)these were all pretty modern barrack buildings, reeked of the odor of urine (..).",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22these%22%5D+%5B%22were%22%5D+%5B%22all%22%5D+%5B%22pretty%22%5D+%5B%22modern%22%5D+%5B%22barrack%22%5D+%5B%22buildings%22%5D+%5B%5D+%5B%22reeked%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22odor%22%5D+%5B%22of%22%5D+%5B%22urine%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'these were all pretty modern barrack buildings , reeked of the odor of urine ', 'right': '', 'complete_match': 'these were all pretty modern barrack buildings , reeked of the odor of urine ', 'testimony_id': 'irn511053', 'shelfmark': ['USHMM RG-50.470*0008'], 'token_start': 1041, 'token_end': 1055}


In [70]:
# Hand the "urine" fragments collected above to add_testimonial_fragments
# (defined outside this section of the notebook).
add_testimonial_fragments(fragments)

### 7.  

In [71]:
# Lemmas that the contextual query must find close to each other, in any order.
lemmas = ["smoke","smell"]

In [72]:
# Build a query matching every ordering of the lemmas with at most
# 15 arbitrary tokens between them, then show it for inspection.
query = create_contextual_query(lemmas,context_length=15)
print (query)

([lemma="smoke"][]{0,15}[lemma="smell"])|([lemma="smell"][]{0,15}[lemma="smoke"])


In [73]:
# Term stored as the 'mid_node' of the fragments container built below.
domain_term = "smoke"

In [74]:
# Fresh container for this domain term; fragment dicts are appended
# to the 'fragments' list by the cells that follow.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [75]:
# Fragment 1 (smoke): sentence and display label are identical.
fragment_1 = {
    'original_sentence': "And again, we smelled the smoke of burning flesh day and night.",
    'label': "And again, we smelled the smoke of burning flesh day and night.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22again%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22smelled%22%5D+%5B%22the%22%5D+%5B%22smoke%22%5D+%5B%22of%22%5D+%5B%22burning%22%5D+%5B%22flesh%22%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And again , we smelled the smoke of burning flesh day and night . ', 'right': '', 'complete_match': 'And again , we smelled the smoke of burning flesh day and night . ', 'testimony_id': 'HVT-61', 'shelfmark': ['Fortunoff Archive HVT-61'], 'token_start': 6518, 'token_end': 6532}


In [76]:
# Fragment 2 (smoke): sentence and display label are identical.
# NOTE(review): both strings end with a trailing space -- confirm that this is intended.
fragment_2 = {
    'original_sentence': "We could see the chimneys. We could smell the smoke. We knew the people that were taken over there. ",
    'label': "We could see the chimneys. We could smell the smoke. We knew the people that were taken over there. ",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22could%22%5D+%5B%22see%22%5D+%5B%22the%22%5D+%5B%22chimneys%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22could%22%5D+%5B%22smell%22%5D+%5B%22the%22%5D+%5B%22smoke%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22knew%22%5D+%5B%22the%22%5D+%5B%22people%22%5D+%5B%22that%22%5D+%5B%22were%22%5D+%5B%22taken%22%5D+%5B%22over%22%5D+%5B%22there%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We could see the chimneys . We could smell the smoke . We knew the people that were taken over there . ', 'right': '', 'complete_match': 'We could see the chimneys . We could smell the smoke . We knew the people that were taken over there . ', 'testimony_id': 'irn504818', 'shelfmark': ['USHMM RG-50.030*0324'], 'token_start': 5805, 'token_end': 5827}


In [77]:
# Fragment 3 (smoke): the label prefixes the looked-up sentence with a "(..)" elision marker.
fragment_3 = {
    'original_sentence': "you could see spitting fire from the chimneys, real tall chimneys, heavy smoke, and the smell was awful.",
    'label': "(..)you could see spitting fire from the chimneys, real tall chimneys, heavy smoke, and the smell was awful.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22you%22%5D+%5B%22could%22%5D+%5B%22see%22%5D+%5B%22spitting%22%5D+%5B%22fire%22%5D+%5B%22from%22%5D+%5B%22the%22%5D+%5B%22chimneys%22%5D+%5B%5D+%5B%22real%22%5D+%5B%22tall%22%5D+%5B%22chimneys%22%5D+%5B%5D+%5B%22heavy%22%5D+%5B%22smoke%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22smell%22%5D+%5B%22was%22%5D+%5B%22awful%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'you could see spitting fire from the chimneys , real tall chimneys , heavy smoke , and the smell was awful . ', 'right': '', 'complete_match': 'you could see spitting fire from the chimneys , real tall chimneys , heavy smoke , and the smell was awful . ', 'testimony_id': 'irn506634', 'shelfmark': ['USHMM RG-50.106*0122'], 'token_start': 6600, 'token_end': 6622}


In [78]:
# Fragment 4 (smoke): the label replaces the sentence's trailing ", " with " (..).".
# NOTE(review): 'original_sentence' ends with a comma and a space -- confirm that this is intended.
fragment_4 = {
    'original_sentence': "We can see the smoke, you know, we can see this, you can smell this, you know, odor, ",
    'label': "We can see the smoke, you know, we can see this, you can smell this, you know, odor (..).",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22can%22%5D+%5B%22see%22%5D+%5B%22the%22%5D+%5B%22smoke%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22can%22%5D+%5B%22see%22%5D+%5B%22this%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22can%22%5D+%5B%22smell%22%5D+%5B%22this%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22odor%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We can see the smoke , you know , we can see this , you can smell this , you know , odor , ', 'right': '', 'complete_match': 'We can see the smoke , you know , we can see this , you can smell this , you know , odor , ', 'testimony_id': 'irn509676', 'shelfmark': ['USHMM RG-50.030*0415'], 'token_start': 13496, 'token_end': 13520}


In [79]:
# Fragment 5 (smoke): sentence and display label are identical.
fragment_5 = {
    'original_sentence': "We saw the smoke. We smelled the smoke.",
    'label': "We saw the smoke. We smelled the smoke.",
}
# The lookup result is consumed positionally: [0] start, [1] end, [2] testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22saw%22%5D+%5B%22the%22%5D+%5B%22smoke%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22smelled%22%5D+%5B%22the%22%5D+%5B%22smoke%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We saw the smoke . We smelled the smoke . ', 'right': '', 'complete_match': 'We saw the smoke . We smelled the smoke . ', 'testimony_id': 'usc_shoah_323', 'shelfmark': ['USC Shoah Foundation 323'], 'token_start': 12112, 'token_end': 12122}


In [80]:
# Hand the "smoke" fragments collected above to add_testimonial_fragments
# (defined outside this section of the notebook).
add_testimonial_fragments(fragments)