In [1]:
from ipywidgets import widgets
import xml.etree.ElementTree as ET
from IPython.display import display, Markdown, Javascript

from indra.sources import eidos
from indra.assemblers import CAGAssembler
from indra.literature import elsevier_client
from indra.assemblers import PysbAssembler
from indra.explanation.model_checker import ModelChecker
import indra.tools.assemble_corpus as ac

global titles
global articles
global statements
global search_term

from paper_reading import *

def on_search(b):
    global titles
    global articles
    global search_term
    piis = elsevier_client.get_piis(search_term.value, start_year=2017)
    print('I found a total of %d papers%s.' % (len(piis), (', I\'ll show you the first 10' if len(piis) > 10 else '')))
    articles = [elsevier_client.download_article(pii, id_type='pii') for pii in piis[:10]]
    titles = [ET.fromstring(content).findall('*/dc:title',
              namespaces=elsevier_client.elsevier_ns)[0].text.strip() for content in articles]
    for idx, title in enumerate(titles):
        clean_pii = piis[idx].replace('(', '').replace(')', '')
        printmd('* %d: <a href="https://www.sciencedirect.com/science/article/pii/%s" target="_blank">%s</a>' % (idx, clean_pii, title))

def on_nl(b):
    global statements
    text = nl_input.value
    ep = eidos.process_text(text, webservice='http://localhost:5000')
    print('We extracted %d statements:' % len(statements))
    for stmt in ep.statements:
        sg = stmt.subj.db_refs['UN'][0][0].split('/')[-1]
        og = stmt.obj.db_refs['UN'][0][0].split('/')[-1]
        printmd('* **%s**(%s) %s **%s**(%s)' % (sg, stmt.subj.name, '-|' if stmt.overall_polarity() == -1 else '->', og, stmt.obj.name))
    statements += ep.statements
    

def on_read(b):
    global articles
    global statements
    raw_txt = elsevier_client.extract_text(articles[int(paper_id.value)])
    if not raw_txt:
        print('There seemed to be no usable content in that paper. Try another one!')
        statements = []
    if 'Internal Server Error' in raw_txt:
        print('Sorry, that paper was not accessible for reading.')
        statements = []
    ep = eidos.process_text(raw_txt, webservice='http://localhost:5000')
    statements = ep.statements
    print('We extracted %d statements:' % len(statements))
    for stmt in statements:
        sg = stmt.subj.db_refs['UN'][0][0].split('/')[-1]
        og = stmt.obj.db_refs['UN'][0][0].split('/')[-1]
        printmd('* **%s**(%s) %s **%s**(%s)' % (sg, stmt.subj.name, '-|' if stmt.overall_polarity() == -1 else '->', og, stmt.obj.name))
        
def on_query(b):
    ste = eidos.process_text(query.value, webservice='http://localhost:5000').statements
    if not ste:
        print('Sorry, I couldn\'t get any statements out of that')
        return None
    pa = PysbAssembler()
    pa.add_statements(statements)
    model = pa.make_model()
    mc = ModelChecker(model, statements=standardize_names(ste))
    mc.prune_influence_map()
    res = mc.check_model()
    paths = res[0][1].paths
    for path in paths:
        print(path)

# Interactive reading and assembly

### Enter a search topic you are interested in

In [2]:
search_term = widgets.Text(value='conflict south sudan')
find_papers = widgets.Button(description='Find')
find_papers.on_click(on_search)
display(search_term, find_papers)

INFO: [2018-08-01 13:55:22] indra/elsevier - 428
INFO: [2018-08-01 13:55:22] indra/elsevier - Found link to next batch of results.
INFO: [2018-08-01 13:55:27] indra/elsevier - 428
INFO: [2018-08-01 13:55:27] indra/elsevier - Found link to next batch of results.
INFO: [2018-08-01 13:55:29] indra/elsevier - 428


I found a total of 428 papers, I'll show you the first 10.


* 0: <a href="https://www.sciencedirect.com/science/article/pii/S0738-05931630391-1" target="_blank">Factors affecting early grade educational attainment: Evidence from South Sudan</a>

* 1: <a href="https://www.sciencedirect.com/science/article/pii/S0264-410X1631141-0" target="_blank">Pneumonia prevention: Cost-effectiveness analyses of two vaccines among refugee children aged under two years, Haemophilus influenzae type b-containing and pneumococcal conjugate vaccines, during a humanitarian emergency, Yida camp, South Sudan</a>

* 2: <a href="https://www.sciencedirect.com/science/article/pii/S0140-67361730280-5" target="_blank">South Sudan: aftermaths of 3 years of armed conflict</a>

* 3: <a href="https://www.sciencedirect.com/science/article/pii/S2214-109X1630288-1" target="_blank">Elimination of sleeping sickness in Uganda could be jeopardised by conflict in South Sudan</a>

* 4: <a href="https://www.sciencedirect.com/science/article/pii/S0895-39881830009-6" target="_blank">Epidemiology of Measles Cases in South Darfur State, Sudan, 2011–2015</a>

* 5: <a href="https://www.sciencedirect.com/science/article/pii/S2352-61811630130-5" target="_blank">Households' dietary habits and food consumption patterns in Hamishkoreib locality, Kassala State, Sudan</a>

* 6: <a href="https://www.sciencedirect.com/science/article/pii/S0304-40171730137-1" target="_blank">Genotyping of Theileria lestoquardi from sheep and goats in Sudan to support control of Malignant Ovine Theileriosis</a>

* 7: <a href="https://www.sciencedirect.com/science/article/pii/S0379-07381630554-0" target="_blank">High concentrations of lead and barium in hair of the rural population caused by water pollution in the Thar Jath oilfields in South Sudan</a>

* 8: <a href="https://www.sciencedirect.com/science/article/pii/S0140-67361731351-X" target="_blank">Famine in South Sudan</a>

* 9: <a href="https://www.sciencedirect.com/science/article/pii/S0277-53951630196-0" target="_blank">“Only the person who wears the shoes knows where the shoes pinch”: Reworking bottom-up approach in South Sudan</a>

INFO: [2018-08-01 13:56:31] indra/elsevier - 69


I found a total of 69 papers, I'll show you the first 10.


* 0: <a href="https://www.sciencedirect.com/science/article/pii/S0169-80951630714-1" target="_blank">Concentration of daily precipitation in the contiguous United States</a>

* 1: <a href="https://www.sciencedirect.com/science/article/pii/S0341-81621730020-6" target="_blank">Runoff and erosion processes on bare slopes in the Karst Rocky Desertification Area</a>

* 2: <a href="https://www.sciencedirect.com/science/article/pii/S2214-58181630187-2" target="_blank">Assessment of the consistency among global precipitation products over the United Arab Emirates</a>

* 3: <a href="https://www.sciencedirect.com/science/article/pii/S2351-98941730059-8" target="_blank">Arthropod diversity and assemblage structure response to deforestation and desertification in the Sahel of western Senegal</a>

* 4: <a href="https://www.sciencedirect.com/science/article/pii/S1110-98231630170-3" target="_blank">Spectral mixture analysis (SMA) and change vector analysis (CVA) methods for monitoring and mapping land degradation/desertification in arid and semiarid areas (Sudan), using Landsat imagery</a>

* 5: <a href="https://www.sciencedirect.com/science/article/pii/S0009-25411730568-5" target="_blank">Insight into factors controlling formation rates of pedogenic carbonates: A combined geochemical and isotopic approach in dryland soils of the US Southwest</a>

* 6: <a href="https://www.sciencedirect.com/science/article/pii/S0169-20461730016-6" target="_blank">Identification of landscape character types for trans-regional integration in the Wuling Mountain multi-ethnic area of southwest China</a>

* 7: <a href="https://www.sciencedirect.com/science/article/pii/S2095-31191661507-1" target="_blank">Assessment for soil loss by using a scheme of alterative sub-models based on the RUSLE in a Karst Basin of Southwest China</a>

* 8: <a href="https://www.sciencedirect.com/science/article/pii/S0038-092X1730485-1" target="_blank">A GIS-based Fuzzy-AHP method for the evaluation of solar farms locations: Case study in Khuzestan province, Iran</a>

* 9: <a href="https://www.sciencedirect.com/science/article/pii/B978-0-12-801712-8.00004-4" target="_blank">Chapter 4 Human Impacts on the Global Landscape</a>

I found a total of 428 papers, I'll show you the first 10.


* 0: <a href="https://www.sciencedirect.com/science/article/pii/S0738-05931630391-1" target="_blank">Factors affecting early grade educational attainment: Evidence from South Sudan</a>

* 1: <a href="https://www.sciencedirect.com/science/article/pii/S0264-410X1631141-0" target="_blank">Pneumonia prevention: Cost-effectiveness analyses of two vaccines among refugee children aged under two years, Haemophilus influenzae type b-containing and pneumococcal conjugate vaccines, during a humanitarian emergency, Yida camp, South Sudan</a>

* 2: <a href="https://www.sciencedirect.com/science/article/pii/S0140-67361730280-5" target="_blank">South Sudan: aftermaths of 3 years of armed conflict</a>

* 3: <a href="https://www.sciencedirect.com/science/article/pii/S2214-109X1630288-1" target="_blank">Elimination of sleeping sickness in Uganda could be jeopardised by conflict in South Sudan</a>

* 4: <a href="https://www.sciencedirect.com/science/article/pii/S0895-39881830009-6" target="_blank">Epidemiology of Measles Cases in South Darfur State, Sudan, 2011–2015</a>

* 5: <a href="https://www.sciencedirect.com/science/article/pii/S2352-61811630130-5" target="_blank">Households' dietary habits and food consumption patterns in Hamishkoreib locality, Kassala State, Sudan</a>

* 6: <a href="https://www.sciencedirect.com/science/article/pii/S0304-40171730137-1" target="_blank">Genotyping of Theileria lestoquardi from sheep and goats in Sudan to support control of Malignant Ovine Theileriosis</a>

* 7: <a href="https://www.sciencedirect.com/science/article/pii/S0379-07381630554-0" target="_blank">High concentrations of lead and barium in hair of the rural population caused by water pollution in the Thar Jath oilfields in South Sudan</a>

* 8: <a href="https://www.sciencedirect.com/science/article/pii/S0140-67361731351-X" target="_blank">Famine in South Sudan</a>

* 9: <a href="https://www.sciencedirect.com/science/article/pii/S0277-53951630196-0" target="_blank">“Only the person who wears the shoes knows where the shoes pinch”: Reworking bottom-up approach in South Sudan</a>

### Choose a paper to read

In [8]:
paper_id = widgets.Dropdown(options={t: i for t, i in zip(titles, range(len(titles)))}, value=3)
read_paper = widgets.Button(description='Read')
read_paper.on_click(on_read)
display(paper_id, read_paper)

INFO: [2018-08-01 14:00:46] indra/elsevier - Could not find main body element xocs:doc/xocs:serial-item/ja:article/ja:body
INFO: [2018-08-01 14:00:46] indra/elsevier - Could not find main body element xocs:doc/xocs:serial-item/ja:converted-article/ja:body
INFO: [2018-08-01 14:00:46] indra/elsevier - Found main body element xocs:doc/xocs:serial-item/ja:simple-article/ja:body
INFO: [2018-08-01 14:00:46] indra/elsevier - Found no sections in main body
INFO: [2018-08-01 14:00:46] indra/elsevier - Found 9 paragraphs in main body


We extracted 16 statements:


* **population**(internally displaced people) -> **disease**(disease)

* **livelihood**(tool have contribute) -| **famine**(sleep sickness)

* **conflict**(instability conflict period follow decolonisation dr) -> **duty**(dismantle health service include disease control programme hat disease)

* **famine**(sleep sickness) -| **death**(morbidity)

* **conflict**(conflict) -| **human_migration**(flow refugee cross)

* **pest**(parasite transmit bite tsetse fly) -> **disease**(sleep sickness be disease)

* **conflict**(war) -> **human_migration**(refugee)

* **duty**(phase) -> **death**(sequelae survivor)

* **famine**(sleep sickness) -| **death**(mortality)

* **population**(internally displaced people) -> **biotic_resources**(new areas)

* **disease**(epidemic) -> **death**(death sub-Saharan)

* **conflict**(violence) -> **conflict**(movement flee conflict)

* **conflict**(war) -> **death**(death)

* **disease**(epidemic) -> **death**(thousand death)

* **government_entity**(partner) -| **climate_change_mitigation**(be indicator control)

* **duty**(assistance community invest) -| **crisis**(situation)

INFO: [2018-08-01 14:13:02] indra/elsevier - Could not find main body element xocs:doc/xocs:serial-item/ja:article/ja:body
INFO: [2018-08-01 14:13:02] indra/elsevier - Could not find main body element xocs:doc/xocs:serial-item/ja:converted-article/ja:body
INFO: [2018-08-01 14:13:02] indra/elsevier - Found main body element xocs:doc/xocs:serial-item/ja:simple-article/ja:body
INFO: [2018-08-01 14:13:02] indra/elsevier - Found no sections in main body
INFO: [2018-08-01 14:13:02] indra/elsevier - Found 9 paragraphs in main body


We extracted 16 statements:


* **population**(internally displaced people) -> **disease**(disease)

* **livelihood**(tool have contribute) -| **famine**(sleep sickness)

* **conflict**(instability conflict period follow decolonisation dr) -> **duty**(dismantle health service include disease control programme hat disease)

* **famine**(sleep sickness) -| **death**(morbidity)

* **conflict**(conflict) -| **human_migration**(flow refugee cross)

* **pest**(parasite transmit bite tsetse fly) -> **disease**(sleep sickness be disease)

* **conflict**(war) -> **human_migration**(refugee)

* **duty**(phase) -> **death**(sequelae survivor)

* **famine**(sleep sickness) -| **death**(mortality)

* **population**(internally displaced people) -> **biotic_resources**(new areas)

* **disease**(epidemic) -> **death**(death sub-Saharan)

* **conflict**(violence) -> **conflict**(movement flee conflict)

* **conflict**(war) -> **death**(death)

* **disease**(epidemic) -> **death**(thousand death)

* **government_entity**(partner) -| **climate_change_mitigation**(be indicator control)

* **duty**(assistance community invest) -| **crisis**(situation)

INFO: [2018-08-01 14:20:40] indra/elsevier - Could not find main body element xocs:doc/xocs:serial-item/ja:article/ja:body
INFO: [2018-08-01 14:20:40] indra/elsevier - Could not find main body element xocs:doc/xocs:serial-item/ja:converted-article/ja:body
INFO: [2018-08-01 14:20:40] indra/elsevier - Found main body element xocs:doc/xocs:serial-item/ja:simple-article/ja:body
INFO: [2018-08-01 14:20:40] indra/elsevier - Found no sections in main body
INFO: [2018-08-01 14:20:40] indra/elsevier - Found 9 paragraphs in main body


We extracted 16 statements:


* **population**(internally displaced people) -> **disease**(disease)

* **livelihood**(tool have contribute) -| **famine**(sleep sickness)

* **conflict**(instability conflict period follow decolonisation dr) -> **duty**(dismantle health service include disease control programme hat disease)

* **famine**(sleep sickness) -| **death**(morbidity)

* **conflict**(conflict) -| **human_migration**(flow refugee cross)

* **pest**(parasite transmit bite tsetse fly) -> **disease**(sleep sickness be disease)

* **conflict**(war) -> **human_migration**(refugee)

* **duty**(phase) -> **death**(sequelae survivor)

* **famine**(sleep sickness) -| **death**(mortality)

* **population**(internally displaced people) -> **biotic_resources**(new areas)

* **disease**(epidemic) -> **death**(death sub-Saharan)

* **conflict**(violence) -> **conflict**(movement flee conflict)

* **conflict**(war) -> **death**(death)

* **disease**(epidemic) -> **death**(thousand death)

* **government_entity**(partner) -| **climate_change_mitigation**(be indicator control)

* **duty**(assistance community invest) -| **crisis**(situation)

In [13]:
statements[0].subj.db_refs

{'FAO': [('FAO/events/Value/Number of severely food insecure people',
   0.5262174672978424),
  ('FAO/events/Value/Number of people undernourished (millions) (3-year average)',
   0.5239178623115515),
  ('FAO/events/Losses from manure treated (N content)/Chickens, layers',
   0.5090876234349877),
  ('FAO/events/Value/Percentage of children under 5 years of age affected by wasting (%)',
   0.5016528853921354),
  ('FAO/events/Losses from manure treated (N content)/Cattle, non-dairy',
   0.4880104400991874),
  ('FAO/events/Value/Prevalence of severe food insecurity in the total population',
   0.4855536707724354),
  ('FAO/events/Losses from manure treated (N content)/Cattle, dairy',
   0.4820942883836307),
  ('FAO/events/Direct emissions (CO2eq) (Manure applied)/Chickens, layers',
   0.4726333773918399),
  ('FAO/events/Gross Production Value (constant 2004-2006 1000 I$)/Beans, dry',
   0.4699580481575941),
  ('FAO/events/Losses from manure treated (N content)/Chickens, broilers',
   0.468

### Assemble result of reading

In [16]:
statements = ac.filter_grounded_only(statements, score_threshold=0.7)

INFO: [2018-08-01 14:13:15] indra/assemble_corpus - Filtering 16 statements for grounded agents...
INFO: [2018-08-01 14:13:15] indra/assemble_corpus - 16 statements after filter...


In [18]:
ca = CAGAssembler(standardize_names(statements))
ca.make_model()
Javascript(ca.generate_jupyter_js())

<IPython.core.display.Javascript object>

### Extend the model with natural language

In [6]:
nl_input = widgets.Textarea(value='conflict causes displacement')
read_nl = widgets.Button(description='Read')
read_nl.on_click(on_nl)
display(nl_input, read_nl)

We extracted 0 statements:


* **conflict**(conflict) -> **human_migration**(displacement)

We extracted 1 statements:


* **conflict**(conflict) -> **human_migration**(displacement)

We extracted 2 statements:


* **conflict**(conflict) -| **human_migration**(displacement)

In [None]:
statements

In [7]:
ca = CAGAssembler(standardize_names(statements))
ca.make_model()
Javascript(ca.generate_jupyter_js())

<IPython.core.display.Javascript object>

### Ask questions about causal paths

In [5]:
query = widgets.Text(value='how does pest result in death?')
run_query = widgets.Button(description='Run')
run_query.on_click(on_query)
display(query, run_query)