# Delphi Demo - CAGs from <span style='color:royalblue; font-style: italic'>Text</span>

*July 30, 2018*

This is a Jupyter notebook created to showcase the design and capabilities of
the Delphi package, available at [https://github.com/ml4ai/delphi](https://github.com/ml4ai/delphi). 

A rendered HTML version of this notebook can also be found at
[`http://vision.cs.arizona.edu/adarsh/export/delphi_demo_wm_pi_meeting_2018.html`](http://vision.cs.arizona.edu/adarsh/export/delphi_demo_wm_pi_meeting_2018.html)

This demo has been tested with the version of Delphi corresponding to the commit hash below.

In [None]:
# Print the commit hash that HEAD currently points to.
!git rev-parse HEAD

## Construct and visualize the CAG corresponding to the use case

In [None]:
%load_ext autoreload
%autoreload 2
from delphi.utils import get_data_from_url
import urllib.request as request
import pickle
from delphi.AnalysisGraph import AnalysisGraph
from delphi.visualization import visualize
from delphi.manipulation import merge_nodes
from delphi.assembly import get_valid_statements_for_modeling
from delphi.subgraphs import get_subgraph_for_concept_pairs
import pandas as pd
from delphi.inspection import statements
import delphi.jupyter_tools as jt
from delphi.quantification import map_concepts_to_indicators

In [None]:
from delphi.utils.indra import get_statements_from_json

# Download the small set of preassembled INDRA statements used by this demo.
url = "http://vision.cs.arizona.edu/adarsh/export/demos/data/preassembled_indra_statements-small.json"
# Use the response as a context manager so the HTTP connection is closed as
# soon as the payload has been read, instead of being left to the garbage
# collector.
with request.urlopen(url) as response:
    sts = get_statements_from_json(response.read())

In [None]:
# Assemble an AnalysisGraph from the loaded statements (`sts`).
G = AnalysisGraph.from_statements(sts)

In [None]:
# Merge the 'food_security' and 'food_insecurity' nodes, flagging them as having
# opposite polarity (presumably the former is folded into the latter with its
# polarity inverted -- confirm against merge_nodes).
G = merge_nodes(G, 'food_security', 'food_insecurity', same_polarity=False)

In [None]:
# Concepts of interest; the same list is reused to highlight nodes when drawing.
concepts = ["precipitation", "food_insecurity", "conflict"]
# Replace G with the subgraph returned for these concepts (cutoff=2 is
# presumably a path-length limit -- confirm against get_subgraph_for_concept_pairs).
G = get_subgraph_for_concept_pairs(G, concepts, cutoff=2)

In [None]:
# Draw the graph with rankdir='TB', highlighting the concepts of interest.
visualize(G, rankdir='TB', nodes_to_highlight=concepts)

## Inspecting and editing CAGs

## Inspecting statements

In [None]:
# Raise the pandas display limits (presumably so long statement text is not
# truncated), then build the statement inspection table for G.
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.width', 1000)
jt.create_statement_inspection_table(statements(G))

## Removing incorrect edges

In [None]:
# Drop the two edges judged incorrect -- food_insecurity -> drought and the
# food_insecurity self-loop -- then redraw the graph with rankdir='LR'.
G.remove_edges_from(
    [
        ('food_insecurity', 'drought'),
        ('food_insecurity', 'food_insecurity'),
    ]
)
visualize(G, nodes_to_highlight='food_insecurity', rankdir='LR')

## Mapping concepts to indicators

In [None]:
# Location of the concept-to-indicator mapping file.
url = 'http://vision.cs.arizona.edu/adarsh/export/demos/data/concept_to_indicator_mapping.txt'
# NOTE(review): `mapping` is not used again in the visible notebook, and the
# call below is given the URL rather than this object -- confirm whether this
# download is still needed.
mapping = get_data_from_url(url)
# Attach indicators to the concepts in G. The meaning of the positional 2 is
# not visible here (presumably the number of indicators per concept -- confirm).
map_concepts_to_indicators(G, 2, url)
visualize(G, indicators=True)

In [None]:
from delphi.random_variables import Indicator

# Discard the first indicator attached to each of these concepts and keep the
# remainder, then redraw the graph with indicators shown.
for concept_name in ('conflict', 'market'):
    node_data = G.nodes[concept_name]
    node_data['indicators'] = node_data['indicators'][1:]
visualize(G, indicators=True)

In [None]:
from datetime import datetime
from delphi.parameterization import parameterize

# Single source of truth for the date: it is used both to parameterize the
# graph and to label the figure, so the two cannot drift apart.
date = datetime(2014, 1, 1)
url = 'http://vision.cs.arizona.edu/adarsh/export/demos/data/south_sudan_data.csv'
# The data file is pipe-separated and indexed by its "Indicator Name" column.
df = pd.read_csv(url, sep="|", index_col="Indicator Name")
parameterize(G, date, df)
visualize(G, indicators=True, indicator_values = True,
          graph_label=f'Causal Analysis Graph for South Sudan, {date.year}')

## Infer transition model

In [None]:
from delphi.inference import infer_transition_model
# Adjective data file passed to the transition-model inference.
url = 'http://vision.cs.arizona.edu/adarsh/export/demos/data/adjectiveData.tsv'
# NOTE(review): the HTTP response opened here is never explicitly closed, and
# the meaning of the positional 100 is not visible in this notebook -- confirm
# against infer_transition_model.
infer_transition_model(request.urlopen(url), 100)

## Set initial parameters

In [None]:
from delphi.export import export_default_initial_values

# Write the default initial values for G to 'variables.csv', then load them
# back as a Series keyed by variable name (column 0 is the index, column 1
# holds the values; the file has no header row).
export_default_initial_values(G, variables_file='variables.csv')
# NOTE: `error_bad_lines` was deprecated in pandas 1.3 in favour of
# `on_bad_lines='skip'`; it is kept here for the pandas version this demo was
# tested with.
s0 = pd.read_csv('variables.csv', index_col=0, header=None,
                 error_bad_lines=False)[1]
# Override the initial rate of change of the 'conflict' concept.
s0.loc['∂(conflict)/∂t'] = 0.1
# Write without a header row so the file keeps the header-less layout it was
# read with above; the default for Series.to_csv differs between pandas
# versions, so state it explicitly.
s0.to_csv('variables.csv', header=False)
s0

## Execute model

In [None]:
%matplotlib inline
from delphi.bmi import *
from matplotlib import pyplot as plt
import seaborn as sns
from delphi.utils.misc import _insert_line_breaks

# Initialise the model from the values stored in 'variables.csv'.
initialize(G, 'variables.csv')
concept1 = 'conflict'
concept2 = 'food_insecurity'

# First indicator attached to each concept of interest.
ind1 = G.nodes[concept1]['indicators'][0]
ind2 = G.nodes[concept2]['indicators'][0]

# One record per plotted series (two concepts, two indicators): the collected
# x/y points, a human-readable label, a units suffix and the subplot index.
plot_data = {
    variable: {
        'xs': [],
        'ys': [],
        'ylabel': variable.replace('_', ' ').capitalize(),
        'units': '',
        'ax_number': i,
    }
    for i, variable in enumerate((concept1, concept2, ind1.name, ind2.name))
}

plot_data[ind1.name]['units'] = f"({ind1.unit})"
plot_data[ind2.name]['units'] = f"({ind2.unit})"

n_timesteps = 5
for day in range(n_timesteps):
    update(G)
    for concept in (concept1, concept2):
        # Every sample in the concept's dataset is recorded at this time step.
        for datapoint in G.nodes[concept]['rv'].dataset:
            plot_data[concept]['xs'].append(day + 1)
            plot_data[concept]['ys'].append(datapoint)
        indicator = G.nodes[concept]['indicators'][0]
        for datapoint in indicator.dataset:
            # Samples of ind2 are kept only inside the open interval (0, 100)
            # (presumably because it is a percentage); samples of any other
            # indicator are kept unfiltered.
            if indicator.name != ind2.name or 0 < datapoint < 100:
                plot_data[indicator.name]['xs'].append(day + 1)
                plot_data[indicator.name]['ys'].append(datapoint)

fig, axes = plt.subplots(1, 4, figsize=(20, 4))
# Iterate with a distinct name so the `plot_data` mapping is not rebound to one
# of its own entries by the loop.
for series in plot_data.values():
    ax = axes[series['ax_number']]
    ax.set_xlabel('Time step')
    # Tick range follows the number of simulated steps instead of a literal.
    ax.set_xticks(range(n_timesteps + 1))
    ax.set_title(_insert_line_breaks(' '.join((series['ylabel'], series['units']))))
    # x/y are passed by keyword: newer seaborn releases reject positional data
    # vectors, and the keyword form is accepted by older releases as well.
    sns.lineplot(x=series['xs'], y=series['ys'], ax=ax)

Still to do: truncate the probability distributions to the range 0–100 for percentage-valued indicators.

In [None]:
from delphi.inspection import inspect_edge
# Inspect the ('conflict', 'food_insecurity') edge of the current graph.
inspect_edge(G, 'conflict', 'food_insecurity')

## Exploring the unknown unknowns

In [None]:
# Pickled, unfiltered statements for the "unknown unknowns" part of the demo.
url = 'http://vision.cs.arizona.edu/adarsh/export/demos/data/pi_mtg_demo_unfiltered.pkl'
# NOTE(security): unpickling can execute arbitrary code, and this payload is
# fetched over plain HTTP -- only run this cell against a host you trust.
sts=pickle.load(get_data_from_url(url))

In [None]:
from delphi.subgraphs import get_subgraph_for_concept_pair

# Rebuild the graph from the unfiltered statements.
G = AnalysisGraph.from_statements(sts)
# Keep the graph returned by merge_nodes, matching the earlier merge in this
# notebook; discarding the return value would lose the merge if merge_nodes
# does not modify its argument in place.
G = merge_nodes(G, 'food_security', 'food_insecurity', same_polarity=False)
# Restrict to the subgraph returned for the drought / food_insecurity pair.
G = get_subgraph_for_concept_pair(G, 'drought', 'food_insecurity', cutoff=2)
visualize(G, nodes_to_highlight=['drought','food_insecurity'])

# Causal analysis graphs from <span style='color:royalblue; font-style: italic'>Software</span>

## Original Fortran program

In [None]:
# Show the original Fortran source. A plain string literal is used because the
# path contains no placeholders to interpolate.
jt.display('../data/program_analysis/crop_yield.f')

In [None]:
# Run the autoTranslate script on the Fortran source from inside its own
# directory, then change back to the notebooks directory.
%cd ../delphi/program_analysis/autoTranslate/
!./autoTranslate ../../../data/program_analysis/crop_yield.f
%cd ../../../notebooks/

## AST in XML Format

In [None]:
# Show the crop_yield.xml file from the autoTranslate directory.
jt.display('../delphi/program_analysis/autoTranslate/crop_yield.xml')

## Equivalent Python code

In [None]:
# Show the crop_yield.py file from the autoTranslate directory.
jt.display('../delphi/program_analysis/autoTranslate/crop_yield.py')

## Extracted lambda functions

In [None]:
# Show the lambdas.py file from the autoTranslate directory.
jt.display('../delphi/program_analysis/autoTranslate/lambdas.py')

## DBN-JSON file

In [None]:
# Path to the DBN-JSON file; kept in a variable because it is reused when the
# scope graph is built from it.
dbn_json = '../delphi/program_analysis/autoTranslate/pgm.json'
jt.display(dbn_json)

## Executable DBN - Loop plate representation

In [None]:
from delphi.program_analysis.scopes import *
# Load the root Scope from the DBN-JSON file and convert it to a graph object
# (presumably a pygraphviz AGraph, given the draw() call below -- confirm).
root = Scope.from_json(dbn_json)
A = root.to_agraph()
# Render the graph as PNG with the 'dot' layout program and display the image.
jt.display_image(A.draw(format='png', prog='dot'))

## High-level representation of CAG from program

In [None]:
import sys
# Make the autoTranslate directory importable; this must happen before
# `import lambdas`, which is looked up on sys.path.
sys.path.append('../delphi/program_analysis/autoTranslate')
import lambdas
from delphi.program_analysis.ProgramAnalysisGraph import ProgramAnalysisGraph
from delphi.bmi import *
# Build the program analysis graph from the scope graph `A` and the lambdas
# module, then initialise it.
G = ProgramAnalysisGraph.from_agraph(A, lambdas)
initialize(G)
from delphi.visualization import visualize
# Draw the graph with node values shown.
visualize(G, show_values = True)

In [None]:
# Call update once on the graph and redraw it with node values shown.
update(G)
visualize(G, show_values = True)

In [None]:
# Call update once more and redraw the graph with node values shown.
update(G)
visualize(G, show_values = True)

## Sensitivity Analysis

In [None]:
%matplotlib inline
import seaborn as sns
# Use seaborn's 'darkgrid' style for the plots below.
sns.set_style('darkgrid')
from IPython.display import set_matplotlib_formats
# NOTE(review): IPython deprecated IPython.display.set_matplotlib_formats in
# 7.23 in favour of matplotlib_inline.backend_inline.set_matplotlib_formats --
# consider switching when upgrading IPython.
set_matplotlib_formats('retina')
import numpy as np
from matplotlib import pyplot as plt
from delphi.utils import compose, rcompose
from delphi.program_analysis.ProgramAnalysisGraph import ProgramAnalysisGraph

def make_plots(n_samples, deterministic = True, n_days = 30):
    """Run the program analysis graph repeatedly and plot selected variables.

    For every sample a fresh ``ProgramAnalysisGraph`` is built from the
    module-level ``A`` and ``lambdas``, initialised, and advanced ``n_days``
    times with ``update``. After each update the values of the ``RAIN``,
    ``TOTAL_RAIN`` and ``YIELD_EST`` nodes are recorded against the current
    value of the ``DAY`` node. The pooled samples of each variable are then
    drawn with ``sns.lineplot`` on that variable's own axis.

    Args:
        n_samples: Number of independent runs whose samples are pooled.
        deterministic: If false, the ``init_fn`` of the ``MAX_RAIN`` node is
            replaced by a draw from ``np.random.normal(4, 1)`` before the
            graph is initialised.
        n_days: Number of ``update`` steps per run; the default of 30 matches
            the previously hard-coded value.
    """
    variables = ('RAIN', 'TOTAL_RAIN', 'YIELD_EST')
    vals = {k: [] for k in variables}
    days = {k: [] for k in variables}
    palette = sns.color_palette()
    _, axes = plt.subplots(1, len(variables), figsize=(18, 5))

    for _sample in range(n_samples):
        G = ProgramAnalysisGraph.from_agraph(A, lambdas)
        if not deterministic:
            G.nodes['MAX_RAIN']['init_fn'] = lambda: np.random.normal(4, 1)
        initialize(G)
        for _step in range(n_days):
            update(G)
            day = G.nodes['DAY']['value']
            for k in variables:
                vals[k].append(G.nodes[k]['value'])
                days[k].append(day)

    # One axis and one palette colour per variable, in declaration order.
    for k, axis, color in zip(variables, axes, palette):
        # x/y are passed by keyword: newer seaborn releases reject positional
        # data vectors, and the keyword form works on older releases too.
        sns.lineplot(x=days[k], y=vals[k], ax=axis, label=k, color=color)
        axis.set_xlabel('DAY', fontsize=20)
        axis.set_ylabel(k, fontsize=20)

    plt.tight_layout()

# Pool 10 runs with the MAX_RAIN initial value drawn at random for each run.
make_plots(10, deterministic=False)