# Backcasting Demo Notebook

_Loren Champlin_

Adapted from _Adarsh Pyarelal_'s WM 12 Month Evaluation Notebook 

As always, we begin with imports, and print out the commit hash for a rendered
version of the notebook.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import pickle
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('retina')
from delphi.visualization import visualize
import delphi.jupyter_tools as jt
import numpy as np
import pandas as pd
from scipy import stats
#Comment out the next line if you do not have the delphi.db file. 
from delphi.db import engine
jt.print_commit_hash_message()
import random as rm
import delphi.evaluation as EN
import delphi.AnalysisGraph as AG
import warnings
#warnings.filterwarnings("ignore")
import logging
logging.getLogger().setLevel(logging.CRITICAL)
from indra.statements import (
    Concept,
    Influence,
    Evidence,
    Event,
    QualitativeDelta,
)
from delphi.utils.indra import *
from delphi.utils.shell import cd
import seaborn as sns
import matplotlib.pyplot as plt
from delphi.utils.fp import flatMap, take, ltake, lmap, pairwise, iterate, exists

Here I set the random seeds for both NumPy and Python's built-in `random` module.

In [None]:
# Seed Python's `random` module (imported as `rm`) and NumPy's global
# generator with the same fixed value.
RANDOM_SEED = 87
rm.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [None]:
# Load the ReachJongleiJan migration annotations and reshape them into one row
# per observation with explicit source/destination locations, a unit, a value
# and start/end dates.
df = pd.read_csv(
    "../scripts/data/raw/migration/Initial annotation exercise for migration use case - ReachJongleiJan - dep var.tsv",
    sep="\t",
)

# Keep only the rows that carry a value. The explicit copy makes `df` an
# independent frame, so the in-place edits below never write to a slice of the
# frame returned by `read_csv`.
df = df[df["Value count"].notna()].copy()

# Columns are dropped by position; the positions refer to the layout of the
# raw TSV file.
df.drop(df.columns[[0,1,2,4,5,8,9,12,13,16,19,20]],axis=1, inplace=True)

# Month name -> month number. Applied to every cell of the frame that is
# exactly equal to one of the keys.
d = {"January": 1.0, "February": 2.0, "March": 3.0, "April": 4.0, "May": 5.0, "June": 6.0, "July": 7.0, "August": 8.0, "September": 9.0, "October": 10.0, "November": 11.0, "December": 12.0}

df.replace(d, inplace=True)


def _fill_missing(column, sentinel):
    """Return *column* with missing entries replaced by *sentinel*.

    When every resulting value is a whole number the column is returned as
    int64, which is what the deprecated ``downcast="infer"`` option used to
    produce; otherwise the filled column is returned unchanged.
    """
    filled = column.fillna(sentinel)
    numeric = pd.to_numeric(filled, errors="coerce")
    if numeric.notna().all() and (numeric == numeric.round()).all():
        return numeric.astype("int64")
    return filled


# Missing years become -1 and missing months become 0. The result is assigned
# back to the frame explicitly: calling `fillna(inplace=True)` on
# `df["column"]` operates on a temporary object and is not guaranteed to
# update `df` (with pandas Copy-on-Write it never does).
for column_name, sentinel in (
    ("Start year", -1),
    ("Start month", 0),
    ("End year", -1),
    ("End month", 0),
):
    df[column_name] = _fill_missing(df[column_name], sentinel)

# Number of days in each month (February is always 28; leap years are not
# taken into account).
c = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}

# Values whose unit is "Daily" are multiplied by the number of days in their
# end month. Rows whose end month is the 0 sentinel are left untouched.
is_daily = df["Value unit (Amount, Rate, Percentage)"] == "Daily"
for month, days_in_month in c.items():
    rows = is_daily & (df["End month"] == month)
    df.loc[rows, "Value count"] = df.loc[rows, "Value count"] * days_in_month

df['Unit'] = 'people'
df.reset_index(drop=True,inplace=True)

# The variable name starts out as the event trigger text and is then
# overridden: the first two rows by position, the rest by trigger word.
df['Variable'] = df['Event trigger text']

df.loc[0:1,"Variable"] = "Internally Displaced People"

df.loc[df["Event trigger text"] == "leaving", "Variable"] = "Outgoing Migrants"
df.loc[df["Event trigger text"] == "returning", "Variable"] = "Incoming Migrants"

# Default locations for every row; "None" is stored as a string.
df["Source country"] = "South Sudan"
df["Source county"] = "None"
df["Source state"] = "None"
df["Destination country"] = "Ethiopia"
df["Destination county"] = "None"
df["Destination state"] = "None"

# Hand-entered locations for row 0.
df.loc[0,'Source state'] = "Jonglei"
df.loc[0,'Destination country'] = "South Sudan"
df.loc[0,'Destination state'] = "Eastern Lakes"
df.loc[0,'Destination county'] = "Awerial South"

# Hand-entered locations for row 1.
df.loc[1,'Source state'] = "Yei River"
df.loc[1,'Source county'] = "Yei"
df.loc[1,'Destination country'] = "South Sudan"
df.loc[1,'Destination state'] = "Jonglei"
df.loc[1,'Destination county'] = "Bor"

# Incoming migrants travel in the opposite direction to the default.
df.loc[df["Variable"] == 'Incoming Migrants', 'Source country'] = "Ethiopia"
df.loc[df["Variable"] == 'Incoming Migrants', 'Destination country'] = "South Sudan"
df["Source"] = "ReachJongleiJan"

# Second positional drop (positions are relative to the columns that remain at
# this point), then rename the value column and fix the final column order.
df.drop(df.columns[[0,1,2,4]],axis=1, inplace=True)
df.rename(columns={"Value count": "Value"}, inplace=True)
df = df.reindex(columns=['Source country','Source state','Source county','Destination country','Destination state','Destination county','Source','Unit','Value','Variable','Start year','Start month','End year','End month',])

df

In [None]:
# Collapse the source/destination columns into a single location per row and
# the start/end columns into a single date per row.

# Each location column is taken from the source side, except for incoming
# migrants, where it is taken from the destination side.
incoming = df['Variable'] == 'Incoming Migrants'
for target, source_column, destination_column in (
    ('Country', 'Source country', 'Destination country'),
    ('State', 'Source state', 'Destination state'),
    ('County', 'Source county', 'Destination county'),
):
    df[target] = df[source_column]
    df.loc[incoming, target] = df.loc[incoming, destination_column]

# The date is the end date; rows whose end year is -1 and end month is 0 use
# the start date instead.
df['Year'] = df['End year']
df['Month'] = df['End month']

no_end_date = (df['End year'] == -1) & (df['End month'] == 0)
df.loc[no_end_date, 'Year'] = df.loc[no_end_date, 'Start year']
df.loc[no_end_date, 'Month'] = df.loc[no_end_date, 'Start month']

# Remove the columns that have just been folded into the new ones, then put
# the remaining columns into their final order.
df.drop(
    columns=[
        'Source country',
        'Source state',
        'Source county',
        'Destination country',
        'Destination state',
        'Destination county',
        'Start year',
        'Start month',
        'End year',
        'End month',
    ],
    inplace=True,
)
df = df.reindex(columns=['Country','County','Month','Source','State','Unit','Value','Variable','Year'])


df

In [None]:
# Select every row of the `indicator` table whose `Variable` column matches
# 'New asylum seeking applicants' and load the result into a DataFrame.
query = (
    "select * from indicator"
    " where `Variable` like 'New asylum seeking applicants'"
)

results = engine.execute(query)

# Column names come from the result set itself.
column_names = results.keys()
df_results = pd.DataFrame(results, columns=column_names)
df_results