In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline

In [None]:
# Source workbook and the two sheets this notebook works with.
WORKBOOK_PATH = "../data/TEIS-NSS Project Data 10-2022.xlsx"
ECO_SHEET = "ECO with Exit21-22"
ELIG_SHEET = "Elig Timeline Rpt 2018-2022"

# Passing a list of sheet names returns a dict of DataFrames keyed by sheet
# name, so the workbook is opened and parsed once instead of once per sheet.
sheets = pd.read_excel(WORKBOOK_PATH, sheet_name=[ECO_SHEET, ELIG_SHEET])
eco = sheets[ECO_SHEET]
elig = sheets[ELIG_SHEET]

In [None]:
# Peek at the child identifier column of the ECO sheet.
eco['CHILD_ID']

In [None]:
# Peek at the child identifier column of the eligibility sheet.
elig['Child ID']

In [None]:
# First 10 rows of the ECO sheet.
eco.head(10)

In [None]:
# Last 10 rows of the ECO sheet.
eco.tail(10)

In [None]:
# Column names, dtypes and non-null counts for the ECO sheet.
eco.info()

In [None]:
# First rows of the months-in-program column.
eco['<Calc> Months in Program'].head()

In [None]:
# Column names, dtypes and non-null counts for the eligibility sheet.
elig.info()

In [None]:
# First rows of the eligibility sheet.
elig.head()

In [None]:
# Last rows of the eligibility sheet.
elig.tail()

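### Clean up the ECO table.
Keep Child ID, District, months in program, the entry and exit scores for the three outcome scales, and the OC1 data check.
Rename the columns to child_id, district, month_count, ent_*/exit_* (social, knowledge, app_action), and oc1.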

In [None]:
# Columns carried forward from the ECO sheet: identifier, district,
# months in program, entry/exit scores for the three scales, and the OC1 check.
eco_keep_columns = [
    "CHILD_ID",
    "DISTRICT",
    "<Calc> Months in Program",
    "Ent SOCIAL_SCALE",
    "Ent KNOWLEDGE_SCALE",
    "Ent APPROPRIATE_ACTION_SCALE",
    "Exit SOCIAL_SCALE",
    "Exit KNOWLEDGE_SCALE",
    "Exit APPROPRIATE_ACTION_SCALE",
    "OC1 Data Check",
]
eco_count = eco[eco_keep_columns]

In [None]:
# Short snake_case names, listed in the same order as the selected ECO columns.
eco_short_names = [
    'child_id',
    'district',
    'month_count',
    'ent_social',
    'ent_knowledge',
    'ent_app_action',
    'exit_social',
    'exit_knowledge',
    'exit_app_action',
    'oc1',
]
eco_count.columns = eco_short_names
eco_count.info()

In [None]:
# First 20 district values after the rename.
eco_count['district'].head(20)

### Clean up the ELIG table.
Keep Child ID, Initial Eligibility Category, and District.
Rename the columns to child_id, elig_cat, district.
Convert child_id to float.

In [None]:
# Columns carried forward from the eligibility sheet.
elig_keep_columns = ["Child ID", "Init. Elig. Category", 'District']
elig_cat1 = elig[elig_keep_columns]

In [None]:
# Short snake_case names, in the same order as the selected eligibility columns.
elig_short_names = [
    'child_id',
    'elig_cat',
    'district',
]
elig_cat1.columns = elig_short_names

In [None]:
# Dtypes and non-null counts of the trimmed eligibility table.
elig_cat1.info()

In [None]:
# First 20 rows of the trimmed eligibility table.
elig_cat1.head(20)

In [None]:
# Number of rows recorded under each eligibility category.
rows_by_category = elig_cat1.groupby('elig_cat')['elig_cat']
rows_by_category.count()

In [None]:
# Convert child_id to a numeric float column.
# float64 is used on purpose: downcasting to float32 can only represent
# integers exactly up to 2**24, so larger identifiers would be silently
# rounded and no longer match their counterpart in the merge.
# NOTE(review): presumably float is wanted to match the dtype of eco's
# CHILD_ID for the merge - confirm.
# `assign` returns a new frame, which avoids writing into a slice of `elig`
# (the source of pandas' SettingWithCopyWarning).
elig_cat1 = elig_cat1.assign(
    child_id=pd.to_numeric(elig_cat1["child_id"]).astype("float64")
)

In [None]:
# Confirm the child_id dtype after the numeric conversion.
elig_cat1.info()

### Merge the two new tables.

In [None]:
# Outer-join the ECO scores with the eligibility categories on child and district.
# NOTE(review): if the eligibility table holds more than one row per
# child/district pair, the matching ECO rows are repeated in the result - confirm.
join_keys = ['child_id', 'district']
eco_elig = eco_count.merge(elig_cat1, on=join_keys, how='outer')

In [None]:
# Dtypes and non-null counts of the merged table.
eco_elig.info()

In [None]:
# First 20 rows of the merged table.
eco_elig.head(20)

In [None]:
# NOTE(review): repeats the info() call made right after the merge; nothing has changed in between.
eco_elig.info()

In [None]:
# Keep only the rows that carry an OC1 data-check value.
has_oc1 = eco_elig['oc1'].notna()
eco_elig = eco_elig[has_oc1]


In [None]:
# Show the merged table after the OC1 filter.
eco_elig

### Add column for calculation of difference of scores

In [None]:
# Score change = exit score minus entry score, for each of the three scales.
# `assign` builds a new frame rather than writing columns into the filtered
# slice held in `eco_elig`, which avoids pandas' SettingWithCopyWarning and
# keeps the cell safe to re-run.
eco_elig = eco_elig.assign(
    social_change=eco_elig['exit_social'] - eco_elig['ent_social'],
    knowledge_change=eco_elig['exit_knowledge'] - eco_elig['ent_knowledge'],
    app_action_change=eco_elig['exit_app_action'] - eco_elig['ent_app_action'],
)

eco_elig.head(20)

In [None]:
# Spot-check the subtraction: entry, exit and computed change side by side.
# Left as the cell's last expression so the notebook renders it as a table
# instead of plain printed text.
eco_elig[['ent_social', 'exit_social', 'social_change']].head(20)

In [None]:
# Confirm the three change columns were added.
eco_elig.info()

### Subset with just the score changes, elig_cat, and district
Start graphs on this data

In [None]:
# The three score-change columns plus the two grouping columns.
change_and_group_columns = [
    'social_change',
    'knowledge_change',
    'app_action_change',
    'elig_cat',
    'district',
]
eco_change_cat = eco_elig[change_and_group_columns]

In [None]:
# Number of rows per eligibility category.
# The original ended in `.count` without parentheses, which displayed a
# bound-method repr instead of a result.
eco_change_cat.value_counts('elig_cat')

In [None]:
# Show the change/grouping subset.
eco_change_cat

In [None]:
# Total score change per eligibility category.
# numeric_only=True keeps the text column `district` out of the aggregation;
# without it, recent pandas versions concatenate the district strings.
sum_change = eco_change_cat.groupby('elig_cat').sum(numeric_only=True)


In [None]:
# Show the per-category totals.
sum_change

In [None]:
# Stacked bars: one bar per eligibility category, one segment per score-change column.
sum_change.plot(kind='bar', stacked=True)

### Try again with the nulls dropped

In [None]:
# Discard every row that has a missing value in any column.
drop_change_by_cat = eco_change_cat.dropna(axis=0, how='any')

In [None]:
# First 30 rows after dropping nulls.
drop_change_by_cat.head(30)

In [None]:
# Row count and dtypes after dropping nulls.
drop_change_by_cat.info()

In [None]:
# Number of complete rows per eligibility category.
# The original ended in `.count` without parentheses, which displayed a
# bound-method repr instead of a result.
drop_change_by_cat.value_counts('elig_cat')

In [None]:
# Show the null-free subset.
display(drop_change_by_cat)

In [None]:
# Total score change per eligibility category, on complete rows only.
# numeric_only=True keeps the text column `district` out of the aggregation;
# without it, recent pandas versions concatenate the district strings.
drop_change_by_cat.groupby('elig_cat').sum(numeric_only=True)

In [None]:
# Single horizontal bar summarising social_change across all complete rows.
sns.barplot(data = drop_change_by_cat, x = 'social_change')

In [None]:
# Mean score change per eligibility category; as_index=False keeps elig_cat as a column.
# numeric_only=True is required because the frame also holds the text column
# `district`; averaging it raises a TypeError on pandas 2.x.
dcbc = drop_change_by_cat.groupby('elig_cat', as_index=False).mean(numeric_only=True)

In [None]:
# Dtypes and row count of the per-category means.
dcbc.info()

In [None]:
# Show the per-category means.
display(dcbc)

### Social Change graph

In [None]:
sns.set_theme(style="ticks")

fig, ax = plt.subplots(figsize=(7, 6))

# Both layers share the same variables: social-score change per eligibility category.
social_vars = dict(x="social_change", y="elig_cat", data=drop_change_by_cat)

# Box plot with whiskers stretched to the 0th and 100th percentiles (min and max).
sns.boxplot(whis=[0, 100], width=.6, palette="vlag", **social_vars)

# Overlay every observation as a small dark dot.
sns.stripplot(size=4, color=".3", linewidth=0, **social_vars)

# Tidy up: vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set_ylabel("")
sns.despine(trim=True, left=True)

### Knowledge Change Graph

In [None]:
sns.set_theme(style="ticks")

fig, ax = plt.subplots(figsize=(7, 6))

# Both layers share the same variables: knowledge-score change per eligibility category.
knowledge_vars = dict(x="knowledge_change", y="elig_cat", data=drop_change_by_cat)

# Box plot with whiskers stretched to the 0th and 100th percentiles (min and max).
sns.boxplot(whis=[0, 100], width=.6, palette="vlag", **knowledge_vars)

# Overlay every observation as a small dark dot.
sns.stripplot(size=4, color=".3", linewidth=0, **knowledge_vars)

# Tidy up: vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set_ylabel("")
sns.despine(trim=True, left=True)

### Appropriate Action Change

In [None]:
sns.set_theme(style="ticks")

fig, ax = plt.subplots(figsize=(7, 6))

# Both layers share the same variables: appropriate-action change per eligibility category.
action_vars = dict(x="app_action_change", y="elig_cat", data=drop_change_by_cat)

# Box plot with whiskers stretched to the 0th and 100th percentiles (min and max).
sns.boxplot(whis=[0, 100], width=.6, palette="vlag", **action_vars)

# Overlay every observation as a small dark dot.
sns.stripplot(size=4, color=".3", linewidth=0, **action_vars)

# Tidy up: vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set_ylabel("")
sns.despine(trim=True, left=True)

### Each child is associated with a Point of Entry (POE) office, as indicated in column A of the "ECO with Exit21-22" tab. Do the above comparison by POE as well, similar to the calculations in the "ECO by POE" tab.

In [None]:
# Re-check the columns available before building the POE subsets.
drop_change_by_cat.info()

In [None]:
# Score changes together with the eligibility category (district left out).
by_category_columns = [
    'social_change',
    'knowledge_change',
    'app_action_change',
    'elig_cat',
]
poe1 = drop_change_by_cat[by_category_columns]

In [None]:
# Starting point for the by-district (POE) subset.
# NOTE(review): drop_change_by_cat is itself the result of dropna(), so this second dropna() looks redundant.
poe = drop_change_by_cat.dropna()

In [None]:
# Score changes together with the district (eligibility category left out).
by_district_columns = [
    'social_change',
    'knowledge_change',
    'app_action_change',
    'district',
]
poe = poe[by_district_columns]

In [None]:
# Total score change per district; as_index=False keeps district as an ordinary column.
per_district = poe.groupby('district', as_index=False)
poe_sum = per_district.sum()

In [None]:
# Grouped bars of the total score change per district.
# x='district' labels each group with its district code; without it the
# x-axis shows the frame's integer row index, because district is a
# regular column rather than the index.
poe_sum.plot(kind='bar', x='district', figsize=(10, 6))

In [None]:
sns.set_theme(style="whitegrid", palette="pastel")

fig, ax = plt.subplots(figsize=(7, 6))

# Both layers share the same variables: appropriate-action change per district.
district_vars = dict(x="app_action_change", y="district", data=poe)

# Box plot with whiskers stretched to the 0th and 100th percentiles (min and max).
sns.boxplot(whis=[0, 100], width=.6, palette="vlag", **district_vars)

# Overlay every observation as a small dark dot.
sns.stripplot(size=4, color=".3", linewidth=0, **district_vars)

# Tidy up: vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set_ylabel("")
sns.despine(trim=True, left=True)

# Pretty this one up.

Clean up, add value labels, and remove the error bars.

In [None]:

# Mean appropriate-action score change per district, in a fixed district order.
# ci=None turns off the error bars (newer seaborn releases spell this errorbar=None).
ax = sns.barplot(data=poe, x='district', y='app_action_change',
                 order=('FT','UC','SW','ET','NW','SE','GN','SC','MD'), ci=None)

# Write each bar's height above it, rounded to two decimals so the labels stay readable.
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f')

# Label through the Axes object so `ax` keeps pointing at the plot
# (the pyplot label helpers return Text objects, not the Axes).
ax.set(xlabel='District',
       ylabel='Average Test Score Increase',
       title='Appropriate Action Score Change')

plt.show()

In [None]:

# Mean knowledge score change per district, in a fixed district order.
# ci=None turns off the error bars (newer seaborn releases spell this errorbar=None).
ax = sns.barplot(data=poe, x='district', y='knowledge_change',
                 order=('FT','UC','ET','NW','SE','SC','SW','GN','MD'), ci=None)
ax.set(xlabel='District',
       ylabel='Average Test Score Increase',
       title='Knowledge Score Change')

# Y ticks every 0.2 from 0 up to (not including) 1.7.
# float() accepts a single value, so the tick positions are built with
# np.arange, and they are set on the Axes returned by barplot rather than on
# the Text object returned by plt.title.
y_ticks = np.arange(0, 1.7, 0.2)
ax.set_yticks(y_ticks)
# One-decimal labels hide floating-point noise such as 0.6000000000000001.
ax.set_yticklabels([f'{tick:.1f}' for tick in y_ticks])

plt.show()


In [None]:
# Mean social score change per district, in a fixed district order.
# ci=None turns off the error bars (newer seaborn releases spell this errorbar=None).
ax = sns.barplot(data=poe, x='district', y='social_change',
                 order=('FT','ET','UC','NW','SE','GN','SC','SW','MD'), ci=None)

# Label through the Axes returned by barplot instead of the implicit pyplot
# state, so no throwaway variable ends up bound to a Text object.
ax.set(xlabel='District',
       ylabel='Average Test Score Increase',
       title='Social Score Change')

plt.show()

In [None]:
# Appropriate-action score change per eligibility category.
sns.barplot(data=poe1, x='elig_cat', y='app_action_change')

In [None]:
# Social score change per eligibility category.
sns.barplot(data=poe1, x='elig_cat', y='social_change')

In [None]:
# First rows of the by-category subset.
poe1.head()

In [None]:
# Column totals of the three score changes, drawn as one bar per column.
# NOTE(review): the variable is named "avg" but this uses sum(), not mean() - confirm which is intended.
elig1avg = poe1[['social_change', 'app_action_change', 'knowledge_change']].sum()
elig1avg.plot(kind="bar", title='Developmental Evaluation')


In [None]:
# Overall mean of knowledge_change, drawn as a single bar (rebinds elig1avg).
elig1avg = poe1[['knowledge_change']].mean()
elig1avg.plot(kind="bar")

In [None]:
# Re-check the merged table's columns before the faceted plot.
eco_elig.info()

In [None]:
# eco_count only holds the raw entry/exit scores, so the social change used
# for the reference line is derived here; referring to a `social_change`
# column directly on eco_count raises a KeyError.
facet_data = eco_count.assign(
    social_change=eco_count['exit_social'] - eco_count['ent_social']
)

# One panel per district, three panels per row. FacetGrid creates its own
# figure, so no separate plt.figure() call is needed.
g = sns.FacetGrid(facet_data, col='district', col_wrap=3, height=4)

# Entry social score against months in program.
g.map(sns.lineplot, 'month_count', 'ent_social', marker='o')

# Horizontal line at the average social-score increase within each district panel.
g.map(lambda y, **kw: plt.axhline(y.mean(), color="k"), 'social_change')

# Each map() call relabels the axes from the variables it was given, so the
# final labels are set once, after the last layer, for every panel.
g.set_axis_labels('month_count', 'ent_social');
