In [1]:
import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
import glob
import numpy as np

In [2]:
final_df = pd.DataFrame()
file_list = glob.glob("./Old_runs/outputs/**/*.csv", recursive=True)

In [3]:
name_list = file_list[0].split("/")

file_name = name_list[-1].split(".")[0]

In [4]:
name_list

['.',
 'Old_runs',
 'outputs',
 '108_briefcase',
 '20k_sims',
 '6407920_108_briefcase_to_9_with_20000_sims.csv']

In [5]:
def add_full_csv(full_file_name, final_df):
    df = pd.read_csv(full_file_name)
    
    name_list = full_file_name.split("/")
    file_name = name_list[-1].split(".")[0]
    shapefile = name_list[3]
    
    new_rows = []

    max_dist = df['district'].max()

    for i, district_df in df.groupby('district'):
        if i == max_dist:
            continue

        district_df = df[df['district'] == i]
        i_max = district_df['b1_probs'].max()
        i_med = district_df["b1_probs"].median()

        nonzero_probs = district_df[district_df['b1_probs'] != 0]['b1_probs']
        i_min = nonzero_probs.min() if not nonzero_probs.empty else 0

        ratio_min = i_max / i_min if i_min != 0 else None
        ratio_med = i_max / i_med

        new_rows.append({"Shapefile": shapefile,
                         "File Name": file_name,
                         "Generation": i, 
                         "Max": i_max, 
                         "Min": i_min, 
                         "Median": i_med,
                         "Ratio Max to Min": ratio_min,
                         "Ratio Max to Median": ratio_med})



    new_df = pd.DataFrame(new_rows, columns=["Shapefile",
                                             "File Name",
                                             "Generation", 
                                             "Max", 
                                             "Min", 
                                             "Median",
                                             "Ratio Max to Min",
                                             "Ratio Max to Median"])
    
    
    final_df = pd.concat([final_df, new_df])
    
    return final_df
    

In [6]:
me_df = pd.DataFrame()

for name in file_list:
    me_df = add_full_csv(name, me_df)

    
me_df.to_csv("./report/SMC_full_data_output_(max,min,ratios).csv", index=False);

In [7]:
# me_df = pd.read_csv("./report/SMC_full_data_output_(max,min,ratios).csv")

In [8]:
worst_max_min_ratios_by_run = pd.DataFrame()
for _, b in me_df.groupby("Shapefile"):
    worst_max_min_ratios_by_run = pd.concat([worst_max_min_ratios_by_run, b.nlargest(1,"Ratio Max to Min")])

worst_max_med_ratios_by_run = pd.DataFrame()
for _, b in me_df.groupby("Shapefile"):
    worst_max_med_ratios_by_run = pd.concat([worst_max_med_ratios_by_run, b.nlargest(1,"Ratio Max to Median")])

In [9]:
worst_max_min_ratios_by_run.to_csv("./report/SMC_largest_Max:Min_ratios_by_run.csv", index=False)
worst_max_med_ratios_by_run.to_csv("./report/SMC_largest_Max:Median_ratios_by_run.csv", index=False)

In [10]:
average_max_min_by_shapefile = me_df.groupby("Shapefile")["Ratio Max to Min"].mean().reset_index()
average_max_min_by_shapefile.rename(columns={'Ratio Max to Min': 'Max:Min Average'}, inplace=True)


average_max_med_by_shapefile = me_df.groupby("Shapefile")["Ratio Max to Median"].mean().reset_index()
average_max_med_by_shapefile.rename(columns={'Ratio Max to Median': 'Max:Median Average'}, inplace=True)


In [11]:
average_max_min_by_shapefile.to_csv("./report/Average_Max:Min_by_shapefile(all_runs).csv", index=False)
average_max_med_by_shapefile.to_csv("./report/Average_Max:Median_by_shapefile(all_runs).csv", index=False)

In [51]:
me_df["File Name"].unique()

array(['6407920_108_briefcase_to_9_with_20000_sims',
       '7552429_108_briefcase_to_9_with_20000_sims',
       '31717062_108_briefcase_to_9_with_20000_sims',
       '29043567_108_briefcase_to_9_with_20000_sims',
       '53736859_108_briefcase_to_9_with_20000_sims',
       '70914158_108_briefcase_to_9_with_20000_sims',
       '362570_108_briefcase_to_9_with_20000_sims',
       '3414273_108_briefcase_to_9_with_20000_sims',
       '521138_108_briefcase_to_9_with_20000_sims',
       '6888378_108_briefcase_to_9_with_20000_sims',
       '53736859_108_briefcase_to_9_with_40000_sims',
       '6407920_108_briefcase_to_9_with_40000_sims',
       '31717062_108_briefcase_to_9_with_40000_sims',
       '521138_108_briefcase_to_9_with_40000_sims',
       '3414273_108_briefcase_to_9_with_40000_sims',
       '7552429_108_briefcase_to_9_with_40000_sims',
       '29043567_108_briefcase_to_9_with_40000_sims',
       '362570_108_briefcase_to_9_with_40000_sims',
       '70914158_108_briefcase_to_9_with_40

In [54]:
# thingy = me_df[me_df["File Name"] == "ny_vtd" | me_df["File Name"] == "ny_vtd_0p05"].groupby("File Name")["Ratio Max to Median"].max()

thingy = me_df[(me_df["Shapefile"] == "ny_vtd") | (me_df["Shapefile"] == "ny_vtd_0p05")].groupby("File Name")["Ratio Max to Median"].max()


In [55]:
thingy

File Name
29043567_ny_vtd_pop_tol_0p05_to_63_with_20000_sims    113.255426
29043567_ny_vtd_pop_tol_0p05_to_63_with_40000_sims    121.687141
29043567_ny_vtd_pop_tol_0p05_to_63_with_5000_sims      50.041894
29043567_ny_vtd_to_63_with_20000_sims                  54.532600
29043567_ny_vtd_to_63_with_40000_sims                 115.602104
29043567_ny_vtd_to_63_with_5000_sims                  101.181770
31717062_ny_vtd_pop_tol_0p05_to_63_with_20000_sims    125.433664
31717062_ny_vtd_pop_tol_0p05_to_63_with_40000_sims    126.460489
31717062_ny_vtd_pop_tol_0p05_to_63_with_5000_sims      65.950210
31717062_ny_vtd_to_63_with_20000_sims                  92.958283
31717062_ny_vtd_to_63_with_40000_sims                 126.001489
31717062_ny_vtd_to_63_with_5000_sims                   64.477144
521138_ny_vtd_pop_tol_0p05_to_63_with_20000_sims      124.542598
521138_ny_vtd_pop_tol_0p05_to_63_with_40000_sims       94.053785
521138_ny_vtd_pop_tol_0p05_to_63_with_5000_sims        78.391906
521138_ny_vtd_t

In [56]:
thingy = pd.DataFrame(thingy)

In [57]:
import pandas as pd
from IPython.display import display, HTML

In [58]:
css = """
.scrollable_dataframe table { 
    display: block;
    overflow-y: scroll;
    max-height: 300px;
}
.scrollable_dataframe th, .scrollable_dataframe td {
    min-width: 130px; 
    max-width: 600px; 
    text-align: left; 
    margin: auto;
}
"""


In [59]:
html = f"<style>{css}</style>{thingy.to_html(classes='scrollable_dataframe')}"
display(HTML(html))


Unnamed: 0_level_0,Ratio Max to Median
File Name,Unnamed: 1_level_1
29043567_ny_vtd_pop_tol_0p05_to_63_with_20000_sims,113.255426
29043567_ny_vtd_pop_tol_0p05_to_63_with_40000_sims,121.687141
29043567_ny_vtd_pop_tol_0p05_to_63_with_5000_sims,50.041894
29043567_ny_vtd_to_63_with_20000_sims,54.5326
29043567_ny_vtd_to_63_with_40000_sims,115.602104
29043567_ny_vtd_to_63_with_5000_sims,101.18177
31717062_ny_vtd_pop_tol_0p05_to_63_with_20000_sims,125.433664
31717062_ny_vtd_pop_tol_0p05_to_63_with_40000_sims,126.460489
31717062_ny_vtd_pop_tol_0p05_to_63_with_5000_sims,65.95021
31717062_ny_vtd_to_63_with_20000_sims,92.958283
