In [13]:
import pandas as pd
import glob


def reformat_forecasts(file_path, fips_path='../template/state_fips_codes.csv'):
    """Reshape one forecast CSV into the table layout used for visualization.

    The forecast is read, its column headers are lowercased, and its
    ``location`` codes are joined against the FIPS lookup table to obtain
    state names (the location ``"US"`` is labelled ``"US National"``).
    Rows are kept only when they match all three filters: one of the 1-4 week
    ahead cumulative death targets, one of the listed locations, and either a
    missing quantile or one of 0.025 / 0.25 / 0.75 / 0.975.

    Parameters
    ----------
    file_path : str or path-like
        Forecast CSV. After lowercasing the headers it must provide the
        columns ``location``, ``target``, ``type``, ``quantile`` and ``value``.
    fips_path : str or path-like, optional
        Lookup CSV providing ``state_code`` and ``state_name`` columns.
        Defaults to the path this function has always read.

    Returns
    -------
    pandas.DataFrame
        Columns ``Location``, ``Target``, ``Type``, ``Unit``, ``Quantile``,
        ``Value`` (in that order). ``Unit`` is always ``"integer"`` and every
        missing value is replaced by the string ``"NA"``. Row labels are the
        ones produced by the merge, i.e. the index is not reset.
    """
    # Numeric stand-in for the "US" location so the column can be joined
    # against the integer state codes of the lookup table.
    us_numeric_code = 1000

    # Only visualize wk ahead forecasts
    targets = ['1 wk ahead cum death', '2 wk ahead cum death',
               '3 wk ahead cum death', '4 wk ahead cum death']

    # Only visualize certain states
    states = ['US National', 'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut',
              'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
              'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
              'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
              'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island',
              'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
              'West Virginia', 'Wisconsin', 'Wyoming', 'District of Columbia']

    # Only visualize certain quantiles (rows without a quantile are kept
    # through an explicit isna() test below).
    quantiles = [0.025, 0.25, 0.75, 0.975]

    # read forecast and lookup table
    fips_codes = pd.read_csv(fips_path)
    df = pd.read_csv(file_path)

    # lowercase all column headers
    df.columns = [name.lower() for name in df.columns]

    # Include US and state data. The national rows are re-coded by assigning
    # a new column (no chained in-place mutation), and with a *string* code so
    # the column dtype does not change before the numeric conversion.
    is_us = df["location"] == "US"
    has_us = bool(is_us.any())
    if has_us:
        df["location"] = df["location"].mask(is_us, str(us_numeric_code))
    df["location"] = pd.to_numeric(df["location"])
    df = df.merge(fips_codes, left_on='location', right_on='state_code', how='left')
    if has_us:
        # The lookup table has no entry for the stand-in code, so name it here.
        df.loc[df["location"] == us_numeric_code, "state_name"] = "US National"

    # Apply all row filters at once; .copy() detaches the result from the
    # merged frame so adding columns below cannot trigger
    # SettingWithCopyWarning.
    keep = (
        df["target"].isin(targets)
        & df["state_name"].isin(states)
        & (df["quantile"].isin(quantiles) | df["quantile"].isna())
    )
    df = df.loc[keep].copy()

    df["Unit"] = "integer"

    # Rename to the display column names
    df = df.rename(columns={"target": "Target",
                            "state_name": "Location",
                            "type": "Type",
                            "quantile": "Quantile",
                            "value": "Value"})

    # Reorder columns, then use "NA" instead of null values
    df = df[["Location", "Target", "Type", "Unit", "Quantile", "Value"]]
    df = df.fillna("NA")

    return df


# Preview the reformatted table for every forecast CSV that sits two directory
# levels below the data root (./data/<season>/<model>/<file>.csv).
# With recursive=False each "**" behaves like a plain "*" and matches a single
# path component.
my_path = "./data/"
csv_pattern = my_path + "**/**/*.csv"
for file_path in glob.iglob(csv_pattern, recursive=False):
    print(file_path)
    df2 = reformat_forecasts(file_path)
    print(df2)
    # Writing the result back over the source file is intentionally disabled:
    #df2.to_csv(file_path, index=False, float_format='%.14f')


./data/2019-2020/CU-80-contact/202016.csv
           Location                Target      Type     Unit Quantile  Value
2448        Alabama  1 wk ahead cum death     point  integer       NA    702
2449         Alaska  1 wk ahead cum death     point  integer       NA    160
2450        Arizona  1 wk ahead cum death     point  integer       NA   2522
2451       Arkansas  1 wk ahead cum death     point  integer       NA    180
2452     California  1 wk ahead cum death     point  integer       NA  10318
...             ...                   ...       ...      ...      ...    ...
88110     Tennessee  4 wk ahead cum death  quantile  integer     0.75   8855
88115  Pennsylvania  4 wk ahead cum death  quantile  integer     0.75  19519
88118        Oregon  4 wk ahead cum death  quantile  integer     0.75   8122
88120  Rhode Island  4 wk ahead cum death  quantile  integer     0.75   1384
88124          Utah  4 wk ahead cum death  quantile  integer     0.75   2518

[1020 rows x 6 columns]
./data/20

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


Empty DataFrame
Columns: [Location, Target, Type, Unit, Quantile, Value]
Index: []
./data/2019-2020/MOBS-MOBS/202016.csv
Empty DataFrame
Columns: [Location, Target, Type, Unit, Quantile, Value]
Index: []
./data/2019-2020/IHME-CurveFit/202016.csv
         Location                Target      Type     Unit Quantile      Value
2         Alabama  1 wk ahead cum death     point  integer       NA    147.832
5         Alabama  2 wk ahead cum death     point  integer       NA    226.464
8         Alabama  3 wk ahead cum death     point  integer       NA    292.556
11        Alabama  4 wk ahead cum death     point  integer       NA    330.982
22        Alabama  1 wk ahead cum death  quantile  integer    0.025     96.000
...           ...                   ...       ...      ...      ...        ...
4031  Connecticut  4 wk ahead cum death  quantile  integer    0.025   1310.625
4062  Connecticut  1 wk ahead cum death  quantile  integer    0.975   2259.175
4065  Connecticut  2 wk ahead cum death  qu

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


Empty DataFrame
Columns: [Location, Target, Type, Unit, Quantile, Value]
Index: []
./data/2019-2020/Imperial-Ensemble2/202015.csv
Empty DataFrame
Columns: [Location, Target, Type, Unit, Quantile, Value]
Index: []
./data/2019-2020/CU-60-contact/202016.csv
           Location                Target      Type     Unit Quantile  Value
2448        Alabama  1 wk ahead cum death     point  integer       NA    337
2449         Alaska  1 wk ahead cum death     point  integer       NA     88
2450        Arizona  1 wk ahead cum death     point  integer       NA   1112
2451       Arkansas  1 wk ahead cum death     point  integer       NA     90
2452     California  1 wk ahead cum death     point  integer       NA   4645
...             ...                   ...       ...      ...      ...    ...
88110     Tennessee  4 wk ahead cum death  quantile  integer     0.75   2260
88115  Pennsylvania  4 wk ahead cum death  quantile  integer     0.75   1548
88118        Oregon  4 wk ahead cum death  quantile 