In [None]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import sys

# Tag of the data dump; it is the suffix of the directory the CSV files are
# read from.
data_generated = "26072020"
directory = "historic_{}".format(data_generated)

# Empty placeholders. The processing loop further down checks
# len(us_states) == 0 to detect its first pass and then rebinds these names.
us_states, dates, df, dpd = [], [], [], []

# Number of daily snapshot files to read. The same value is used as the
# length of the tracked date window, so a snapshot's position and a date's
# position are measured on the same scale when the delay is computed.
n_days = 100

# Visit the snapshots in the order us_deaths_<n_days>.csv ... us_deaths_1.csv.
for days_ago in range(n_days, 0, -1):
    filename = directory+"/"+"us_deaths_"+str(days_ago)+".csv"
    # data has one row per day for all counties
    # NOTE(review): parse_dates=True only asks pandas to parse the index, so
    # the 'Date' column presumably stays as strings; it is converted
    # explicitly per state below.
    data = pd.read_csv(filename, parse_dates=True, dayfirst=False)
    # remove lines with 0 population
    data = data[data.Population > 0]
    # largest 'Date' value in this file
    # NOTE(review): on string dates this is a lexicographic max -- assumes an
    # ISO-like format; confirm against the CSVs.
    date = data['Date'].max()
    # distinct dates of this file (not used further inside this loop)
    csv_dates = data['Date'].unique()

    # First pass: derive the state list and the date window from the first
    # file and create the zero-filled result table.
    # len() is used on purpose: us_states becomes a NumPy array here, and the
    # truth value of a multi-element array is ambiguous.
    if len(us_states) == 0:
        us_states = data['Province/State'].unique()
        dates = pd.date_range(date, periods=n_days)
        df = pd.DataFrame(0,
                          index=pd.MultiIndex.from_product([us_states, dates],
                                                           names=['state', 'dates']),
                          columns=["delay", "count"])

    # Position of this file in the read order (0 for the first file).
    snapshot_index = n_days - days_ago

    for state in us_states:
        # Sum 'Case' per date over all rows of this state.
        state_data = data[data['Province/State'] == state][['Date', 'Case']]
        state_data = state_data.groupby('Date').sum()
        # Parse the date labels once per state instead of once per result
        # date; membership test and value lookup then share the same parsed
        # index, so the lookup no longer depends on the CSV's date spelling.
        state_data.index = pd.to_datetime(state_data.index)

        for r_date_index, r_date in enumerate(dates):
            # Skip result dates this file has no row for.
            if r_date not in state_data.index:
                continue

            new_val = state_data.loc[r_date, 'Case']
            known_val = df.loc[(state, r_date), 'count']
            if known_val == 0:
                # A stored 0 is treated as "no value yet": just record it.
                df.loc[(state, r_date), 'count'] = new_val
            elif known_val != new_val:
                # The value was revised by this file: store the new value and
                # the distance between the date's position and the file's
                # position.
                # NOTE(review): if file k only holds dates up to dates[k],
                # this difference is <= 0 -- confirm the intended sign.
                delay_count = r_date_index - snapshot_index
                df.loc[(state, r_date), 'count'] = new_val
                df.loc[(state, r_date), 'delay'] = delay_count

# After all files have been processed, pull the 'delay' values of each state
# out of the result table -- one list per state, in us_states order -- and
# stack them into an array.
dpd.extend(df.loc[state,'delay'].tolist() for state in us_states)

dpd = np.array(dpd)

# Draw dpd as a heat map; rows are labelled with us_states and columns with
# dates below.
fig, ax = plt.subplots()
im = ax.imshow(dpd, cmap="Spectral")

# One tick per column, labelled with the corresponding date ...
ax.set_xticks(np.arange(len(dates)))
ax.set_xticklabels(dates)
# ... and one tick per row, labelled with the corresponding state.
ax.set_yticks(np.arange(len(us_states)))
ax.set_yticklabels(us_states)

# Tilt the date labels by 45 degrees, anchored at their right end.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_ha("right")
    tick_label.set_rotation_mode("anchor")

# Per-cell value annotations (an ax.text call for every i, j) are not drawn.

# 80 x 30 inches at 100 dpi gives an 8000 x 3000 pixel image.
fig.set_size_inches(80, 30)
fig.savefig('d.jpeg', dpi=100)
print("done")