In [1]:
import numpy as np
import pandas as pd
import sys, os, json, csv
import matplotlib
from matplotlib import pyplot as plt

In [2]:
# Directory holding the input CSVs; the JSON exports further down are written here too.
datapath = '../data'

# Resolve the three input paths first, then read them in the same order
# (deaths, recovered, confirmed).
deaths_csv = f'{datapath}/time_series_covid19_deaths_global_narrow.csv'
recov_csv = f'{datapath}/time_series_covid19_recovered_global_narrow.csv'
conf_csv = f'{datapath}/time_series_covid19_confirmed_global_narrow.csv'

ts_n_deaths = pd.read_csv(deaths_csv)
ts_n_recov = pd.read_csv(recov_csv)
ts_n_conf = pd.read_csv(conf_csv)

In [3]:
# Discard the first row of each freshly loaded frame and renumber from 0.
# NOTE(review): presumably row 0 is a secondary tag/header row in the export -- confirm.
# Re-running this cell drops one more row each time; reload the CSVs first.
ts_n_deaths = ts_n_deaths.iloc[1:].reset_index(drop=True)
ts_n_recov = ts_n_recov.iloc[1:].reset_index(drop=True)
ts_n_conf = ts_n_conf.iloc[1:].reset_index(drop=True)

In [4]:
# Cast the Value column of every frame to int64 so the sums below are integer sums.
ts_n_deaths = ts_n_deaths.assign(Value=ts_n_deaths['Value'].astype('int64'))
ts_n_recov = ts_n_recov.assign(Value=ts_n_recov['Value'].astype('int64'))
ts_n_conf = ts_n_conf.assign(Value=ts_n_conf['Value'].astype('int64'))

In [5]:
# Keep only rows that name a Province/State, then renumber the index from 0.
ts_n_deaths = ts_n_deaths.dropna(subset=['Province/State']).reset_index(drop=True)
ts_n_recov = ts_n_recov.dropna(subset=['Province/State']).reset_index(drop=True)
ts_n_conf = ts_n_conf.dropna(subset=['Province/State']).reset_index(drop=True)

In [6]:
# List, per dataset (deaths, recovered, confirmed), the countries that have
# province-level rows, so differences in coverage between the files are visible.
for frame in (ts_n_deaths, ts_n_recov, ts_n_conf):
    has_province = frame['Province/State'].notna()
    print(frame.loc[has_province, 'Country/Region'].unique())

['Australia' 'Canada' 'China' 'Denmark' 'France' 'Netherlands'
 'United Kingdom']
['Australia' 'China' 'Denmark' 'France' 'Netherlands' 'United Kingdom']
['Australia' 'Canada' 'China' 'Denmark' 'France' 'Netherlands'
 'United Kingdom']


In [7]:
# Sum Value per (country, province, date); the three keys become the result's index.
series_keys = ['Country/Region', 'Province/State', 'Date']

grouped_deaths = ts_n_deaths.groupby(series_keys)[['Value']].sum()
grouped_recov = ts_n_recov.groupby(series_keys)[['Value']].sum()
grouped_conf = ts_n_conf.groupby(series_keys)[['Value']].sum()

In [8]:
# Give each aggregated frame's single column a metric-specific name so the
# frames can be merged side by side without clashing.
for frame, label in (
    (grouped_deaths, 'Deaths'),
    (grouped_recov, 'Recovered'),
    (grouped_conf, 'Confirmed'),
):
    frame.columns = [label]

In [9]:
# Index-aligned left joins anchored on the deaths frame: every
# (country, province, date) key present in deaths is kept, and keys missing
# from recovered/confirmed yield NaN. The index levels are then turned back
# into ordinary columns.
grouped = (
    grouped_deaths
    .join(grouped_recov, how='left')
    .join(grouped_conf, how='left')
    .reset_index()
)
grouped.head(10)

Unnamed: 0,Country/Region,Province/State,Date,Deaths,Recovered,Confirmed
0,Australia,Australian Capital Territory,2020-01-22,0,0.0,0
1,Australia,Australian Capital Territory,2020-01-23,0,0.0,0
2,Australia,Australian Capital Territory,2020-01-24,0,0.0,0
3,Australia,Australian Capital Territory,2020-01-25,0,0.0,0
4,Australia,Australian Capital Territory,2020-01-26,0,0.0,0
5,Australia,Australian Capital Territory,2020-01-27,0,0.0,0
6,Australia,Australian Capital Territory,2020-01-28,0,0.0,0
7,Australia,Australian Capital Territory,2020-01-29,0,0.0,0
8,Australia,Australian Capital Territory,2020-01-30,0,0.0,0
9,Australia,Australian Capital Territory,2020-01-31,0,0.0,0


In [10]:
# The left merges leave NaN wherever a dataset has no row for a key present in
# deaths, and a NaN promotes the whole count column to float (Recovered shows
# as 0.0 in the preview above). Treat missing counts as zero, then restore the
# int64 dtype so counts are exported as integers like the other metrics.
grouped = grouped.fillna(0).astype({
    'Deaths': 'int64',
    'Recovered': 'int64',
    'Confirmed': 'int64',
})

In [11]:
def new_cols(row):
    """Add day-over-day ``newConfirmed``/``newDeaths``/``newRecovered`` to a row.

    Meant to be used as ``grouped.apply(new_cols, axis=1)``. ``row.name`` is the
    row's label in the module-level ``grouped`` frame, and the preceding row is
    looked up there under label ``row.name - 1``. This relies on ``grouped``
    having a 0..n-1 integer index and being ordered by country, province and
    date, so that the preceding row is the previous observation of the same
    series.

    Each ``new*`` value is the cumulative count minus the preceding row's
    count when both rows share the same Country/Region and Province/State.
    The first row of every series has nothing to difference against and
    gets 0.

    Parameters
    ----------
    row : pandas.Series
        One row of ``grouped`` holding ``Country/Region``, ``Province/State``,
        ``Confirmed``, ``Deaths`` and ``Recovered``.

    Returns
    -------
    pandas.Series
        The same row with the three ``new*`` entries added.
    """
    # (cumulative column, derived daily-increase column), in output column order.
    counters = (
        ('Confirmed', 'newConfirmed'),
        ('Deaths', 'newDeaths'),
        ('Recovered', 'newRecovered'),
    )

    previous = grouped.loc[row.name - 1] if row.name > 0 else None

    # Rows are per province, so the province must match as well as the country;
    # otherwise the first day of one province would be differenced against the
    # last day of the province listed before it.
    same_series = (
        previous is not None
        and row['Country/Region'] == previous['Country/Region']
        and row['Province/State'] == previous['Province/State']
    )

    for total, delta in counters:
        row[delta] = row[total] - previous[total] if same_series else 0

    return row
    
# Run new_cols over every row (axis=1) and rebind `grouped` to the result,
# which carries the added newConfirmed/newDeaths/newRecovered columns.
grouped = grouped.apply(new_cols, axis=1)

In [12]:
# Preview the first five rows to check the new* columns are present.
grouped.head(5)

Unnamed: 0,Country/Region,Province/State,Date,Deaths,Recovered,Confirmed,newConfirmed,newDeaths,newRecovered
0,Australia,Australian Capital Territory,2020-01-22,0,0.0,0,0,0,0
1,Australia,Australian Capital Territory,2020-01-23,0,0.0,0,0,0,0
2,Australia,Australian Capital Territory,2020-01-24,0,0.0,0,0,0,0
3,Australia,Australian Capital Territory,2020-01-25,0,0.0,0,0,0,0
4,Australia,Australian Capital Territory,2020-01-26,0,0.0,0,0,0,0


In [13]:
# Distinct countries in order of first appearance, plus the accumulator that
# the export loop fills with one entry per province.
countries = pd.unique(grouped['Country/Region'])
chart_json = dict()

In [14]:
# (source column, JSON key) for every metric, in the order the keys must
# appear in both the per-country files and the chart payload.
metric_fields = (
    ('Confirmed', 'confirmed'),
    ('Deaths', 'deaths'),
    ('Recovered', 'recovered'),
    ('newConfirmed', 'newConfirmed'),
    ('newDeaths', 'newDeaths'),
    ('newRecovered', 'newRecovered'),
)

for country in countries:
    country_rows = grouped.loc[grouped['Country/Region'] == country].reset_index(drop=True)
    state_payloads = []

    for state in country_rows['Province/State'].unique():
        in_state = country_rows['Province/State'] == state
        state_rows = country_rows.loc[in_state].reset_index(drop=True).sort_values(['Date'])

        # Column-oriented payload: one list per metric, written to <country>.json.
        columnar = {'state': state, 'dates': state_rows['Date'].tolist()}
        for column, key in metric_fields:
            columnar[key] = state_rows[column].tolist()
        state_payloads.append(columnar)

        # Row-oriented payload: one dict per date, collected into chart_json.
        daily = [
            {'date': record['Date'], **{key: record[column] for column, key in metric_fields}}
            for _, record in state_rows.iterrows()
        ]

        # NOTE(review): the 'country' field carries the province name and the
        # dict is keyed by province alone -- confirm this is what the chart
        # consumer expects, and that province names never repeat across countries.
        chart_json[state] = {'country': state, 'data': daily}

    with open(f'{datapath}/{country}.json', 'w') as out_file:
        json.dump(state_payloads, out_file)

In [15]:
# Serialize the per-province chart payload and write it out in one go.
chart_payload = json.dumps(chart_json)
with open(f'{datapath}/countries_chart.json', 'w') as covid_chart:
    covid_chart.write(chart_payload)