## Daily Code Workout Day 6 (220430)
1. Monty Hall Problem
2. Covid Simulation
3. Covid Data Analysis (<- Here!)

Data Source: https://github.com/owid/covid-19-data/tree/master/public/data

#### 00. Packages Used

In [3]:
import pandas as pd
import numpy as np
import random

In [4]:
import matplotlib.pyplot as plt

In [5]:
# Load the OWID COVID-19 dataset from a local JSON file (see "Data Source" above).
# NOTE(review): later cells read org_df as one column per country code with a
# 'data' row holding that country's records - confirm this layout against the file.
org_df = pd.read_json("./owid-covid-data.json", orient="records")

In [6]:
# Per-country frames with the columns date / new_cases / new_deaths, keyed by
# three-letter country code.
# The 'data' cell of each country column is selected by label with .loc; the
# previous `[...][code][0]` relied on positional fallback of Series.__getitem__
# on a string-labelled Series, which pandas has deprecated.
# Presumably each 'data' cell is a list of per-day records - verify against the file.
c_dict = {
    code: pd.DataFrame(org_df.loc['data', code])[['date', 'new_cases', 'new_deaths']]
    for code in ['USA', 'CHN', 'FRA', 'DEU']
}

In [7]:
from scipy.signal import savgol_filter

def spike_detection(df_o, col='new_cases', window_length=99, polyorder=8):
    """Return the index labels where ``col`` turns from non-increasing to non-decreasing.

    The function works on a copy of ``df_o``:

    1. ``first``   - first difference of ``df_o[col]``.
    2. ``fr_smth`` - ``first`` smoothed with a Savitzky-Golay filter.
    3. ``second``  - first difference of ``fr_smth``.
    4. ``se_smth`` - ``second`` smoothed with the same filter settings.

    A row is reported when ``fr_smth`` was <= 0 on the previous row, is >= 0 on
    the current row, and ``se_smth`` is >= 0 on the current row.

    Parameters
    ----------
    df_o : pandas.DataFrame
        Input frame; it is not modified.
    col : str, default 'new_cases'
        Column to analyse.
    window_length : int, default 99
        Window length passed to ``savgol_filter`` for both smoothing passes.
    polyorder : int, default 8
        Polynomial order passed to ``savgol_filter`` for both smoothing passes.

    Returns
    -------
    pandas.Index
        Index labels of the rows that satisfy the condition above.

    Notes
    -----
    * ``dropna()`` is applied to the whole frame, so a NaN in any column
      (not only ``col``) removes that row before smoothing.
    * The comparisons are non-strict, so consecutive rows where ``fr_smth`` is
      exactly 0 are all reported.
    * ``savgol_filter`` is called with its default mode; per the SciPy docs it
      is expected to reject inputs shorter than ``window_length`` - confirm for
      the installed version.
    """
    df = df_o.copy()
    df['first'] = df[col].diff(1)
    # diff() leaves a NaN in the first row; copy so the column writes below hit an
    # independent frame (avoids SettingWithCopyWarning on pandas versions that
    # flag writes to a dropna() result).
    df = df.dropna().copy()
    df['fr_smth'] = savgol_filter(df['first'].to_numpy(), window_length, polyorder)

    df['second'] = df['fr_smth'].diff(1)
    df = df.dropna().copy()
    df['se_smth'] = savgol_filter(df['second'].to_numpy(), window_length, polyorder)

    prev_non_positive = df['fr_smth'].shift(1) <= 0
    curr_non_negative = df['fr_smth'] >= 0
    slope_non_decreasing = df['se_smth'] >= 0
    return df[prev_non_positive & curr_non_negative & slope_non_decreasing].index

In [8]:
# Smoothing by savgol_filter: for each country, pre-smooth new_cases with a
# Savitzky-Golay filter (window 99) and count the rows reported by spike_detection.
# NOTE: in this cell `roll_n` is the filter's polynomial order, not a rolling window.
per_country = []
for country in c_dict:
    # Same for every parameter value, so prepared once per country.
    base_df = c_dict[country].set_index('date')[['new_cases']].dropna()
    temp = []
    for roll_n in [2, 4, 6, 8, 10, 12, 14]:
        pic_df = base_df.copy()
        pic_df['new_cases'] = pd.Series(savgol_filter(base_df['new_cases'], 99, roll_n), index = base_df.index)
        temp.append({'param':roll_n, country:len(spike_detection(pic_df))})
    per_country.append(pd.DataFrame(temp).set_index('param'))
# Build the param x country table with a single concat instead of growing an
# initially empty frame inside the loop.
rec_df = pd.concat(per_country, axis = 1) if per_country else pd.DataFrame([])
display(rec_df)

Unnamed: 0_level_0,USA,CHN,FRA,DEU
param,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,7,11,6,7
4,9,15,9,8
6,9,17,12,11
8,12,19,18,13
10,10,23,15,19
12,14,24,17,14
14,16,22,24,24


In [10]:
# Smoothing by moving average: for each country, pre-smooth new_cases with a
# rolling mean of width `roll_n` and count the rows reported by spike_detection.
per_country = []
for country in c_dict:
    # Same for every window width, so prepared once per country.
    base_df = c_dict[country].set_index('date')[['new_cases']].dropna()
    temp = []
    for roll_n in [6, 10, 14, 18, 22, 26, 30]:
        # rolling() returns a new frame, so base_df is left untouched.
        pic_df = base_df.rolling(roll_n).mean()
        temp.append({'param':roll_n, country:len(spike_detection(pic_df))})
    per_country.append(pd.DataFrame(temp).set_index('param'))
# Build the param x country table with a single concat instead of growing an
# initially empty frame inside the loop.
rec_df = pd.concat(per_country, axis = 1) if per_country else pd.DataFrame([])
display(rec_df)

Unnamed: 0_level_0,USA,CHN,FRA,DEU
param,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,19,19,25,16
10,16,19,23,17
14,12,17,10,10
18,12,18,12,13
22,10,16,10,10
26,10,15,13,10
30,9,12,10,10
