## Johns Hopkins COVID-19 Data Analysis - version 1

#### Data - https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

### Code developed by Swanand Mhalagi
#### For further info
#### https://www.linkedin.com/in/swanand-mhalagi-8b1813a7/
#### https://github.com/swanandM
#### https://medium.com/@swan1991m

#### The code below uses Python packages such as NumPy, Pandas, Plotly, Cufflinks and Prophet

In [1]:
from IPython.display import HTML

# Render a button that shows/hides every code input cell (`div.input`).
# `code_toggle` runs once when the document is ready, so the code starts hidden.
# The script calls `$`, so it assumes the front end exposes jQuery
# (presumably the classic Notebook UI -- TODO confirm for other front ends).
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
# Download the global confirmed-cases time series from the JHU CSSE repository into
# the working directory; the CSV is read by file name further down.
# Requires the `wget` command-line tool to be installed.
!wget -N -q --timestamping https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv

In [3]:
import numpy as np # linear algebra
import pandas as pd 
import cufflinks as cf
import plotly.offline
import plotly.graph_objects as go
from fbprophet import Prophet

# Enable cufflinks' offline plotting so figures render inside the notebook.
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

# Show all pandas columns and print floats with 3 decimal places.
# The canonical option name is used instead of relying on pandas' regex
# matching of option names ("display.max.columns" only matched by accident).
pd.set_option("display.max_columns", None)
pd.set_option("display.precision", 3)

In [4]:
# Load the downloaded CSV. The functions below expect the columns 'Province/State',
# 'Country/Region', 'Lat' and 'Long', and treat every other column as a date.
df = pd.read_csv('time_series_covid19_confirmed_global.csv')

In [5]:
def country_increase_rate(df_Country, country):
    """Plot the daily new confirmed cases reported for one country.

    Parameters
    ----------
    df_Country : pandas.DataFrame
        Wide table of cumulative counts with the columns 'Province/State',
        'Country/Region', 'Lat' and 'Long'; every remaining column is a date
        label that ``pd.to_datetime`` can parse.
    country : str
        Value of 'Country/Region' to plot.

    Returns
    -------
    The return value of ``DataFrame.iplot`` for the line chart.

    Raises
    ------
    KeyError
        If ``country`` does not occur in the 'Country/Region' column.
    """
    country_rows = df_Country[df_Country['Country/Region'] == country]
    if country_rows.empty:
        raise KeyError("No rows found for country " + repr(country))

    # Prefer the country-level row (blank 'Province/State'). Countries that are
    # listed only by province have no such row; add their provinces together
    # instead of failing with an opaque KeyError.
    national_rows = country_rows[country_rows['Province/State'].isnull()]
    if national_rows.empty:
        national_rows = country_rows

    # min_count=1 keeps a missing value missing instead of turning it into 0.
    cumulative = national_rows.drop(
        columns=['Province/State', 'Country/Region', 'Lat', 'Long']
    ).sum(axis=0, min_count=1)

    # Day-over-day difference turns cumulative totals into daily new cases;
    # the first date has no predecessor and therefore stays NaN.
    daily_new_cases = pd.DataFrame({
        'ds': pd.to_datetime(cumulative.index),
        'y': cumulative.diff().to_numpy(),
    }).set_index('ds')

    return daily_new_cases.iplot(mode='lines', xTitle='Date', yTitle='Case Count', title='Daily New Cases in ' + country)

In [6]:
# Daily new confirmed cases for Italy.
country_increase_rate(df, "Italy")

In [7]:
# Daily new confirmed cases for Germany.
country_increase_rate(df, "Germany")

In [8]:
# Daily new confirmed cases for the US.
country_increase_rate(df, "US")

## Estimation Model
#### Italy is estimated to need around 40 more days to reach negative coronavirus growth, with zero new cases expected around May 24th, 2020.
#### Germany is estimated to report zero new cases around May 1st, 2020.
#### These estimates hold only if the countries maintain or lower their current COVID-19 spread rate.

In [9]:
def estimation_model(df_Country, country, Days):
    """Fit Prophet to one country's daily new cases and plot the forecast.

    Parameters
    ----------
    df_Country : pandas.DataFrame
        Wide table of cumulative counts with the columns 'Province/State',
        'Country/Region', 'Lat' and 'Long'; every remaining column is a date
        label that ``pd.to_datetime`` can parse.
    country : str
        Value of 'Country/Region' to model.
    Days : int
        Number of periods passed to ``make_future_dataframe``.

    Returns
    -------
    The return value of ``DataFrame.iplot`` for the 'yhat' line chart.

    Raises
    ------
    KeyError
        If ``country`` does not occur in the 'Country/Region' column.
    """
    country_rows = df_Country[df_Country['Country/Region'] == country]
    if country_rows.empty:
        raise KeyError("No rows found for country " + repr(country))

    # Prefer the country-level row (blank 'Province/State'). Countries that are
    # listed only by province have no such row; add their provinces together
    # instead of failing with an opaque KeyError.
    national_rows = country_rows[country_rows['Province/State'].isnull()]
    if national_rows.empty:
        national_rows = country_rows

    # min_count=1 keeps a missing value missing instead of turning it into 0.
    cumulative = national_rows.drop(
        columns=['Province/State', 'Country/Region', 'Lat', 'Long']
    ).sum(axis=0, min_count=1)

    # Prophet expects the columns 'ds' (date) and 'y' (value). The first date
    # has no predecessor, so its daily increase stays NaN.
    one_country = pd.DataFrame({
        'ds': pd.to_datetime(cumulative.index),
        'y': cumulative.diff().to_numpy(),
    })

    # Use Prophet to project/estimate the time series.
    basic = Prophet(changepoint_prior_scale=2.5)
    basic.fit(one_country)
    future = basic.make_future_dataframe(periods=Days)
    forecast = basic.predict(future).set_index('ds')

    return forecast[['yhat']].iplot(mode='lines', xTitle='Date', yTitle='Case Count', title= country + ' Daily New Cases Forecast for next '+ str(Days) )

In [10]:
# Forecast Italy's daily new cases 40 periods past the last observed date.
estimation_model(df, "Italy", Days = 40)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [11]:
# Forecast Germany's daily new cases 20 periods past the last observed date.
estimation_model(df, "Germany", Days = 20)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [12]:
# Forecast the US daily new cases 20 periods past the last observed date.
estimation_model(df, "US", Days = 20)

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


### Coronavirus Growth Rate and 5 Day Moving Average Trend Curve

In [13]:
def trend(df_csv, min_daily_cases=1000, window=5):
    """Plot daily new cases and their moving average, aligned on the first case.

    Only rows with a blank 'Province/State' are used. Each country's series of
    daily increases is shifted so that day 0 is its first non-zero increase,
    which makes outbreaks that started on different dates comparable.

    Parameters
    ----------
    df_csv : pandas.DataFrame
        Wide table of cumulative counts with the columns 'Province/State',
        'Country/Region', 'Lat' and 'Long'; every remaining column is a date.
    min_daily_cases : int or float, default 1000
        Keep only countries whose daily increase exceeded this value at least once.
    window : int, default 5
        Number of days in the moving average.

    Returns
    -------
    None
        Two charts are rendered through ``DataFrame.iplot``.
    """
    # Daily increase per country; the first date has no predecessor and is dropped.
    daily = (
        df_csv[df_csv['Province/State'].isnull()]
        .drop(columns=['Province/State', 'Lat', 'Long'])
        .set_index('Country/Region')
        .diff(axis=1)
        .dropna(axis='columns')
    )

    # Re-base every row to start at its first non-zero day. Only LEADING zeros
    # are trimmed (trim='f'): trailing zeros are real days without new cases.
    n_days = daily.shape[1]
    aligned_rows = []
    for row in daily.to_numpy(dtype=float):
        since_first_case = np.trim_zeros(row, trim='f')
        padding = np.full(n_days - since_first_case.size, np.nan)
        aligned_rows.append(np.concatenate([since_first_case, padding]))
    aligned = pd.DataFrame(aligned_rows, index=daily.index, columns=range(n_days))

    # Only choose those countries where daily cases exceeded the threshold at
    # least once. A boolean mask is used so that no helper column ends up in
    # the plotted data.
    exceeded_threshold = (aligned > min_daily_cases).any(axis=1)

    # Countries become columns, days since the first case become the index.
    daily_increase = aligned[exceeded_threshold].T.dropna(how='all')
    daily_increase.iplot(mode='lines', xTitle='Number of days from 1st confirmed case', yTitle='Case Count', title='Coronavirus spred rate starting with 1st confirmed case ', theme='solar')

    # Moving average of every country at once (replaces a row-by-row
    # DataFrame.append loop; append no longer exists in pandas 2.x).
    moving_average = daily_increase.rolling(window=window).mean()
    moving_average.iplot(mode='lines', xTitle='Number of days from 1st confirmed case', yTitle='Case Count', title='Trend curve: ' + str(window) + ' day Moving Average for New Cases', theme='solar')


In [14]:
# Draw the first-case-aligned daily increase chart and its 5 day moving average.
trend(df)