## import library

In [88]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from datetime import timedelta, datetime

## import data sets

In [89]:
confirmed_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
deaths_df =pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")
recovered_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")
cases_country_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")

In [90]:
country_df = confirmed_df[confirmed_df['Country/Region'] == "Bangladesh"]
data= country_df.iloc[0].loc['1/31/20']

country_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,6/10/20,6/11/20,6/12/20,6/13/20,6/14/20,6/15/20,6/16/20,6/17/20,6/18/20,6/19/20
20,,Bangladesh,23.685,90.3563,0,0,0,0,0,0,...,74865,78052,81523,84379,87520,90619,94481,98489,102292,105535


## shapes of datasets

In [91]:
print(confirmed_df.shape)
print(deaths_df.shape)
print(recovered_df.shape)
print(cases_country_df.shape)

(266, 154)
(266, 155)
(253, 154)
(188, 14)


## analysis of total cases

In [92]:
cases_country_df.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3
0,Australia,2020-06-21 04:33:19,-25.0,133.0,7461.0,102.0,6896.0,463.0,29.305137,,,1.367109,36,AUS
1,Austria,2020-06-21 04:33:19,47.5162,14.5501,17323.0,688.0,16175.0,460.0,192.341002,,,3.971598,40,AUT
2,Canada,2020-06-21 04:33:19,60.001,-95.001,102762.0,8466.0,64826.0,29471.0,271.457124,,,8.238454,124,CAN
3,China,2020-06-21 04:33:19,30.5928,114.3055,84553.0,4639.0,79534.0,380.0,6.019394,,,5.4865,156,CHN
4,Denmark,2020-06-21 04:33:19,56.2639,9.5018,12591.0,600.0,11482.0,509.0,217.378431,,,4.765309,208,DNK


In [93]:
global_cases= cases_country_df.copy().drop(["Country_Region","Last_Update","Lat","Long_","Mortality_Rate","Incident_Rate","People_Tested","People_Hospitalized","UID","ISO3"],axis=1)
summary_global_cases = pd.DataFrame(global_cases.sum()).T

In [94]:
summary_global_cases

Unnamed: 0,Confirmed,Deaths,Recovered,Active
0,8791794.0,464465.0,4365932.0,3887448.0


In [95]:
confirmed_df["Country/Region"] = "Bangladesh"



In [96]:
confirmed_tf=confirmed_df.copy().drop(["Province/State","Country/Region","Lat","Long"],axis=1)
summary_confirmed_tf=confirmed_tf.sum()
summary_confirmed_tf

1/22/20        555
1/23/20        654
1/24/20        941
1/25/20       1434
1/26/20       2118
            ...   
6/15/20    8034461
6/16/20    8173940
6/17/20    8349950
6/18/20    8488976
6/19/20    8664753
Length: 150, dtype: int64

## Figure 1 (Total confirmed cases)

In [97]:
fig1 = go.Figure(data=go.Scatter(x=summary_confirmed_tf.index,y=summary_confirmed_tf.values, mode="lines+markers"))
fig1.update_layout(title="Total confirmed cases (globally)",yaxis_title ="confirmed cases"
                  ,xaxis_tickangle=315)
fig1.show()

In [98]:
# color array
color_arr = px.colors.qualitative.Dark24

In [99]:
# Drowing a graph function 
def draw(ts_array,ts_label,title,colors,mode_size,line_size,x_axis_title,y_axis_title,tickangle=0,y_axis_type=""):
    fig=go.Figure()
    for index,ts in enumerate(ts_array):
        fig.add_trace(go.Scatter(x=ts.index,y=ts.values,name=ts_label[index],line=dict(color=colors[index],width=line_size[index]),connectgaps=True))
    x_axis_dict=dict(showline=True ,showgrid=True,showticklabels=True,linecolor="rgb(204,204,204)",linewidth=2,ticks='outside')
    if x_axis_title:
        x_axis_dict["title"]=x_axis_title
    if tickangle > 0:
        x_axis_dict['tickangle']=tickangle
    y_axis_dict=dict(showline=True ,showgrid=True,showticklabels=True,linecolor="rgb(204,204,204)",linewidth=2,ticks='outside')
    if y_axis_type!="":
        y_axis_dict['type']=y_axis_type
    if y_axis_title:
        y_axis_dict['title']=y_axis_title
    fig.update_layout(xaxis=x_axis_dict,yaxis=y_axis_dict,autosize=True,showlegend=True,)
    annotations=[]
    annotations.append(dict(xref="paper",yref="paper",x=0.0,y=1.5,xanchor='left',yanchor='bottom',text=title,font=dict(family='Arial',size=16,color='rgb(37,37,37)'),showarrow=False))
       
    fig.update_layout(annotations=annotations)
        
        
    return fig

In [100]:
confirmed_df_tf=confirmed_df.copy().drop(["Province/State","Country/Region","Lat","Long"],axis=1).sum()
recovered_tf=recovered_df.copy().drop(["Province/State","Country/Region","Lat","Long"],axis=1).sum()
deaths_tf=deaths_df.copy().drop(["Province/State","Country/Region","Lat","Long"],axis=1).sum()

In [101]:
active_tf =pd.Series(data=np.array([x1 - x2 -x3 for (x1,x2,x3) in zip(confirmed_df_tf.values,recovered_tf.values,deaths_tf.values)]),index=confirmed_df_tf.index)


In [102]:
ts_array=[confirmed_df_tf,active_tf,recovered_tf,deaths_tf]
labels=['confirmed','Active','recovered','Deaths']
color=[color_arr[5],color_arr[0],color_arr[2],color_arr[3]]
mood_size =[8,8,2,8]
line_size=[2,2,4,2]

In [103]:
fig2=draw(ts_array=ts_array,ts_label=labels,title="graph",colors=color,mode_size=mood_size,line_size=line_size,x_axis_title="Date",y_axis_title="case Count",tickangle=315,y_axis_type="")

In [104]:
fig2.show()

In [105]:
cases_country_df.copy().drop(["Last_Update","Lat","Long_","Mortality_Rate","Incident_Rate","People_Tested","People_Hospitalized","UID","ISO3"],axis=1).sort_values('Confirmed',ascending=False).reset_index(drop=True).style.bar(align='left',width=98,color='#d65f5f')

Unnamed: 0,Country_Region,Confirmed,Deaths,Recovered,Active
0,US,2255119.0,119719.0,617460.0,1443989.0
1,Brazil,1032913.0,49976.0,576779.0,406158.0
2,Russia,576162.0,7992.0,334024.0,234146.0
3,India,410451.0,13254.0,227728.0,169469.0
4,United Kingdom,304580.0,42674.0,1319.0,260587.0
5,Peru,251338.0,7861.0,143017.0,100460.0
6,Spain,245938.0,28322.0,150376.0,67240.0
7,Italy,238275.0,34610.0,182453.0,21212.0
8,Chile,236748.0,4295.0,196609.0,35844.0
9,Iran,202584.0,9507.0,161384.0,31693.0


## analysis for Bangladesh

In [109]:

confirmed_bd = confirmed_df[confirmed_df['Country/Region'] == "Bangladesh"]



In [110]:
confirmed_bd=confirmed_bd.drop(['Country/Region','Province/State','Lat','Long'],axis=1).reset_index(drop=True).sum()

In [111]:

confirmed_bd

1/22/20        555
1/23/20        654
1/24/20        941
1/25/20       1434
1/26/20       2118
            ...   
6/15/20    8034461
6/16/20    8173940
6/17/20    8349950
6/18/20    8488976
6/19/20    8664753
Length: 150, dtype: int64

In [112]:
deaths_bd = deaths_df[deaths_df['Country/Region']=='Bangladesh']
deaths_bd=deaths_bd.drop(['Country/Region','Province/State','Lat','Long'],axis=1).reset_index(drop=True).sum()

In [113]:
recover_bd = recovered_df[deaths_df['Country/Region']=='Bangladesh']
recover_bd=recover_bd.drop(['Country/Region','Province/State','Lat','Long'],axis=1).reset_index(drop=True).sum()


Boolean Series key will be reindexed to match DataFrame index.



In [114]:
type(recover_bd)

pandas.core.series.Series

In [115]:
active_bd = pd.Series(
np.array([x1-x2-x3 for (x1,x2,x3)in zip(confirmed_bd.values,deaths_bd.values,recover_bd.values)])
,index=confirmed_bd.index)

In [116]:
active_bd

1/22/20        555
1/23/20        654
1/24/20        941
1/25/20       1434
1/26/20       2118
            ...   
6/15/20    8014521
6/16/20    8136414
6/17/20    8310456
6/18/20    8447469
6/19/20    8620420
Length: 150, dtype: int64

In [117]:
bd_array=[confirmed_bd,active_bd,recover_bd,deaths_bd]
labels=['confirmed','Active','recovered','Deaths']
color=[color_arr[5],color_arr[0],color_arr[2],color_arr[3]]
mood_size =[8,8,2,8]
line_size=[2,2,4,2]

In [125]:
fig3=draw(ts_array=bd_array,ts_label=labels,title="graph",colors=color,mode_size=mood_size,line_size=line_size,x_axis_title="Date",y_axis_title="case Count",tickangle=315)

In [126]:
fig3.show()

In [30]:
type(bd_array)

list

In [65]:
confirm

array([    555,     654,     941,    1434,    2118,    2927,    5578,
          6166,    8234,    9927,   12038,   16787,   19881,   23892,
         27635,   30794,   34391,   37120,   40150,   42762,   44802,
         45221,   60368,   66885,   69030,   71224,   73258,   75136,
         75639,   76197,   76819,   78572,   78958,   79525,   80372,
         81346,   82704,   84070,   85967,   88325,   90262,   92795,
         95075,   97844,  101761,  105782,  109754,  113536,  118375,
        125704,  130909,  145204,  156283,  167022,  181452,  196917,
        216161,  243084,  272698,  304844,  337597,  378381,  418569,
        468155,  530138,  594178,  661544,  720695,  783580,  858317,
        933905, 1014713, 1097193, 1177447, 1251123, 1322598, 1397537,
       1481490, 1567423, 1659674, 1737813, 1836615, 1906692, 1977287,
       2057584, 2153578, 2242537, 2316591, 2399451, 2472264, 2546905,
       2622750, 2711635, 2799064, 2884420, 2958352, 3027215, 3101078,
       3176596, 3261

In [31]:
confirm = confirmed_bd.values
confirm=np.array(confirm)
death = deaths_bd.values
death=np.array(death).flatten()
active= active_bd.values
active=np.array(active).flatten()
recover = recover_bd.values
recover=np.array(recover).flatten()
index.shape

(150,)

In [32]:
d = {'date':index ,'confirm':confirm,
        'death':death,
        'active':active,
        'recover':recover}

In [33]:
df = pd.DataFrame(data=d)

In [34]:
df[::20]

Unnamed: 0,date,confirm,death,active,recover
0,1/22/20,555,0,555,0
20,2/11/20,44802,0,44802,0
40,3/2/20,90262,0,90262,0
60,3/22/20,337597,2,337592,3
80,4/11/20,1737813,30,1737747,36
100,5/1/20,3349915,170,3349571,174
120,5/21/20,5112366,408,5106356,5602
140,6/10/20,7376333,1012,7359421,15900


Unnamed: 0,date,confirm,death,active,recover
0,1/22/20,555,0,555,0
1,1/23/20,654,0,654,0
2,1/24/20,941,0,941,0
3,1/25/20,1434,0,1434,0
4,1/26/20,2118,0,2118,0
...,...,...,...,...,...
145,6/15/20,8034461,1209,8014521,18731
146,6/16/20,8173940,1262,8136414,36264
147,6/17/20,8349950,1305,8310456,38189
148,6/18/20,8488976,1343,8447469,40164


In [64]:
df['new'] =df['confirm'].diff( periods = 1) 
df

Unnamed: 0,date,confirm,death,active,recover,new
0,1/22/20,555,0,555,0,
1,1/23/20,654,0,654,0,99.0
2,1/24/20,941,0,941,0,287.0
3,1/25/20,1434,0,1434,0,493.0
4,1/26/20,2118,0,2118,0,684.0
...,...,...,...,...,...,...
145,6/15/20,8034461,1209,8014521,18731,122035.0
146,6/16/20,8173940,1262,8136414,36264,139479.0
147,6/17/20,8349950,1305,8310456,38189,176010.0
148,6/18/20,8488976,1343,8447469,40164,139026.0


In [40]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
x=df.drop(['death','date'],axis=1)
y=df['death']

               


x_train , x_test ,y_train,y_test = train_test_split(x,y,test_size = .20) 
model = RandomForestRegressor()
model.fit(x_train,y_train)
model.score(x_test,y_test)


0.9983363808156577

In [41]:
y_preds=model.predict(x_test)
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test,y_preds)

7.019666666666664

In [42]:
test=np.array([105535,61202,44333])
test=test.reshape(1,-1)

pre=model.predict(test)
pre


array([362.13])