In [1]:
import pandas as pd
import pandas_bokeh
pandas_bokeh.output_notebook()

In [2]:
# Load the per-region daily case counts and order the rows by the `date` column.
regional_raw = pd.read_csv("../csv_final/World_cases_by_region.csv", sep=",")
regional_sorted = regional_raw.sort_values(by=["date"]).reset_index(drop=True)

# `Unnamed: 0` is presumably the row index saved with the CSV -- it is not used below.
df = regional_sorted.drop(columns="Unnamed: 0")
df.head()

Unnamed: 0,state,date,site,dailycases,CODE_2 | NUTS_2
0,Thai,2020-01-04,Chon Buri,1.0,THA.12_1
1,Thai,2020-01-04,Pathum Thani,1.0,THA.37_1
2,Thai,2020-01-04,Krabi,1.0,THA.18_1
3,Thai,2020-01-04,Phuket,10.0,THA.48_1
4,Thai,2020-01-04,Nonthaburi,2.0,THA.36_1


In [8]:
# Per-country summary: latest reporting date and the sum of daily cases.
#
# One named aggregation is used so that
#   * only `dailycases` is summed (a blanket `.sum()` would also "sum" the
#     string columns such as `site`),
#   * both values come from the same groupby and cannot be misaligned,
#   * the latest date is taken with `max` on parsed datetimes, so the result
#     does not depend on the row order of `df`.
country_renames = {'Thai': 'Thailand', 'Czech Republic': 'Czechia'}

df_total = (
    df.assign(date=pd.to_datetime(df["date"]))
    .groupby("state")
    .agg(Date=("date", "max"), total_cases=("dailycases", "sum"))
    .reset_index()
    # Harmonise country names with the spelling used by the external sources.
    .replace({"state": country_renames})
)
df_total

Unnamed: 0,state,Date,total_cases
0,Algeria,2020-04-04,1251.0
1,Argentina,2020-05-23,11356.0
2,Australia,2020-05-25,7129.0
3,Austria,2020-04-13,13675.0
4,Bahrain,2020-04-08,829.0
5,Belgium,2020-05-06,50301.0
6,Bolivia,2020-05-25,6263.0
7,Brazil,2020-05-01,91298.0
8,Canada,2020-05-24,38579.0
9,Chile,2020-05-11,30073.0


In [4]:
# Build a plotting-only copy instead of overwriting `df_total`.
# `df_total` is merged on ['state', 'Date'] further down; turning `state` into
# the index and `Date` into a string in place would break that merge when the
# notebook is executed top to bottom.
df_total_plot = (
    df_total
    .assign(
        country=df_total['state'],
        Date=df_total['Date'].dt.strftime('%d-%m-%Y'),
    )
    .set_index('state')
)

p_bar = df_total_plot.plot_bokeh.bar(
    ylabel="Cases",
    xlabel="Country",
    title="Total Cases by Countries",
    legend="top_left",
    figsize=(900, 600),
    vertical_xlabel=True,
    alpha=0.6)
# NOTE(review): HTML export is left disabled. `output_file` / `save` are not
# imported in the visible cells, and the figure is bound to `p_bar`, not `p`.
#output_file("total_cases_by_country.html")
#save(p_bar)



-----------------------------------------
## OWID (Our World in Data) and WHO data comparison

In [5]:
# Download the Our World in Data COVID-19 table and keep only the cumulative
# case count per location and date, renamed to match the regional frame.
owd_raw = pd.read_csv(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv",
    sep=",",
)
owd_columns = {"location": "state", "date": "Date", "total_cases": "OWD_cases"}
owd_renames = {'United States': 'United States of America'}

owd_df = (
    owd_raw.loc[:, ["location", "date", "total_cases"]]
    .rename(columns=owd_columns)
    .replace({"state": owd_renames})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
owd_df

Unnamed: 0,state,Date,OWD_cases
0,Aruba,2020-03-13,2
1,Aruba,2020-03-20,4
2,Aruba,2020-03-24,12
3,Aruba,2020-03-25,17
4,Aruba,2020-03-26,19
...,...,...,...
19703,International,2020-02-28,705
19704,International,2020-02-29,705
19705,International,2020-03-01,705
19706,International,2020-03-02,705


In [6]:
# Download the WHO situation-report time series (wide layout: one column per
# report date) and reshape it to one row per (state, Date).
# NOTE(review): the row window 40:254 presumably selects the per-country rows
# of the upstream file -- confirm against the current CSV.
who_wide = (
    pd.read_csv(
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/who_covid_19_situation_reports/who_covid_19_sit_rep_time_series/who_covid_19_sit_rep_time_series.csv",
        sep=",",
    )
    .iloc[40:254]
    .drop(columns=["Province/States", "WHO region"])
    .reset_index(drop=True)
    .set_index('Country/Region')
)
who_wide.columns = who_wide.columns.str.split(expand=True)

# Flagged as buggy by the original author: presumably the '19-Apr' column
# header in the upstream file is malformed, so it is mapped to a full date
# before parsing -- TODO confirm.
date_fixes = {'19-Apr': '4/19/2020'}

who_df = (
    who_wide.stack()
    .rename_axis(('state', 'Date'))
    .reset_index()
    .rename(columns={0: "WHO_Cases"})
    .replace({"Date": date_fixes})
)
who_df['Date'] = pd.to_datetime(who_df['Date'])
who_df = who_df.drop_duplicates()
who_df.head()

Unnamed: 0,state,Date,WHO_Cases
0,China,2020-01-21,278.0
1,China,2020-01-22,309.0
2,China,2020-01-23,571.0
3,China,2020-01-24,830.0
4,China,2020-01-25,1297.0


In [9]:
# Keep only the (state, Date) pairs that are present in all three sources:
# the regional totals, the OWD series and the WHO series.
merge_keys = ['state', 'Date']
df_comp = (
    df_total
    .merge(owd_df, how="inner", on=merge_keys)
    .merge(who_df, how="inner", on=merge_keys)
)
df_comp

Unnamed: 0,state,Date,total_cases,OWD_cases,WHO_Cases
0,Algeria,2020-04-04,1251.0,1171,986.0
1,Austria,2020-04-13,13675.0,13937,13937.0
2,Bahrain,2020-04-08,829.0,811,811.0
3,Belgium,2020-05-06,50301.0,50509,50509.0
4,Brazil,2020-05-01,91298.0,85380,78162.0
5,Chile,2020-05-11,30073.0,28866,28866.0
6,China,2020-03-15,81072.0,80995,81048.0
7,Costa Rica,2020-05-01,733.0,719,713.0
8,Ethiopia,2020-05-06,163.0,145,145.0
9,Ghana,2020-03-29,31.0,141,137.0


In [10]:
# Build a plotting-only copy so that `df_comp` keeps `state` as a column and
# `Date` as a datetime; this cell can then be re-run without errors.
df_comp_plot = (
    df_comp
    .assign(
        country=df_comp['state'],
        Date=df_comp['Date'].dt.strftime('%d-%m-%Y'),
    )
    .set_index('state')
)

# The tooltip shows all three sources; the WHO line reads the `WHO_Cases`
# column (it previously repeated the OWD value).
p_bar = df_comp_plot.plot_bokeh.bar(
    ylabel="Cases",
    xlabel="Country",
    title="Total Cases by Countries",
    legend="top_left",
    figsize=(900, 600),
    vertical_xlabel=True,
    hovertool_string="""<h2>  @country </h2>
                        <h4> Date: @Date </h4>
                        <h4> Total Cases Regions: @total_cases </h4>
                        <h4> Total Cases OWD: @OWD_cases </h4>
                        <h4> Total Cases WHO: @WHO_Cases </h4>""",
    alpha=0.6)
# NOTE(review): HTML export is left disabled. `output_file` / `save` are not
# imported in the visible cells, and the figure is bound to `p_bar`, not `p`.
#output_file("total_cases_by_country_comparison.html")
#save(p_bar)



--------------------------------------