In [1]:
# Mount Google Drive inside the Colab filesystem so the CSV inputs can be read,
# then print the contents of the Drive root as a quick check of the mount.
import os
from google.colab import drive

drive.mount('/content/gdrive')
drive_root_entries = os.listdir("/content/gdrive/My Drive/")
print(drive_root_entries)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
['Thesis Earthquake Data', 'Colab Notebooks', 'MSc_Research_Project_Report_Hypocenter_depth (1).docx', 'transaction_201705.csv', 'Untitled1.ipynb', 'transcnttransamt.PNG', 'CoronaConfirmed.csv', 'CoronaDeaths.csv', 'CoronaRecovered.csv']


In [0]:
# Load the confirmed / deaths / recovered tables from the mounted Drive.
# (Original author's note: the datasets were prepared in R beforehand.)
import pandas as pd

CoronaConfirmed = pd.read_csv(filepath_or_buffer='/content/gdrive/My Drive/CoronaConfirmed.csv')
CoronaDeaths = pd.read_csv(filepath_or_buffer='/content/gdrive/My Drive/CoronaDeaths.csv')
CoronaRecovered = pd.read_csv(filepath_or_buffer='/content/gdrive/My Drive/CoronaRecovered.csv')

In [0]:
# Report the number of missing values per column for each of the three tables,
# in the order confirmed, deaths, recovered.
for frame in (CoronaConfirmed, CoronaDeaths, CoronaRecovered):
    print(frame.isna().sum())

In [0]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, plot_mpl

In [0]:
# Re-load the three CSVs by walking the mounted Drive tree and matching on file name.
# The walk root must be the absolute mount path ('/content/gdrive', as used by
# drive.mount): a relative './content/...' root only resolves when the working
# directory is '/', and os.walk silently yields nothing for a missing directory,
# which would leave this loop a no-op.
# Note: if several files with a matching name exist in different folders, the one
# visited last by the walk is the one that ends up loaded.
for dirname, _, filenames in os.walk('/content/gdrive/My Drive/'):
    for file in filenames:
        if file.endswith('CoronaConfirmed.csv'):
            CoronaConfirmed = pd.read_csv(os.path.join(dirname, file))
        elif file.endswith('CoronaDeaths.csv'):
            CoronaDeaths = pd.read_csv(os.path.join(dirname, file))
        elif file.endswith('CoronaRecovered.csv'):
            CoronaRecovered = pd.read_csv(os.path.join(dirname, file))

In [0]:
# Reshape each table from wide to long: every column other than the four
# location identifiers is folded into a "Date" column, with its values placed
# in a column named after the metric.
location_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']

CoronaConfirmed = pd.melt(CoronaConfirmed, id_vars=location_columns,
                          var_name="Date", value_name="Confirmed")

CoronaDeaths = pd.melt(CoronaDeaths, id_vars=location_columns,
                       var_name="Date", value_name="Deaths")

CoronaRecovered = pd.melt(CoronaRecovered, id_vars=location_columns,
                          var_name="Date", value_name="Recovered")

In [0]:
# Build one combined table: the full confirmed frame plus the "Deaths" and
# "Recovered" columns placed alongside it. Column-wise concat aligns on the row
# index, so this assumes all three melted frames share the same row order
# -- TODO confirm.
CoronaData = pd.concat(
    objs=[CoronaConfirmed, CoronaDeaths['Deaths'], CoronaRecovered['Recovered']],
    axis="columns",
)

In [0]:
# Accounting for Missing Values:
# missing entries in the three count columns are replaced with 0.
count_columns = ["Deaths", "Recovered", "Confirmed"]
CoronaData[count_columns] = CoronaData[count_columns].fillna(0)

In [0]:
# Changing Date column into "Date" type from String:
# each value is parsed individually and reduced to a datetime.date object.
def _to_calendar_date(raw_value):
    """Parse one raw date value with pandas and return its datetime.date."""
    return pd.to_datetime(raw_value).date()


CoronaData.loc[:, "Date"] = CoronaData["Date"].map(_to_calendar_date)


In [10]:
# Number of rows in the combined table for each country/region.
CoronaData.loc[:, "Country/Region"].value_counts()

Mainland China    1178
US                 646
Australia          190
Canada             152
France              38
                  ... 
Iceland             38
Romania             38
Kuwait              38
New Zealand         38
San Marino          38
Name: Country/Region, Length: 61, dtype: int64

In [11]:
# Same per-country row count, restricted to rows that have a province/state value.
has_province = CoronaData["Province/State"].notna()
CoronaData.loc[has_province, "Country/Region"].value_counts()

Mainland China    1178
US                 646
Australia          190
Canada             152
Macau               38
Others              38
Taiwan              38
Hong Kong           38
Name: Country/Region, dtype: int64

In [0]:
#FOR China
# Latest date:
latest = CoronaData["Date"].max()

# Rows for Mainland China on the latest date. Filtered once and reused for
# every metric instead of repeating the same boolean mask three times.
china_latest = CoronaData[
    (CoronaData["Date"] == latest) & (CoronaData["Country/Region"] == "Mainland China")]

# Preparing data for plotting:
# one frame per metric holding the per-province total, largest first. The metric
# columns are addressed by name rather than by position (columns[-3], ...), so
# the result does not depend on the column order of CoronaData.
bar_data = {
    metric: (
        china_latest.groupby(['Province/State'])[metric]
        .sum()
        .reset_index()
        .sort_values(metric, ascending=False)
    )
    for metric in ("Confirmed", "Deaths", "Recovered")
}

# Bar colour used for each metric in the charts.
colors = {'Confirmed': 'orange',
          'Deaths': 'red',
          'Recovered': 'darkgreen',
             }

In [13]:
#Creating subplots for Confirmed, Deaths and Recoveries:
# a single-cell subplot grid holding one bar trace per metric, drawn as grouped bars.
fig = make_subplots(rows=1, cols=1)

for metric in ("Confirmed", "Deaths", "Recovered"):
    metric_frame = bar_data[metric]
    fig.add_trace(
        go.Bar(
            x=metric_frame['Province/State'],
            y=metric_frame[metric],
            name=metric,
            text=metric_frame[metric],  # value label drawn on each bar
            marker={'color': colors[metric]},
        ),
        row=1, col=1,
    )

fig.update_layout(
    height=600,
    barmode='group',
    title_text="Confirmed, Deaths and Recovered Cases : China",
    showlegend=True,
)
fig.show()

In [0]:
# Plotting time series graph of China data:
# daily totals for Mainland China, confirmed vs recovered, as two labelled lines.

# Not read again in this cell; kept because other cells may rely on the name.
fetch_dates = CoronaData["Date"].unique().astype('str')

# Sum the three metrics over all Mainland China rows for each date.
line_data = (
    CoronaData[CoronaData["Country/Region"] == 'Mainland China']
    .groupby(["Date"])
    .agg({'Confirmed': 'sum', 'Deaths': 'sum', 'Recovered': 'sum'})
    .reset_index()
)

fig = px.line(data_frame=line_data, x='Date', y='Confirmed')
# The px.line trace is created without a legend entry; name it and opt it into
# the legend so the two lines can be told apart. Done before the second trace is
# added so that only the confirmed line is affected.
fig.update_traces(name='Confirmed', showlegend=True)
fig.add_scatter(x=line_data['Date'], y=line_data['Recovered'],
                mode='lines', name='Recovered')
fig.update_layout(title_text="Confirmed vs Recovered: China", showlegend=True)

In [0]:
#Rest of the world
# Rows on the latest date for every country/region other than Mainland China.
# Filtered once and reused for every metric instead of repeating the mask.
world_latest = CoronaData[
    (CoronaData["Date"] == latest) & (CoronaData["Country/Region"] != "Mainland China")]

# Overwrite the three bar_data entries with per-country totals, largest first.
# Metric columns are addressed by name rather than by position (columns[-3], ...),
# so the result does not depend on the column order of CoronaData.
bar_data.update({
    metric: (
        world_latest.groupby(['Country/Region'])[metric]
        .sum()
        .reset_index()
        .sort_values(by=metric, ascending=False)
    )
    for metric in ("Confirmed", "Deaths", "Recovered")
})





In [0]:
# Plotting confirmed data:
# vertical bars, one per country/region in bar_data["Confirmed"], each labelled
# with its confirmed count.
px.bar(
    data_frame=bar_data["Confirmed"],
    x='Country/Region',
    y='Confirmed',
    text='Confirmed',
    orientation='v',
    title="Rest of The world Corona Confirmed Cases",
    color_discrete_sequence=px.colors.colorbrewer.Purples_r,
)

In [0]:
# Plotting Deaths data:
# vertical bars, one per country/region in bar_data["Deaths"], each labelled
# with its death count.
px.bar(
    data_frame=bar_data["Deaths"],
    x='Country/Region',
    y='Deaths',
    text='Deaths',
    orientation='v',
    title="Rest of The world Corona Death Cases",
    color_discrete_sequence=px.colors.colorbrewer.OrRd_r,
)

In [0]:
# Plotting Recovered data:
# vertical bars, one per country/region in bar_data["Recovered"], each labelled
# with its recovered count.
px.bar(
    data_frame=bar_data["Recovered"],
    x='Country/Region',
    y='Recovered',
    text='Recovered',
    orientation='v',
    title="Rest of The world Corona Recovered Cases",
    color_discrete_sequence=px.colors.colorbrewer.Accent,
)

In [0]:
#Confirmed vs Recovered Rest of the World
# daily totals for everything outside Mainland China, as two labelled lines.

# Not read again in this cell; kept because other cells may rely on the name.
fetch_dates = CoronaData["Date"].unique().astype('str')

# Sum the three metrics over all non-Mainland-China rows for each date.
line_data = (
    CoronaData[CoronaData["Country/Region"] != 'Mainland China']
    .groupby(["Date"])
    .agg({'Confirmed': 'sum', 'Deaths': 'sum', 'Recovered': 'sum'})
    .reset_index()
)

fig = px.line(data_frame=line_data, x='Date', y='Confirmed')
# Name both traces and show the legend: with two unlabelled lines and the legend
# hidden there is no way to tell confirmed from recovered. The px.line trace is
# renamed before the second trace is added so that only it is affected.
fig.update_traces(name='Confirmed', showlegend=True)
fig.add_scatter(x=line_data['Date'], y=line_data['Recovered'],
                mode='lines', name='Recovered')
fig.update_layout(title_text="Confirmed vs Recovered: Rest of the world", showlegend=True)