# COVID-19 population penetration

#### Hypothesis

H0: More than 50% of the Indian population will contract COVID-19.

H1: People will be vaccinated before COVID-19 infects 50% of the Indian population.

We will study COVID-19 data with respect to the two broad categories below, and then look specifically at the parameters 
which, when combined, can give us the spread rate of COVID-19.

Categories:

1- National
  
2- Global

Sub-Categories:

a) Confirmed cases
b) Confirmed recoveries
c) Confirmed Fatalities
d) Confirmed Vaccinated

Once we analyse the above categories with respect to the sub-categories, we will move ahead by converting the data spread into a PDF, which will give us a timeline of the spread.

We will also try to visualise any kind of pattern found during our analysis.

In [11]:
import chart_studio.plotly as py
import cufflinks as cf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
from joypy import joyplot
%matplotlib inline

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

# Use Plotly locally (switch cufflinks to offline mode)
cf.go_offline()

# Allows us to create graph objects for making more customized plots
import plotly.graph_objects as go

### Reading Covid-19 Data

#### INDIA

In [12]:
# Load the covid19india.org CSV exports, one DataFrame per file.
# Every file is fetched over HTTPS from the same endpoint; the vaccine file
# used plain HTTP before, unlike all the other downloads in this cell.
COVID19INDIA_CSV_BASE = 'https://api.covid19india.org/csv/latest/'

dsi_ts = pd.read_csv(COVID19INDIA_CSV_BASE + 'case_time_series.csv')  # case time series
dsi_sw = pd.read_csv(COVID19INDIA_CSV_BASE + 'state_wise.csv')  # state wise figures
dsi_dw = pd.read_csv(COVID19INDIA_CSV_BASE + 'district_wise.csv')  # district wise figures
dsi_swd = pd.read_csv(COVID19INDIA_CSV_BASE + 'state_wise_daily.csv')  # state wise daily
dsi_vsw = pd.read_csv(COVID19INDIA_CSV_BASE + 'vaccine_doses_statewise.csv')  # vaccine doses, state wise
# Further vaccine exports under the same endpoint, not loaded here. If they are
# needed, give them their own variable names rather than reusing dsi_sw:
#   cowin_vaccine_data_statewise.csv
#   cowin_vaccine_data_districtwise.csv

In [13]:
# understanding data: list the columns of every loaded frame
loaded_frames = {
    'dsi_ts': dsi_ts,
    'dsi_sw': dsi_sw,
    'dsi_dw': dsi_dw,
    'dsi_swd': dsi_swd,
    'dsi_vsw': dsi_vsw,
}
for frame_name, frame in loaded_frames.items():
    print('{} keys {}'.format(frame_name, frame.keys()))

# printing dataframe information
# DataFrame.info() writes its report straight to stdout and returns None, so
# it must be called on its own; embedding it in str.format() only prints
# "None" after all the reports have already been emitted.
for frame_name, frame in loaded_frames.items():
    print('{} info'.format(frame_name))
    frame.info()

dsi_ts keys Index(['Date', 'Date_YMD', 'Daily Confirmed', 'Total Confirmed',
       'Daily Recovered', 'Total Recovered', 'Daily Deceased',
       'Total Deceased'],
      dtype='object'), dsi_sw keys Index(['State', 'Confirmed', 'Recovered', 'Deaths', 'Active',
       'Last_Updated_Time', 'Migrated_Other', 'State_code', 'Delta_Confirmed',
       'Delta_Recovered', 'Delta_Deaths', 'State_Notes'],
      dtype='object'), dsi_dw keys Index(['SlNo', 'State_Code', 'State', 'District_Key', 'District', 'Confirmed',
       'Active', 'Recovered', 'Deceased', 'Migrated_Other', 'Delta_Confirmed',
       'Delta_Active', 'Delta_Recovered', 'Delta_Deceased', 'District_Notes',
       'Last_Updated'],
      dtype='object'), dsi_swd keys Index(['Date', 'Date_YMD', 'Status', 'TT', 'AN', 'AP', 'AR', 'AS', 'BR', 'CH',
       'CT', 'DN', 'DD', 'DL', 'GA', 'GJ', 'HR', 'HP', 'JK', 'JH', 'KA', 'KL',
       'LA', 'LD', 'MP', 'MH', 'MN', 'ML', 'MZ', 'NL', 'OR', 'PY', 'PB', 'RJ',
       'SK', 'TN', 'TG', 'TR', '

In [14]:
# Let's take a single dataframe at a time, starting with the case time series (dsi_ts).
# info() prints the column names, non-null counts and dtypes.
dsi_ts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 475 entries, 0 to 474
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Date             475 non-null    object
 1   Date_YMD         475 non-null    object
 2   Daily Confirmed  475 non-null    int64 
 3   Total Confirmed  475 non-null    int64 
 4   Daily Recovered  475 non-null    int64 
 5   Total Recovered  475 non-null    int64 
 6   Daily Deceased   475 non-null    int64 
 7   Total Deceased   475 non-null    int64 
dtypes: int64(6), object(2)
memory usage: 29.8+ KB


In [15]:
# Use the 'Date_YMD' column as the row index and discard the redundant 'Date'
# column; the index is left unnamed so no header appears above it.
dsi_ts = dsi_ts.drop(columns=['Date']).set_index('Date_YMD')
dsi_ts.index.name = None

In [16]:
# Show the last 45 rows of the re-indexed time series.
dsi_ts.tail(45)

Unnamed: 0,Daily Confirmed,Total Confirmed,Daily Recovered,Total Recovered,Daily Deceased,Total Deceased
2021-04-04,103794,12587843,52840,11679796,477,164541
2021-04-05,96563,12684406,50100,11729896,446,164987
2021-04-06,115312,12799718,59714,11789610,630,165617
2021-04-07,126276,12925994,59137,11848747,684,166301
2021-04-08,131878,13057872,61829,11910576,802,167103
2021-04-09,144945,13202817,77263,11987839,773,167876
2021-04-10,152565,13355382,90328,12078167,838,168714
2021-04-11,169914,13525296,75380,12153547,904,169618
2021-04-12,160838,13686134,96746,12250293,880,170498
2021-04-13,185297,13871431,82271,12332564,1026,171524


In [17]:
# Line chart of daily confirmed cases over time.
# plotly.express `labels` keys have to be column names, so a 'y' key never
# matches the 'Daily Confirmed' column and the requested axis titles were not
# applied. Set the axis titles explicitly on the layout instead.
daily_confirmed_fig = px.line(dsi_ts, x=dsi_ts.index, y='Daily Confirmed')
# update_layout() returns the figure, so the notebook still renders it here.
daily_confirmed_fig.update_layout(xaxis_title='date', yaxis_title='Daily-Confirmed')

In [32]:
# Overlay the three daily series (confirmed, recovered, deceased) on one figure.
fig = go.Figure()

# (column, line colour) pairs; each column becomes one line trace that is
# named after the column itself.
daily_series = [
    ('Daily Confirmed', 'rgb(150,30,10)'),
    ('Daily Recovered', 'rgb(50,200,15)'),
    ('Daily Deceased', 'rgb(240,30,10)'),
]
for column, colour in daily_series:
    fig.add_trace(
        go.Scatter(
            x=dsi_ts.index,
            y=dsi_ts[column],
            mode='lines',
            name=column,
            line=dict(color=colour, width=2),
        )
    )

# X axis: light grey axis line, grid on, outside ticks, grey Arial tick labels.
x_axis_style = dict(
    showline=True,
    showgrid=True,
    showticklabels=True,
    linecolor='rgb(204, 204, 204)',
    linewidth=2,
    ticks='outside',
    tickfont=dict(family='Arial', size=12, color='rgb(82, 82, 82)'),
)
# Y axis: grid, zero line, axis line and tick labels are all shown.
y_axis_style = dict(
    showgrid=True,
    zeroline=True,
    showline=True,
    showticklabels=True,
)
# Kept as the final expression so the notebook renders the returned figure.
fig.update_layout(
    xaxis=x_axis_style,
    yaxis=y_axis_style,
    autosize=True,
    margin=dict(autoexpand=True, l=100, r=20, t=110),
    showlegend=True,
    plot_bgcolor='white',
)

In [None]:
#URLI = "https://api.covid19india.org/data.json"
#result = requests.get(URLI).json()
#dfI = pd.DataFrame(result["cases_time_series"])
#dfI.to_excel("data.xlsx", index=False)

In [None]:
# NOTE(review): `data_india_1` is not defined anywhere in the visible notebook,
# so on a fresh kernel this cell raises NameError — confirm which frame was
# meant (or remove the cell).
data_india_1.tail()

In [None]:
# Fetch the India data from the covid19india.org JSON API and keep its
# "cases_time_series" records as a DataFrame.
URLI = "https://api.covid19india.org/data.json"
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# JSON-decoding error or KeyError a line later.
responseI = requests.get(URLI, timeout=30)
responseI.raise_for_status()
resultI = responseI.json()
df1 = pd.DataFrame(resultI["cases_time_series"])

In [None]:
# List the column names of the JSON-derived frame.
print(df1.keys())

In [None]:
# Preview the last 15 rows of the raw frame.
df1.tail(15)

In [None]:
# Use the 'dateymd' column as the row index and discard the redundant 'date'
# column; the index is left unnamed.
df1 = df1.drop(columns=['date']).set_index('dateymd')
df1.index.name = None

In [None]:
# Preview the last 15 rows again, now indexed by date.
df1.tail(15)

In [None]:
# Inspect column dtypes before any numeric conversion.
df1.info()

In [None]:
# Turn the index into timestamps, then cast every count column to int and
# rescale it to thousands (hence the division by 1000).
df1.index = pd.to_datetime(df1.index)
count_columns = [
    "dailyconfirmed",
    "dailydeceased",
    "dailyrecovered",
    "totalconfirmed",
    "totaldeceased",
    "totalrecovered",
]
for column in count_columns:
    df1[column] = df1[column].astype(int) / 1000

In [None]:
# Re-check the index type and column dtypes.
df1.info()

In [None]:
# Basic line plot of the 'dailyconfirmed' column against the index.
df1['dailyconfirmed'].plot()

In [None]:
# One subplot per column, in a 10x12 inch figure.
df1.plot(subplots=True, figsize=(10,12))

In [None]:
# Per-period means of every column, for comparison with the daily values:
# "M" buckets by month, "W" buckets by week.
df_month, df_week = (df1.resample(rule).mean() for rule in ("M", "W"))

In [None]:
# Daily confirmed values plotted against their weekly mean.
start, end = '2020-01', '2021-04'
fig, ax = plt.subplots(figsize=(10, 10))

daily_window = df1.loc[start:end, 'dailyconfirmed']
weekly_window = df_week.loc[start:end, 'dailyconfirmed']

# Thin black line for the noisy daily series, coral for the weekly mean.
ax.plot(daily_window, color='black', label='Daily',
        linestyle='-', linewidth=0.1, marker='.')
ax.plot(weekly_window, color='coral', label='Weekly',
        linestyle='-', marker='.', markersize=2)

ax.set_ylabel("daily confirmed")
ax.legend()

In [None]:
# Centered 7-row rolling mean, compared with the daily values and weekly mean.
df_7d_rolling = df1.rolling(7, center=True).mean()
start, end = '2020-01', '2021-04'
fig, ax = plt.subplots(figsize=(10, 10))

# (source frame, line style) for each curve, drawn in this order.
overlays = [
    (df1, dict(marker='.', linestyle='-', linewidth=0.1, label='Daily')),
    (df_week, dict(marker='o', markersize=1, linestyle='-', label='Weekly Avg')),
    (df_7d_rolling, dict(marker='.', markersize=1, linestyle='-', label='7d Rolling Avg')),
]
for source, line_style in overlays:
    ax.plot(source.loc[start:end, 'dailyconfirmed'], **line_style)

ax.set_ylabel('daily confirmed')
ax.legend()

In [None]:
# Daily confirmed vs daily recovered vs daily deceased on one set of axes.
start, end = '2020-01', '2021-04'
fig, ax = plt.subplots(figsize=(10, 10))

# (column, legend label, colour) for each curve, drawn in this order.
daily_curves = [
    ('dailyconfirmed', 'confirmed', 'orange'),
    ('dailyrecovered', 'recovered', 'green'),
    ('dailydeceased', 'death', 'red'),
]
for column, legend_label, colour in daily_curves:
    ax.plot(df1.loc[start:end, column], marker='o', markersize=3,
            linestyle='-', linewidth=0.2, label=legend_label, color=colour)

ax.set_ylabel("con vs rec vs dead")
ax.legend()

In [None]:
# Cumulative confirmed vs cumulative recovered ('totaldeceased' is not drawn).
start, end = '2020-01', '2021-04'
fig, ax = plt.subplots(figsize=(10, 10))

# (column, legend label, colour) for each curve, drawn in this order.
total_curves = [
    ('totalconfirmed', 'confirmed', 'orange'),
    ('totalrecovered', 'recovered', 'green'),
]
for column, legend_label, colour in total_curves:
    ax.plot(df1.loc[start:end, column], marker='.', markersize=3,
            linestyle='-', linewidth=0.2, label=legend_label, color=colour)

ax.set_ylabel("con vs rec")
ax.legend()

In [None]:
# Categorical dtype listing the month names in calendar order, so that
# month-based grouping follows the calendar instead of the alphabet.
# `CategoricalDtype` is never imported by name in this notebook, so it is
# reached through the already-imported pandas namespace.
cat_month = pd.CategoricalDtype([
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
])

In [None]:
# `df` and its 'Month' column are never created in this notebook, so the
# original cast raised NameError. Build them from df1 (whose index holds
# timestamps after the to_datetime conversion).
# NOTE(review): assumes the ridgeline plots are meant to use df1's data — confirm.
df = df1.copy()
df['Month'] = df.index.month_name()
# Cast to the calendar-ordered categorical so months group in calendar order.
df['Month'] = df['Month'].astype(cat_month)

# Creating a Ridgeline Plot

In [None]:
# Ridgeline plot: one density curve of 'dailyconfirmed' per month.
# joyplot() creates its own figure, so the plt.figure() call that used to
# precede it only produced an additional, empty figure and has been removed.
joyplot(
    data=df[['dailyconfirmed', 'Month']],
    by='Month',
    figsize=(10, 6)
)
plt.title('Ridgeline Plot of Covid-19 Cases in India for the last 12 months', fontsize=20)
plt.show()

# Ridgeline plot for multiple variables

In [None]:
# Ridgeline plot with two overlaid densities per month:
# 'dailyrecovered' and 'dailyconfirmed'.
# joyplot() creates its own figure, so the plt.figure() call that used to
# precede it only produced an additional, empty figure and has been removed.
# joyplot() returns (figure, axes) in that order; the original unpacked the
# pair as `ax, fig`, which bound each name to the wrong object.
fig, axes = joyplot(
    data=df[['dailyrecovered', 'dailyconfirmed', 'Month']],
    by='Month',
    figsize=(12, 8),
    column=['dailyrecovered', 'dailyconfirmed'],
    color=['#686de0', '#eb4d4b'],
    legend=True,
    alpha=0.85,
)
# NOTE(review): the title says "last 3 months" but no 3-month filter is
# applied to the data in this cell — confirm the intended wording or filter.
plt.title('Ridgeline Plot of Covid-19 Cases and Recoveries in India for the last 3 months', fontsize=20)
plt.show()

#### WORLD

In [None]:
# Fetch the ECDC daily national case/death JSON and keep its "records" list
# as a DataFrame.
URLW = "https://opendata.ecdc.europa.eu/covid19/nationalcasedeath_eueea_daily_ei/json/"
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than as a confusing
# JSON-decoding error or KeyError a line later.
responsew = requests.get(URLW, timeout=30)
responsew.raise_for_status()
resultw = responsew.json()
df2 = pd.DataFrame(resultw["records"])

In [None]:
# Preview the first rows of the ECDC frame.
df2.head()

In [None]:
# Print column names, non-null counts and dtypes of the ECDC frame.
df2.info()

In [None]:
# List the column labels of the ECDC frame.
df2.columns

In [None]:
# Show the last few values of the 'continentExp' column.
df2.continentExp.tail()