### Required Installations 

In [None]:
# pip install pystan
# pip install fbprophet
# pip install seaborn
# pip install pandas
# pip install numpy
# pip install matplotlib
#pip install plotly

### Imports

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import seaborn as sns
import plotly.express as px
from fbprophet.plot import plot_plotly
import plotly.offline as py
import plotly.graph_objs as go
from ipywidgets import interact, widgets
from IPython import display

import warnings
warnings.filterwarnings('ignore')

from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

### Loading Data

In [None]:
# Read the raw dataset, then parse the `day` strings (formatted YYYY/MM/DD)
# into timestamps so the date filters further down compare real dates.
covid_data = pd.read_csv('COVID-19.csv')
parsed_days = pd.to_datetime(covid_data['day'], format="%Y/%m/%d")
covid_data = covid_data.assign(day=parsed_days)

## 1) On a given day find top 20 countries with most confirmed, most deaths, & most recovered

### Specify date in yyyy-mm-dd order

In [None]:
# Keep only the rows recorded on the day of interest (edit the date here).
on_given_day = covid_data['day'] == '2020-04-10'
Given_Day = covid_data[on_given_day]

### Top 20 Countries with most Deaths
#### On a given day get the max for a country and sort it

In [None]:
# Per-country deaths on the chosen day: take the largest value reported for
# each country, then rank countries from most to fewest deaths.
Given_Day_Deaths = Given_Day.loc[:, ['Country Name', 'Deaths']].copy()
deaths_per_country = Given_Day_Deaths.groupby(["Country Name"]).max()
given_day_top_20_deaths = deaths_per_country.sort_values(by=['Deaths'], ascending=False)
given_day_top_20_deaths.head(20)

#### Plotting the top 20 countries with most deaths on the given day

In [None]:
# Take an explicit copy of the top 20 rows: adding a column to the bare
# `head(20)` slice is chained assignment (SettingWithCopyWarning) and may or
# may not write through to `given_day_top_20_deaths`.
Deaths_Bar_Graph = given_day_top_20_deaths.head(20).copy()
# plotly express needs the country as a regular column, not as the index.
Deaths_Bar_Graph['Country_Name'] = Deaths_Bar_Graph.index
Deaths = px.bar(
    Deaths_Bar_Graph,
    x='Country_Name',
    y="Deaths",
    color='Country_Name',
    title='Top 20 countries with most deaths on a given day',
)
Deaths.show()

### Top 20 Most Infected on a Given Day

In [None]:
# Per-country confirmed cases on the chosen day: largest reported value per
# country, ranked from most to fewest confirmed cases.
Given_Day_Infected = Given_Day.loc[:, ['Country Name', 'Confirmed']].copy()
confirmed_per_country = Given_Day_Infected.groupby(["Country Name"]).max()
given_day_top_20_infected = confirmed_per_country.sort_values(by=['Confirmed'], ascending=False)
given_day_top_20_infected.head(20)

#### Plotting the top 20 countries with most Infected on a given day

In [None]:
# Copy the top 20 rows before adding a column; writing into the bare
# `head(20)` slice is chained assignment (SettingWithCopyWarning).
Infected_Bar_Graph = given_day_top_20_infected.head(20).copy()
# plotly express needs the country as a regular column, not as the index.
Infected_Bar_Graph['Country_Name'] = Infected_Bar_Graph.index
Infected = px.bar(
    Infected_Bar_Graph,
    x='Country_Name',
    y="Confirmed",
    color='Country_Name',
    title="Top 20 Most Infected Countries",
)
Infected.show()

### Top 20 Most Recovered on a Given Day

In [None]:
# Estimate recoveries per country for the chosen day.
# The dataset has no recovered column, so recoveries are derived under the
# assumption that every confirmed case that is not a death is a recovery.
Given_Day_Recovered = Given_Day[['Country Name', 'Deaths', 'Confirmed']].copy()
# GroupBy.sum() takes no `axis`/`skipna` arguments (TypeError on pandas >= 1.0);
# it already sums each column per group and skips missing values.
Given_Day_Recovered = (
    Given_Day_Recovered
    .groupby(["Country Name"])
    .sum()
    .sort_values(by=['Confirmed'], ascending=False)
)
# Countries with zero confirmed cases yield NaN rates (0/0) and NaN Recovered.
Given_Day_Recovered['Death_Rate'] = Given_Day_Recovered['Deaths'] / Given_Day_Recovered['Confirmed']
Given_Day_Recovered['Recovery_Rate'] = 1 - Given_Day_Recovered['Death_Rate']
Given_Day_Recovered['Recovered'] = Given_Day_Recovered['Confirmed'] * Given_Day_Recovered['Recovery_Rate']
# Rank by Recovered *before* taking the top 20. The original took the top 20 by
# Confirmed and only then sorted, which can miss countries with many recoveries.
# `.copy()` makes the result an independent frame rather than a slice.
given_day_top_20_recovered = (
    Given_Day_Recovered
    .sort_values(by=['Recovered'], ascending=False)
    .head(20)
    .copy()
)
given_day_top_20_recovered

### <font color='red'>The figures above assume that every confirmed case that is not fatal recovers. For example, if the Infection Fatality Rate</font> (i.e. Deaths/Cases) = 2.3%<font color='red'>, then 2.3% of people infected with COVID-19 have a fatal outcome while the remaining 100 - 2.3 = 97.7% recover. The Recovered statistic is derived from this assumption.</font>

#### Plotting the top 20 countries with most Recovered

In [None]:
# Build the plotting frame with `assign`, which returns a new DataFrame. The
# original aliased `given_day_top_20_recovered` and added the column through
# the alias, silently modifying the table displayed in the previous cell.
Recovered_Bar_Graph = given_day_top_20_recovered.assign(
    Country_Name=given_day_top_20_recovered.index
)
Recovered = px.bar(
    Recovered_Bar_Graph,
    x='Country_Name',
    y="Recovered",
    color='Country_Name',
    title="Top 20 Countries with Most Recovered",
)
Recovered.show()

## 2) Countries with highest new cases and new deaths Between Two dates

### Please enter the date in yyyy-mm-dd format

In [None]:
# Inclusive bounds of the reporting window, written as yyyy-mm-dd.
start_date, end_date = '2020-02-15', '2020-04-10'

### Highest Deaths Between Two Dates

In [None]:
# Total deaths per country inside the [start_date, end_date] window.
# `between` is inclusive on both ends, matching the original >= / <= pair.
mask = covid_data['day'].between(start_date, end_date)
between_dates = covid_data.loc[mask]
Highest_Deaths = between_dates[['Country Name', 'Deaths']].copy()
# GroupBy.sum() takes no `axis`/`skipna` arguments (TypeError on pandas >= 1.0);
# it already sums per group and skips missing values.
Highest_Deaths = (
    Highest_Deaths
    .groupby(["Country Name"])
    .sum()
    .sort_values(by=['Deaths'], ascending=False)
)
Highest_Deaths.head(20)

#### Plotting the above data

In [None]:
# Copy the top 20 rows before adding a column; writing into the bare
# `head(20)` slice is chained assignment (SettingWithCopyWarning).
Btw_Dat_Hi_De_Bar_Gph = Highest_Deaths.head(20).copy()
# plotly express needs the country as a regular column, not as the index.
Btw_Dat_Hi_De_Bar_Gph['Country_Name'] = Btw_Dat_Hi_De_Bar_Gph.index
# The title is built from the selected dates so it cannot drift out of sync
# with `start_date` / `end_date` when they are edited.
Graph = px.bar(
    Btw_Dat_Hi_De_Bar_Gph,
    x='Country_Name',
    y="Deaths",
    color='Country_Name',
    title=f'Highest Deaths between {start_date} and {end_date}',
)
Graph.show()

### Highest Infections Between the provided Dates

In [None]:
# Total confirmed cases per country inside the selected date window.
Highest_Infections = between_dates[['Country Name', 'Confirmed']].copy()
# GroupBy.sum() takes no `axis`/`skipna` arguments (TypeError on pandas >= 1.0);
# it already sums per group and skips missing values.
Highest_Infections = (
    Highest_Infections
    .groupby(["Country Name"])
    .sum()
    .sort_values(by=['Confirmed'], ascending=False)
)
Highest_Infections.head(20)

#### Plotting the above data

In [None]:
# Copy the top 20 rows before adding a column; writing into the bare
# `head(20)` slice is chained assignment (SettingWithCopyWarning).
Btw_Dat_Hi_In_Bar_Gph = Highest_Infections.head(20).copy()
# plotly express needs the country as a regular column, not as the index.
Btw_Dat_Hi_In_Bar_Gph['Country_Name'] = Btw_Dat_Hi_In_Bar_Gph.index
# The title follows `start_date` / `end_date` instead of repeating the dates.
Infection_Graph_Bw_Dates = px.bar(
    Btw_Dat_Hi_In_Bar_Gph,
    x='Country_Name',
    y="Confirmed",
    color='Country_Name',
    title=f'Highest new cases from {start_date} to {end_date}',
)
Infection_Graph_Bw_Dates.show()

## 3) Find the starting and ending days of the longest spread period for a given country

In [None]:
# Independent copy holding just the columns needed for the spread analysis.
Splice_Data = covid_data.loc[:, ['day', 'Country Name', 'Confirmed']].copy()

### Please enter the Country Name

In [None]:
# Boolean row selector for the country under study (edit the name here).
Country_Mask = Splice_Data['Country Name'].eq('Italy')

#### Separating out the data for the given country

In [None]:
# Rows of the selected country only; the original row labels are kept.
Given_Country_Curve = Splice_Data.loc[Country_Mask]

#### Plotting out the curve to get visual representation

In [None]:
# Daily confirmed cases for the selected country, as a reference curve.
Curve = px.line(
    data_frame=Given_Country_Curve,
    x='day',
    y='Confirmed',
    title="Original Curve",
)
Curve.show()

In [None]:
# Empty frame that the next cell fills with the rows of every qualifying window.
Longest_Spread = pd.DataFrame(columns=('day', 'Country Name', 'Confirmed'))
# Renumber the country's rows 0..n-1 so labels equal positions (the original
# labels are kept in an `index` column). This is rebuilt from
# `Splice_Data[Country_Mask]` rather than from itself: re-running
# `x = x.reset_index()` adds another index column each time and eventually
# raises "cannot insert level_0, already exists".
Given_Country_Curve = Splice_Data[Country_Mask].reset_index()

## Start processing

##### Getting all the spread periods with consecutive days first

In [None]:
# Scan the country's curve in non-overlapping windows of three rows (i, j, k)
# and copy every qualifying window into `Longest_Spread`.
# A window qualifies when at least one of its two steps shows a rise in
# 'Confirmed' AND both gaps between its rows truncate to exactly one day.
# These initial values are overwritten as soon as the loop starts.
i = 0
j = 1
k = 2
for i in range(0,len(Given_Country_Curve.index),3):

    j = i+1
    k = i+2
    #print(i,j,k)
    # Stop once the window would run past the last row. This compares positions
    # with index labels, so it relies on the frame having been re-indexed to
    # 0..n-1 beforehand; up to two trailing rows are therefore never examined.
    if( (k > Given_Country_Curve.index.max()) or (j > Given_Country_Curve.index.max())):
        break
      
        #print(i,j,k)
    elif( ((Given_Country_Curve.iloc[i]['Confirmed'] < Given_Country_Curve.iloc[j]['Confirmed'])
       or (Given_Country_Curve.iloc[j]['Confirmed'] < Given_Country_Curve.iloc[k]['Confirmed']))
       and
        (int((Given_Country_Curve.iloc[k]['day'] - Given_Country_Curve.iloc[j]['day'])/np.timedelta64(1,'D')) ==1
       and int((Given_Country_Curve.iloc[j]['day'] - Given_Country_Curve.iloc[i]['day'])/np.timedelta64(1,'D')) ==1)



      ):

        # Append the three rows in order. Each row is written under label -1 and
        # then every label is shifted up by one, so rows stay in insertion order
        # while their labels end up descending (the newest row has label 0).
        Longest_Spread.loc[-1] = [Given_Country_Curve.iloc[i]['day'], Given_Country_Curve.iloc[i]['Country Name'], Given_Country_Curve.iloc[i]['Confirmed']]
        Longest_Spread.index = Longest_Spread.index + 1
        Longest_Spread.loc[-1] = [Given_Country_Curve.iloc[j]['day'], Given_Country_Curve.iloc[j]['Country Name'], Given_Country_Curve.iloc[j]['Confirmed']]
        Longest_Spread.index = Longest_Spread.index + 1
        Longest_Spread.loc[-1] = [Given_Country_Curve.iloc[k]['day'], Given_Country_Curve.iloc[k]['Country Name'], Given_Country_Curve.iloc[k]['Confirmed']]
        Longest_Spread.index = Longest_Spread.index + 1
        
    # Windows that contain a decrease and did not qualify above are skipped;
    # this branch performs no action.
    elif( ((Given_Country_Curve.iloc[k]['Confirmed'] < Given_Country_Curve.iloc[j]['Confirmed'])
       or (Given_Country_Curve.iloc[j]['Confirmed'] < Given_Country_Curve.iloc[i]['Confirmed']))
       


        ):
        pass

##### Create groupings of consecutive spread period and identify the longest spread period between the groups

In [None]:
# Split the collected rows into runs of consecutive calendar days.
dt = Longest_Spread['day']
day = pd.Timedelta(days=1)
# A row is part of a run if the row after it or the row before it is exactly
# one day away.
next_is_adjacent = (dt - dt.shift(-1)).abs() == day
prev_is_adjacent = dt.diff() == day
in_block = next_is_adjacent | prev_is_adjacent
filt = Longest_Spread[in_block]
# A new run starts wherever the step from the previous kept row is not one
# day; the running count of those breaks serves as the run identifier.
breaks = filt['day'].diff().ne(day)
groups = breaks.cumsum()
for group_id, block in filt.groupby(groups):
    print(block, end='\n\n')

##### Adding the grouping information to the original dataframe in order to separate out the longest spread period

In [None]:
# Attach each row's run identifier (aligned on the row labels).
Longest_Spread['groups'] = groups
# The most frequent identifier belongs to the run with the most rows,
# i.e. the longest spread period.
group_ids = Longest_Spread['groups']
Max_Spread_Group = group_ids.mode()
Max_Spread = Max_Spread_Group[0]
Mask = group_ids.eq(Max_Spread)
# Note: from here on `Curve` is this DataFrame, no longer the earlier figure.
Curve = Longest_Spread.loc[Mask]

##### Plotting out the longest spread period (please match this curve with the original curve shown few cells above to get better clarity of what is done so far)
##### This contains the period where daily new cases tend to increase and also contains those days
##### where new confirmed cases were relatively lower or none at all

In [None]:
# Line chart of the rows that make up the longest spread period.
Spread = px.line(
    data_frame=Curve,
    x='day',
    y='Confirmed',
    title='Longest Spread Period',
)
Spread.show()

### Starting and ending days of the Longest Spread Period

In [None]:
# Daily confirmed counts of the longest spread period, in chronological order.
# Reading the column directly replaces the original loop, which materialised
# every row with `iloc` just to pick one value and left a stray loop variable.
Longest_Spread_List = Curve['Confirmed'].tolist()
print(Longest_Spread_List)

### The zeros at the beginning are also included; they represent the starting days of the longest spread period

#### And it is elapsing:

In [None]:
# One list entry per day, so the list length is the period's length in days.
spread_length_days = len(Longest_Spread_List)
print(spread_length_days)

#### days starting from day 1

### Now we return the actual elements required

In [None]:
# Renumber the longest-spread rows 0..n-1 and find the row holding the peak.
ActualElements = Curve.reset_index()
# 'Confirmed' may be stored as objects after the row-wise inserts; make it numeric.
ActualElements['Confirmed'] = pd.to_numeric(ActualElements['Confirmed'])
# idxmax on the one-column frame returns a Series labelled by column name.
Extraction = ActualElements[['Confirmed']].idxmax()
# Read it by position: `Extraction[0]` depended on integer keys falling back to
# positions on a label-indexed Series, which pandas has deprecated and removed.
Index = Extraction.iloc[0]
Collecting_Elements = []

##### The commented-out statements can be uncommented to see how the conditions are set up; they provide useful insight into how the task is accomplished

In [None]:
# Walk from the first day of the period up to the peak row (`Index`) and record
# every value that exceeds all values collected so far.
# The list is reset here so re-running this cell does not append duplicates to
# results left over from an earlier run.
Collecting_Elements = []
temp = 0  # largest value collected so far
for i in range(0, Index):
    current = ActualElements.iloc[i]['Confirmed']
    following = ActualElements.iloc[i+1]['Confirmed']
#     print("outside i = ", i)
    if current < following and following > temp:
        # Rising step: the later day sets a new high.
#         print("if i=",i)
#         print("if temp before = ",temp)
#         print("if condition",current,"<",following,"&",following,">",temp )
        Collecting_Elements.append(following)
        temp = following
    elif current > following and current > temp:
        # Falling step: the earlier day is a high that was not yet recorded.
#         print("elif i=",i)
#         print("elif temp before = ",temp)
#         print("elif condition",current,">",following,"&",current,">",temp )
        Collecting_Elements.append(current)
        temp = current
        
    
        

#### The Elements are :

In [None]:
# Show the successive new highs of 'Confirmed' gathered by the loop above.
print(Collecting_Elements)

## 4) Using "fbprophet" to chart the progression of COVID-19 cases in the top 5 countries for the next 7 days and 30 days.

#### First we identify the top 5 infected countries

In [None]:
# Total confirmed cases per country over the whole dataset, largest first.
Top_5 = covid_data[['Country Name', 'Confirmed']].copy()
# GroupBy.sum() takes no `axis`/`skipna` arguments (TypeError on pandas >= 1.0);
# it already sums per group and skips missing values.
Top_5 = (
    Top_5
    .groupby(["Country Name"])
    .sum()
    .sort_values(by=['Confirmed'], ascending=False)
)
Top_5.head(5)

## United States Progression

In [None]:
# Build the Prophet training frame for the United States: one row per record
# with the columns Prophet expects, `ds` (date) and `y` (confirmed cases).
Cases = covid_data[['day', 'Country Name', 'Confirmed']].copy()
mask = Cases['Country Name'] == 'United States of America'
United_States_of_America = (
    Cases.loc[mask, ['day', 'Confirmed']]
    .rename(columns={'day': 'ds', 'Confirmed': 'y'})
    # Prophet fits on a numeric target.
    .assign(y=lambda frame: pd.to_numeric(frame['y']))
)
# The original also called `set_index('ds')` and discarded the result, so it
# never took effect; it is dropped because `ds` must remain a column.

### Initialize the model and set seasonality to monthly, since we don't have one complete year's data

In [None]:
# Create the forecaster; interval_width=0.95 requests 95% uncertainty
# intervals instead of the library default.
United_States_Model = Prophet(interval_width=0.95)

In [None]:
# Register a custom seasonal component named "monthly" that repeats every
# 30 days, modelled with 5 Fourier terms. Must be added before fitting.
United_States_Model.add_seasonality(name="monthly",period = 30,fourier_order=5)

### Fit the Data

In [None]:
# Train the model on the US `ds`/`y` frame prepared above.
United_States_Model.fit(United_States_of_America)

### United States Chart the progression (How Next 7 days look like)

In [None]:
# Enable plotly's offline notebook rendering before drawing.
py.init_notebook_mode()

# Extend the training dates by one week and predict over the whole range.
next_7_days = United_States_Model.make_future_dataframe(periods=7)
forecast = United_States_Model.predict(df=next_7_days)

# Interactive chart of the observations and the forecast.
fig = plot_plotly(m=United_States_Model, fcst=forecast)
py.iplot(fig)


### United States Chart the progression (How Next 30 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by 30 days; `forecast` now holds this longer
# horizon and is what the components plot below uses.
next_30_days = United_States_Model.make_future_dataframe(periods=30)
forecast = United_States_Model.predict(df=next_30_days)

fig = plot_plotly(m=United_States_Model, fcst=forecast)
py.iplot(fig)

### Components of United States Model (Trend, Weekly, Monthly)

In [None]:
# plot_components draws with matplotlib, so the plotly initialisation the
# original repeated here is not needed. Binding the returned figure to a name
# keeps Jupyter from rendering it a second time as the cell's result.
us_components_fig = United_States_Model.plot_components(forecast)

## United_States_Model Evaluation

In [None]:
# Rolling-origin evaluation: train on at least the first 80 days, then score
# 3-day-ahead forecasts from successive cutoffs.
us_df_cv = cross_validation(
    United_States_Model,
    horizon='3 days',
    initial='80 days',
)
us_df_cv.head()

### United_States_Model Error

### 3 days means its the 3rd day after the cutoff point

In [None]:
# Summarise the cross-validation forecasts into error metrics per horizon.
us_df_p = performance_metrics(df=us_df_cv)
us_df_p.head(100)

### Because horizon = 3 days is relatively small, the horizon axis in the figure below defaults to hours. This is an open issue in fbprophet and is expected to be fixed in the next version

In [None]:
# Mean absolute percentage error against forecast horizon.
us_fig = plot_cross_validation_metric(df_cv=us_df_cv, metric='mape')
us_fig.show()

## Spain Progression

In [None]:
# Build the Prophet training frame for Spain (`ds` = date, `y` = confirmed
# cases), then configure and fit the model.
Cases = covid_data[['day', 'Country Name', 'Confirmed']].copy()
mask = Cases['Country Name'] == 'Spain'
Spain = (
    Cases.loc[mask, ['day', 'Confirmed']]
    .rename(columns={'day': 'ds', 'Confirmed': 'y'})
    # Prophet fits on a numeric target.
    .assign(y=lambda frame: pd.to_numeric(frame['y']))
)
# The original also called `set_index('ds')` and discarded the result, so it
# never took effect; it is dropped because `ds` must remain a column.
Spain_Model = Prophet(interval_width=0.95)
# Custom seasonal component repeating every 30 days, with 5 Fourier terms.
Spain_Model.add_seasonality(name="monthly", period=30, fourier_order=5)
Spain_Model.fit(Spain)

### Spain Chart the progression (How Next 7 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by one week and predict over the whole range.
spain_next_7_days = Spain_Model.make_future_dataframe(periods=7)
spain_forecast = Spain_Model.predict(df=spain_next_7_days)

spain_fig = plot_plotly(m=Spain_Model, fcst=spain_forecast)
py.iplot(spain_fig)

### Spain Chart the progression (How Next 30 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by 30 days; `spain_forecast` now holds this longer
# horizon and is what the components plot below uses.
spain_next_30_days = Spain_Model.make_future_dataframe(periods=30)
spain_forecast = Spain_Model.predict(df=spain_next_30_days)

spain_fig = plot_plotly(m=Spain_Model, fcst=spain_forecast)
py.iplot(spain_fig)

### Components of Spain Model (Trend, Weekly, Monthly)

In [None]:
# plot_components draws with matplotlib, so the plotly initialisation the
# original repeated here is not needed. Binding the returned figure to a name
# keeps Jupyter from rendering it a second time as the cell's result.
spain_components_fig = Spain_Model.plot_components(spain_forecast)

## Spain_Model Evaluation

In [None]:
# Cross-validation wants the `initial` training window to be roughly three
# times the horizon, so check how many rows are available for this country
# before choosing `initial` in the next cell.
Spain.shape

In [None]:
# Rolling-origin evaluation: train on at least the first 70 days, then score
# 3-day-ahead forecasts from successive cutoffs.
spain_df_cv = cross_validation(
    Spain_Model,
    horizon='3 days',
    initial='70 days',
)
spain_df_cv.head()

### Spain_Model Error

In [None]:
# Summarise the cross-validation forecasts into error metrics per horizon.
spain_df_p = performance_metrics(df=spain_df_cv)
spain_df_p.head(100)

### Plotting the cross validation error (Metric: RMSE)

In [None]:
# Root-mean-square error against forecast horizon. `spain_fig` is re-bound
# here from the earlier plotly forecast figure to this metric plot.
spain_fig = plot_cross_validation_metric(df_cv=spain_df_cv, metric='rmse')
spain_fig.show()

## Italy Progression

In [None]:
# Build the Prophet training frame for Italy (`ds` = date, `y` = confirmed
# cases), then configure and fit the model.
Italy_Cases = covid_data[['day', 'Country Name', 'Confirmed']].copy()
mask = Italy_Cases['Country Name'] == 'Italy'
Italy = (
    Italy_Cases.loc[mask, ['day', 'Confirmed']]
    .rename(columns={'day': 'ds', 'Confirmed': 'y'})
    # Prophet fits on a numeric target.
    .assign(y=lambda frame: pd.to_numeric(frame['y']))
)
# The original also called `set_index('ds')` and discarded the result, so it
# never took effect; it is dropped because `ds` must remain a column.
Italy_Model = Prophet(interval_width=0.95)
# Custom seasonal component repeating every 30 days, with 5 Fourier terms.
Italy_Model.add_seasonality(name="monthly", period=30, fourier_order=5)
Italy_Model.fit(Italy)

### Italy Chart the progression (How Next 7 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by one week and predict over the whole range.
Italy_next_7_days = Italy_Model.make_future_dataframe(periods=7)
Italy_forecast = Italy_Model.predict(df=Italy_next_7_days)

Italy_fig = plot_plotly(m=Italy_Model, fcst=Italy_forecast)
py.iplot(Italy_fig)

### Italy Chart the progression (How Next 30 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by 30 days; `Italy_forecast` now holds this longer
# horizon and is what the components plot below uses.
Italy_next_30_days = Italy_Model.make_future_dataframe(periods=30)
Italy_forecast = Italy_Model.predict(df=Italy_next_30_days)

Italy_fig = plot_plotly(m=Italy_Model, fcst=Italy_forecast)
py.iplot(Italy_fig)

### Components of Italy Model (Trend, Weekly, Monthly)

In [None]:
# plot_components draws with matplotlib, so the plotly initialisation the
# original repeated here is not needed. Binding the returned figure to a name
# keeps Jupyter from rendering it a second time as the cell's result.
Italy_components_fig = Italy_Model.plot_components(Italy_forecast)

## Italy_Model Evaluation

In [None]:
# (rows, columns) of the Italy training frame. Presumably checked to size the
# cross-validation `initial` window used in the next cell.
Italy.shape

In [None]:
# Rolling-origin evaluation: train on at least the first 70 days, then score
# 3-day-ahead forecasts from successive cutoffs.
Italy_df_cv = cross_validation(
    Italy_Model,
    horizon='3 days',
    initial='70 days',
)
Italy_df_cv.head()

### Italy_Model Error

In [None]:
# Summarise the cross-validation forecasts into error metrics per horizon.
Italy_df_p = performance_metrics(df=Italy_df_cv)
Italy_df_p.head(100)

### Plotting the cross validation error (Metric: RMSE)

In [None]:
# Root-mean-square error against forecast horizon. `Italy_fig` is re-bound
# here from the earlier plotly forecast figure to this metric plot.
Italy_fig = plot_cross_validation_metric(df_cv=Italy_df_cv, metric='rmse')
Italy_fig.show()

## Germany Progression

In [None]:
# Build the Prophet training frame for Germany (`ds` = date, `y` = confirmed
# cases), then configure and fit the model.
Germany_Cases = covid_data[['day', 'Country Name', 'Confirmed']].copy()
mask = Germany_Cases['Country Name'] == 'Germany'
Germany = (
    Germany_Cases.loc[mask, ['day', 'Confirmed']]
    .rename(columns={'day': 'ds', 'Confirmed': 'y'})
    # Prophet fits on a numeric target.
    .assign(y=lambda frame: pd.to_numeric(frame['y']))
)
# The original also called `set_index('ds')` and discarded the result, so it
# never took effect; it is dropped because `ds` must remain a column.
Germany_Model = Prophet(interval_width=0.95)
# Custom seasonal component repeating every 30 days, with 5 Fourier terms.
Germany_Model.add_seasonality(name="monthly", period=30, fourier_order=5)
Germany_Model.fit(Germany)

### Germany Chart the progression (How Next 7 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by one week and predict over the whole range.
Germany_next_7_days = Germany_Model.make_future_dataframe(periods=7)
Germany_forecast = Germany_Model.predict(df=Germany_next_7_days)

Germany_fig = plot_plotly(m=Germany_Model, fcst=Germany_forecast)
py.iplot(Germany_fig)

### Germany Chart the progression (How Next 30 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by 30 days; `Germany_forecast` now holds this
# longer horizon and is what the components plot below uses.
Germany_next_30_days = Germany_Model.make_future_dataframe(periods=30)
Germany_forecast = Germany_Model.predict(df=Germany_next_30_days)

Germany_fig = plot_plotly(m=Germany_Model, fcst=Germany_forecast)
py.iplot(Germany_fig)

### Components of Germany Model (Trend, Weekly, Monthly)

In [None]:
# plot_components draws with matplotlib, so the plotly initialisation the
# original repeated here is not needed. Binding the returned figure to a name
# keeps Jupyter from rendering it a second time as the cell's result.
Germany_components_fig = Germany_Model.plot_components(Germany_forecast)

## Germany_Model Evaluation

In [None]:
# (rows, columns) of the Germany training frame. Presumably checked to size
# the cross-validation `initial` window used in the next cell.
Germany.shape

In [None]:
# Rolling-origin evaluation: train on at least the first 70 days, then score
# 3-day-ahead forecasts from successive cutoffs.
Germany_df_cv = cross_validation(
    Germany_Model,
    horizon='3 days',
    initial='70 days',
)
Germany_df_cv.head()

### Germany_Model Error

In [None]:
# Summarise the cross-validation forecasts into error metrics per horizon.
Germany_df_p = performance_metrics(df=Germany_df_cv)
Germany_df_p.head(100)

### Plotting the cross validation error (Metric: RMSE)

In [None]:
# Root-mean-square error against forecast horizon. `Germany_fig` is re-bound
# here from the earlier plotly forecast figure to this metric plot.
Germany_fig = plot_cross_validation_metric(df_cv=Germany_df_cv, metric='rmse')
Germany_fig.show()

## France Progression

In [None]:
# Build the Prophet training frame for France (`ds` = date, `y` = confirmed
# cases), then configure and fit the model.
France_Cases = covid_data[['day', 'Country Name', 'Confirmed']].copy()
mask = France_Cases['Country Name'] == 'France'
France = (
    France_Cases.loc[mask, ['day', 'Confirmed']]
    .rename(columns={'day': 'ds', 'Confirmed': 'y'})
    # Prophet fits on a numeric target.
    .assign(y=lambda frame: pd.to_numeric(frame['y']))
)
# The original also called `set_index('ds')` and discarded the result, so it
# never took effect; it is dropped because `ds` must remain a column.
France_Model = Prophet(interval_width=0.95)
# Custom seasonal component repeating every 30 days, with 5 Fourier terms.
France_Model.add_seasonality(name="monthly", period=30, fourier_order=5)
France_Model.fit(France)

### France Chart the progression (How Next 7 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by one week and predict over the whole range.
France_next_7_days = France_Model.make_future_dataframe(periods=7)
France_forecast = France_Model.predict(df=France_next_7_days)

France_fig = plot_plotly(m=France_Model, fcst=France_forecast)
py.iplot(France_fig)

### France Chart the progression (How Next 30 days look like)

In [None]:
py.init_notebook_mode()

# Extend the training dates by 30 days; `France_forecast` now holds this longer
# horizon and is what the components plot below uses.
France_next_30_days = France_Model.make_future_dataframe(periods=30)
France_forecast = France_Model.predict(df=France_next_30_days)

France_fig = plot_plotly(m=France_Model, fcst=France_forecast)
py.iplot(France_fig)

### Components of France_Model (Trend, Weekly, Monthly)

In [None]:
# plot_components draws with matplotlib, so the plotly initialisation the
# original repeated here is not needed. Binding the returned figure to a name
# keeps Jupyter from rendering it a second time as the cell's result.
France_components_fig = France_Model.plot_components(France_forecast)

## France_Model Evaluation

In [None]:
# (rows, columns) of the France training frame. Presumably checked to size
# the cross-validation `initial` window used in the next cell.
France.shape

In [None]:
# Rolling-origin evaluation: train on at least the first 80 days, then score
# 3-day-ahead forecasts from successive cutoffs.
France_df_cv = cross_validation(
    France_Model,
    horizon='3 days',
    initial='80 days',
)
France_df_cv.head()

### France_Model Error

In [None]:
# Summarise the cross-validation forecasts into error metrics per horizon.
France_df_p = performance_metrics(df=France_df_cv)
France_df_p.head(100)

### Plotting the cross validation error (Metric: RMSE)

In [None]:
# Root-mean-square error against forecast horizon. `France_fig` is re-bound
# here from the earlier plotly forecast figure to this metric plot.
France_fig = plot_cross_validation_metric(df_cv=France_df_cv, metric='rmse')
France_fig.show()

# Exploratory Data Analysis

### Most Infected Country (United States) Confirmed Cases & Deaths From Start (20th Jan) to End (16th Apr) 

In [None]:
# Rows for the United States only, used by the exploratory plots below.
EDA_mask = covid_data['Country Name'] == 'United States of America'
US_EDA = covid_data[EDA_mask]
# The original's mid-cell `US_EDA.head()` displayed nothing (only a cell's last
# expression is rendered), so it has been removed.
fig = px.line(US_EDA, x='day', y='Confirmed', title="Infections in United States of America")
fig.show()

In [None]:
# US deaths over time, with a range slider under the x axis for zooming.
# `Deaths` is re-bound here from the earlier bar chart to this line chart.
Deaths = px.line(
    data_frame=US_EDA,
    x='day',
    y='Deaths',
    title='Deaths in United States of America',
)
Deaths.update_xaxes(rangeslider_visible=True)
Deaths.show()

#### Identifying the above trend line mathematically using a first-degree polynomial

In [None]:
def trendline(data, order=1):
    """Fit a polynomial to a Series and return its linear (degree-1) coefficient.

    The x values are the Series' index labels, not dates or positions, so the
    result is the change in value per unit of index.

    Parameters
    ----------
    data : pandas.Series
        Numeric values with a numeric index.
    order : int, default 1
        Degree of the fitted polynomial; must be at least 1.

    Returns
    -------
    float
        Coefficient of the x**1 term (the slope when ``order`` is 1).
    """
    x_values = data.index.values
    y_values = list(data)
    # polyfit returns coefficients from highest degree down, so the x**1 term
    # is the second-to-last entry.
    fitted = np.polyfit(x_values, y_values, order)
    return float(fitted[-2])




# Slope of a straight-line fit through the US daily confirmed cases.
us_confirmed = US_EDA['Confirmed']
slope = trendline(us_confirmed, order=1)
print(slope)

# Summary of World Wide COVID Cases by Infections and Deaths 

In [None]:
# Worldwide totals per day: index by date, order chronologically, then sum
# every country's figures for each date.
df_per_day = (
    covid_data[['day', 'Confirmed', 'Deaths']]
    .copy()
    .set_index('day')
    .sort_values(by='day')
    .groupby('day')[['Confirmed', 'Deaths']]
    .sum()
)
df_per_day.describe()

## Plotting Worldwide Infections & Deaths Per day

In [None]:
# Worldwide confirmed cases and deaths per day on one chart with two y axes:
# confirmed on the left axis, deaths on the right axis.
color1 = '#9467bd'  # confirmed trace and left axis
color2 = '#F08B00'  # deaths trace and right axis

trace1 = go.Scatter(
    x=df_per_day.index,
    y=df_per_day['Confirmed'],
    mode='lines',
    name='Confirmed',
    line=dict(color=color1),
)
trace2 = go.Scatter(
    x=df_per_day.index,
    y=df_per_day['Deaths'],
    mode='lines',
    name='Deaths',
    line=dict(color=color2),
    # Bind the trace to the secondary axis. The original configured `yaxis2`
    # in the layout but attached no trace to it, so the axis went unused.
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title="Worldwide Infections & Deaths Per Day",
    yaxis=dict(
        # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`.
        title=dict(text='Numbers', font=dict(color=color1)),
        tickfont=dict(color=color1),
    ),
    yaxis2=dict(
        title=dict(text='deaths', font=dict(color=color2)),
        tickfont=dict(color=color2),
        overlaying='y',
        side='right',
    ),
    xaxis=dict(
        rangeslider=dict(visible=True),
        type='date',
    ),
)
fig = go.Figure(data=data, layout=layout)
# iplot renders inline and returns nothing, so its result is not stored.
py.iplot(fig)

# Infection Fatality Rate in Top 5 Infected Countries

In [None]:
# Deaths as a percentage of confirmed cases per country, over the whole
# dataset, listed for the five countries with the most confirmed cases.
Death_Rate = covid_data[['Country Name', 'Deaths', 'Confirmed']].copy()
# GroupBy.sum() takes no `axis`/`skipna` arguments (TypeError on pandas >= 1.0);
# it already sums per group and skips missing values.
Death_Rate = (
    Death_Rate
    .groupby(["Country Name"])
    .sum()
    .sort_values(by=['Confirmed'], ascending=False)
)
Death_Rate['Death_Rate'] = (Death_Rate['Deaths'] / Death_Rate['Confirmed']) * 100
Death_Rate.head(5)

### On Average how many days until the Peak is hit

In [None]:
# For each of the five most infected countries, count how many rows (days,
# assuming one row per day) pass before daily confirmed cases reach their peak.
Peak = covid_data[['day', 'Country Name', 'Confirmed']].copy()
Peak['Confirmed'] = pd.to_numeric(Peak['Confirmed'])
# Named `top_countries` rather than `list`: rebinding the builtin `list` made
# any later `list(...)` call (e.g. inside `trendline`) fail with
# "'list' object is not callable". After running the old version of this
# cell, restart the kernel (or run `del list`) to restore the builtin.
top_countries = ['United States of America', 'Spain', 'Italy', 'Germany', 'France']
days_count = []
for country_name in top_countries:
    # Chained calls return new frames, so no filtered slice is sorted in place.
    Country = (
        Peak[Peak['Country Name'] == country_name]
        .sort_values(by=['day'])
        .reset_index(drop=True)
    )
    # With rows renumbered from 0 in date order, the label of the maximum is
    # the number of rows that precede the peak. It is appended directly, so the
    # global `Index` used by the spread analysis is no longer overwritten.
    days_count.append(Country['Confirmed'].idxmax())
print("Days", days_count)
print("Average", sum(days_count) / len(days_count))

## It takes about 63 days for a country to hit its peak, after which cases start to go down

## Worldwide Cumulative Confirmed

In [None]:
# Worldwide cumulative confirmed cases per day: add up every country's
# cumulative figure for each date.
Cumulative_Confirmed = covid_data[['day', 'Country Name', 'Cumulative Confirmed']].copy()
Cumulative_Confirmed['Cumulative Confirmed'] = pd.to_numeric(Cumulative_Confirmed['Cumulative Confirmed'])
Cumulative_Confirmed = (
    Cumulative_Confirmed
    .sort_values(by='day')
    .groupby('day')[['Cumulative Confirmed']]
    .sum()
)
# Removed from the original: a `reset_index()` whose result was discarded, and
# a column named 'Country Name' that was filled with dates through an alias of
# this frame and never used by the chart.
Cumulative_Bar_Graph = Cumulative_Confirmed
Cumulative = px.bar(
    Cumulative_Bar_Graph,
    x=Cumulative_Confirmed.index,
    y=Cumulative_Confirmed['Cumulative Confirmed'],
    title="Cumulative Confirmed Across World",
)
Cumulative.update_layout(xaxis_title="Dates")
Cumulative.show()

## Worldwide Cumulative Deaths

In [None]:
# Worldwide cumulative deaths per day: add up every country's cumulative
# figure for each date.
Cumulative_Deaths = covid_data[['day', 'Country Name', 'Cumulative Deaths']].copy()
Cumulative_Deaths['Cumulative Deaths'] = pd.to_numeric(Cumulative_Deaths['Cumulative Deaths'])
Cumulative_Deaths = (
    Cumulative_Deaths
    .sort_values(by='day')
    .groupby('day')[['Cumulative Deaths']]
    .sum()
)
# Removed from the original: a `reset_index()` whose result was discarded, and
# a column named 'Country Name' that was filled with dates through an alias of
# this frame and never used by the chart.
Cumulative_Bar_Graph = Cumulative_Deaths
Cumulative = px.bar(
    Cumulative_Bar_Graph,
    x=Cumulative_Deaths.index,
    y=Cumulative_Deaths['Cumulative Deaths'],
    title="Cumulative Deaths Across World",
)
Cumulative.update_layout(xaxis_title="Dates")
Cumulative.show()

## Interactive Plot (Confirmed Cases Across Different Countries)

In [None]:
def plotcountry(Country):
    """Return a line chart of daily confirmed cases for one country.

    Parameters
    ----------
    Country : str
        A value from the 'Country Name' column of ``covid_data``.

    Returns
    -------
    The plotly express figure, with its legend hidden.
    """
    country_rows = covid_data[covid_data['Country Name'] == Country]
    chart_title = 'Daily Confirmed Cases across ' + Country
    chart = px.line(country_rows, x='day', y='Confirmed', title=chart_title)
    chart.update_layout(showlegend=False)
    return chart


# Dropdown listing every country in the data; choosing one redraws the chart.
Names = covid_data['Country Name'].unique()
confirmed_country_picker = widgets.Dropdown(options=Names)
interact(plotcountry, Country=confirmed_country_picker)

## Interactive Plot (Deaths Across Different Countries)

In [None]:
def Deaths_Plot(Country):
    """Return a line chart of daily deaths for one country.

    Parameters
    ----------
    Country : str
        A value from the 'Country Name' column of ``covid_data``.

    Returns
    -------
    The plotly express figure, with its legend hidden.
    """
    country_rows = covid_data[covid_data['Country Name'] == Country]
    chart_title = 'Daily Deaths across ' + Country
    chart = px.line(country_rows, x='day', y='Deaths', title=chart_title)
    chart.update_layout(showlegend=False)
    return chart


# Dropdown listing every country in the data; choosing one redraws the chart.
Names = covid_data['Country Name'].unique()
deaths_country_picker = widgets.Dropdown(options=Names)
interact(Deaths_Plot, Country=deaths_country_picker)