# Imports

In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import warnings

import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
import seaborn as sns
import squarify
import plotly.offline as py
import plotly_express as px

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot

from IPython.display import Image
warnings.filterwarnings('ignore')
%matplotlib inline

# Initializations

In [2]:
# --- India datasets (local CSVs under ../input) ---------------------------
india_covid_19 = pd.read_csv('../input/covid19-in-india/covid_19_india.csv')
state_testing = pd.read_csv('../input/covid19-in-india/StatewiseTestingDetails.csv')

# --- World datasets: population table plus the CSSEGISandData/COVID-19
# --- time series and one daily report, fetched from GitHub ----------------
world_population = pd.read_csv('../input/population-by-country-2020/population_by_country_2020.csv')
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recovered_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
latest_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-04-2020.csv')

# The India case file is parsed day-first; the testing file uses pandas' default parsing.
india_covid_19['Date'] = pd.to_datetime(india_covid_19['Date'], dayfirst=True)
state_testing['Date'] = pd.to_datetime(state_testing['Date'])

# Every column from position 4 onwards is treated as a date header.
dates = list(pd.to_datetime(list(confirmed_df.columns[4:])))
# Other cells slice the series from the '1/30/20' column; offset 8 is
# presumably that same column -- TODO confirm against the CSV header.
dates_to_visualize = dates[8:]

# Per-country accumulators (dict: country -> series) filled in by later cells.
global_confirmed, global_deaths, global_recovered, global_active = {}, {}, {}, {}

# Positional accumulators (one series per country) filled in by later cells.
global_confirmed_list, global_recovered_list, global_deaths_list, global_active_list = [], [], [], []

# Country-level totals: one summed row per 'Country/Region'.
df1, df2, df3 = (
    frame.groupby('Country/Region').sum().reset_index()
    for frame in (confirmed_df, deaths_df, recovered_df)
)

# --- Forecasting-competition train/test sets --------------------------------
train = pd.read_csv('../input/covid19-global-forecasting-week-4/train.csv')
test = pd.read_csv('../input/covid19-global-forecasting-week-4/test.csv')
train['Date'] = pd.to_datetime(train['Date'])
test['Date'] = pd.to_datetime(test['Date'])

# Selection menu for the interactive cell: keys are 1-based menu numbers stored
# as strings (they are looked up with the raw text returned by input()), values
# are the country names used for the 'Country/Region' lookups in this notebook.
country_dict = {
    str(menu_number): country_name
    for menu_number, country_name in enumerate(
        [
            'China', 'Germany', 'Iran', 'Italy', 'Spain', 'US',
            'Korea, South', 'France', 'Turkey', 'United Kingdom', 'India',
        ],
        start=1,
    )
}


# 1. World Updates  <a id="32"></a>


In [3]:
def get_world_updates():
    """Draw a donut chart of the world-wide active / recovered / deceased split.

    Reads the module-level ``confirmed_df``, ``recovered_df`` and ``deaths_df``
    frames and sums the last column of each. The chart is drawn on a new
    matplotlib figure; nothing is returned.
    """
    # Reduce each frame's last column to a scalar so plt.pie receives a flat
    # 1-D list instead of a list of one-element Series.
    world_confirmed = confirmed_df.iloc[:, -1].sum()
    world_recovered = recovered_df.iloc[:, -1].sum()
    world_deaths = deaths_df.iloc[:, -1].sum()
    # Active = confirmed minus both recovered and deceased. The previous
    # `confirmed - (recovered - deaths)` added the deaths back in.
    world_active = world_confirmed - world_recovered - world_deaths

    labels = ['Active', 'Recovered', 'Deceased']
    sizes = [world_active, world_recovered, world_deaths]
    color = ['#66b3ff', 'green', 'red']
    # Same small offset for every wedge.
    explode = [0.05] * len(labels)

    plt.figure(figsize=(15, 10))
    plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=9, explode=explode, colors=color)

    # A white disc over the centre turns the pie into a donut.
    centre_circle = plt.Circle((0, 0), 0.70, fc='white')
    fig = plt.gcf()
    fig.gca().add_artist(centre_circle)

    plt.title('World COVID-19 Cases', fontsize=20)
    plt.axis('equal')
    plt.tight_layout()


# 1.1. Active, Recovered, Deaths in hotspot countries

In [4]:
def get_hotspot_country_details():
    """Plot active / recovered / death bars for each hotspot country.

    Side effects: fills the module-level dicts ``global_confirmed``,
    ``global_deaths``, ``global_recovered`` and ``global_active`` with one
    list per hotspot country (values from the '1/30/20' column onwards), then
    draws one subplot per country on a new figure. Returns nothing.
    """
    hotspots = ['China','Germany','Iran','Italy','Spain','US','Korea, South','France','Turkey','United Kingdom','India']

    # Local x-axis values, derived the same way as the module-level ones.
    all_dates = list(pd.to_datetime(list(confirmed_df.columns[4:])))
    dates_to_visualize = all_dates[8:]

    confirmed_by_country = confirmed_df.groupby('Country/Region').sum().reset_index()
    deaths_by_country = deaths_df.groupby('Country/Region').sum().reset_index()
    recovered_by_country = recovered_df.groupby('Country/Region').sum().reset_index()

    def _series_for(frame, country):
        # Row of `country`, restricted to the '1/30/20' column onwards, as a list.
        return frame[frame['Country/Region'] == country].loc[:, '1/30/20':].values.tolist()[0]

    for country in hotspots:
        global_confirmed[country] = _series_for(confirmed_by_country, country)
        global_deaths[country] = _series_for(deaths_by_country, country)
        global_recovered[country] = _series_for(recovered_by_country, country)
        # Plain subtraction works for ints and floats alike (int.__sub__
        # raised TypeError as soon as a value was not a Python int).
        global_active[country] = [
            confirmed - deaths - recovered
            for confirmed, deaths, recovered in zip(
                global_confirmed[country], global_deaths[country], global_recovered[country]
            )
        ]

    fig = plt.figure(figsize=(15, 15))
    # Title the figure with the last date actually present in the data rather
    # than a hard-coded day.
    as_of = dates_to_visualize[-1].strftime('%B %d')
    plt.suptitle('Active, Recovered, Deaths in Hotspot Countries and India as of ' + as_of, fontsize=20, y=1.0)

    for position, country in enumerate(hotspots, start=1):
        ax = fig.add_subplot(6, 2, position)
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%b'))
        ax.bar(dates_to_visualize, global_active[country], color='green', alpha=0.6, label='Active')
        ax.bar(dates_to_visualize, global_recovered[country], color='grey', label='Recovered')
        ax.bar(dates_to_visualize, global_deaths[country], color='red', label='Death')
        ax.set_title(country)

    # All subplots share the same three labels, so one figure-level legend is
    # enough; adding it inside the loop stacked one legend per country.
    handles, labels = ax.get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper left')

    plt.tight_layout(pad=3.0)

# 1.2. Death rate per million in hotspot countries

In [5]:
def get_death_rate_per_million():
    """Plot deaths per million inhabitants for the three most recent days.

    Reads the module-level ``world_population`` table and the country-level
    deaths frame ``df2``; neither is modified. Draws a dumbbell-style chart
    (one horizontal line per country, one marker per day) on a new figure.
    Returns nothing.
    """
    hotspots = ['China','Germany','Iran','Italy','Spain','United States','South Korea','France','Turkey','United Kingdom','India']
    # The population table and the deaths table spell two countries differently.
    deaths_table_names = {'United States': 'US', 'South Korea': 'Korea, South'}

    rows = []
    for country in hotspots:
        # Convert to millions locally. The previous version rescaled the
        # shared world_population column in place, so every further call
        # divided it by a million again.
        population_millions = float(
            world_population.loc[
                world_population['Country (or dependency)'] == country, 'Population (2020)'
            ].iloc[0]
        ) / 1000000

        # Read the deaths straight from df2 so this function no longer depends
        # on get_hotspot_country_details() having filled global_deaths first.
        lookup_name = deaths_table_names.get(country, country)
        last_three_days = df2[df2['Country/Region'] == lookup_name].loc[:, '1/30/20':].values.tolist()[0][-3:]

        rows.append([country] + [round(deaths / population_millions, 2) for deaths in last_three_days])

    country_death_rate = pd.DataFrame(rows, columns=['country', 'day1', 'day2', 'day3'])

    # Legend labels come from the last three column headers of df2, so they
    # always match the plotted data.
    day_labels = [day.strftime('%d %b') for day in pd.to_datetime(list(df2.columns[-3:]))]

    plt.figure(figsize=(10, 10))
    plt.hlines(y=country_death_rate['country'], xmin=country_death_rate['day1'], xmax=country_death_rate['day3'], color='grey', alpha=0.4)
    plt.scatter(country_death_rate['day1'], country_death_rate['country'], color='skyblue', label=day_labels[0])
    plt.scatter(country_death_rate['day2'], country_death_rate['country'], color='green', label=day_labels[1])
    plt.scatter(country_death_rate['day3'], country_death_rate['country'], color='red', label=day_labels[2])
    plt.legend()
    plt.title("Death Rate per Million in Hotspot Countries", fontsize=20)
    plt.xlabel('Death Rate per Million');


# 1.3. Comparison between different countries

In [6]:
def get_comparision_between_countries():
    """Plot the confirmed-case curves of the hotspot countries on one chart.

    Side effects: refills the module-level lists ``global_confirmed_list``,
    ``global_deaths_list`` and ``global_recovered_list`` with one series per
    country, in the order of ``countries`` below (values from the '1/30/20'
    column onwards, read from the module-level ``df1``/``df2``/``df3``).
    Draws on a new figure and returns nothing.
    """
    countries = ['China','Germany','Iran','Italy','Spain','US','Korea, South','France','Turkey','United Kingdom','India']

    def _series_for(frame, country):
        # Row of `country`, restricted to the '1/30/20' column onwards, as a list.
        return frame[frame['Country/Region'] == country].loc[:, '1/30/20':].values.tolist()[0]

    # Empty the shared lists in place so calling this function again does not
    # keep appending duplicate series.
    for shared_list in (global_confirmed_list, global_deaths_list, global_recovered_list):
        shared_list.clear()

    for country in countries:
        global_confirmed_list.append(_series_for(df1, country))
        global_deaths_list.append(_series_for(df2, country))
        # Recovered series belong in the recovered list; they were previously
        # appended to global_deaths_list by mistake.
        global_recovered_list.append(_series_for(df3, country))

    plt.figure(figsize=(15, 10))
    plt.xticks(rotation=90, fontsize=11)
    plt.yticks(fontsize=10)
    plt.xlabel("Dates", fontsize=20)
    plt.ylabel('Total cases', fontsize=20)
    plt.title("Comparison with other Countries", fontsize=20)

    for country, confirmed_series in zip(countries, global_confirmed_list):
        plt.plot_date(y=confirmed_series, x=dates_to_visualize, label=country, linestyle='-')
    plt.legend();

# 2. Prediction and Visualization

# 2.1. Country wise Visualization <a id="1"></a>

In [7]:
def get_country_visualization(countryName):
    """Plot cumulative confirmed, recovered and death counts for one country.

    Parameters
    ----------
    countryName : str
        Value to match in the 'Country/Region' column of the module-level
        ``confirmed_df``, ``deaths_df`` and ``recovered_df`` frames.

    Raises
    ------
    IndexError
        If no row matches ``countryName``.

    Draws on a new figure (x axis: module-level ``dates_to_visualize``) and
    returns nothing.
    """
    def _country_series(frame):
        # Sum all rows per country, then take the requested country's values
        # from the '1/30/20' column onwards.
        by_country = frame.groupby('Country/Region').sum().reset_index()
        return by_country[by_country['Country/Region'] == countryName].loc[:, '1/30/20':].values.tolist()[0]

    confirmed = _country_series(confirmed_df)
    deaths = _country_series(deaths_df)
    recovered = _country_series(recovered_df)

    plt.figure(figsize=(15, 10))
    plt.xticks(rotation=90, fontsize=11)
    plt.yticks(fontsize=10)
    plt.xlabel("Dates", fontsize=20)
    plt.ylabel('Total cases', fontsize=20)
    # The three curves are Confirmed, Recovered and Death; the title used to
    # say "Active" although no active-case curve is drawn.
    title = "Total Confirmed, Recovered, Death in " + countryName
    plt.title(title, fontsize=20)

    plt.plot_date(y=confirmed, x=dates_to_visualize, label='Confirmed', linestyle='-', color='b')
    plt.plot_date(y=recovered, x=dates_to_visualize, label='Recovered', linestyle='-', color='g')
    plt.plot_date(y=deaths, x=dates_to_visualize, label='Death', linestyle='-', color='r')
    plt.legend();

# 2.2. Country wise Prediction <a id="7"></a>

## 2.2.1. Prediction using growth factor

In [8]:
def get_prediction_using_growth_factor(countryName):
    """Project confirmed cases 15 days ahead with an average growth factor.

    The growth factor is the mean of the day-over-day ratios of the country's
    confirmed counts from the '2/4/20' column onwards. The projection starts
    from that country's latest confirmed count and multiplies by the factor
    once per day.

    Parameters
    ----------
    countryName : str
        Value to match in the 'Country/Region' column of ``confirmed_df``.

    Raises
    ------
    IndexError
        If no row matches ``countryName``.
    ValueError
        If the series contains no non-zero day to compute a ratio from.

    Prints the average growth factor, draws the projection on a new figure
    and returns nothing.
    """
    confirmed_by_country = confirmed_df.groupby('Country/Region').sum().reset_index()
    confirmed = confirmed_by_country[confirmed_by_country['Country/Region'] == countryName].loc[:, '2/4/20':].values.tolist()[0]

    # Day-over-day ratios. Days whose previous count is 0 are skipped: the
    # ratio is undefined there and used to raise ZeroDivisionError for
    # countries that had no cases yet on 2/4/20.
    growth_ratios = [
        today / yesterday
        for yesterday, today in zip(confirmed, confirmed[1:])
        if yesterday > 0
    ]
    if not growth_ratios:
        raise ValueError('No non-zero confirmed counts available for ' + countryName)

    growth_factor = sum(growth_ratios) / len(growth_ratios)
    print('Average growth factor', growth_factor)

    last_observed_date = dates_to_visualize[-1]
    prediction_dates = [last_observed_date + datetime.timedelta(days=offset) for offset in range(1, 16)]

    # Seed the projection with this country's own latest count. The previous
    # code read global_confirmed_list[5], which is always the sixth hotspot
    # (US) and is empty unless get_comparision_between_countries() ran first.
    previous_day_cases = confirmed[-1]
    predicted_cases = []
    for _ in range(15):
        previous_day_cases = previous_day_cases * growth_factor
        predicted_cases.append(previous_day_cases)

    plt.figure(figsize=(15, 10))
    plt.xticks(rotation=90, fontsize=11)
    plt.yticks(fontsize=10)
    plt.xlabel("Dates", fontsize=20)
    plt.ylabel('Total cases', fontsize=20)
    title = "Predicted Values for the next 15 Days for " + countryName
    plt.title(title, fontsize=20)
    plt.plot_date(y=predicted_cases, x=prediction_dates, linestyle='-', color='c')

## 2.2.2. Prediction using Prophet Model <a id="10"></a>

In [9]:
def get_prediction_using_prophet(countryName):
    """Fit a Prophet model on one country's confirmed cases and plot 30 extra days.

    Parameters
    ----------
    countryName : str
        Value to match in the 'Country/Region' column of the module-level
        country totals ``df1``.

    Raises
    ------
    IndexError
        If no row matches ``countryName``.

    Uses the module-level ``dates`` list as the ``ds`` column. Shows an
    interactive plotly chart and a static Prophet chart; returns nothing.
    """
    confirmed = df1[df1['Country/Region'] == countryName].loc[:, '1/22/20':].values.tolist()[0]
    # Prophet expects the columns to be named 'ds' (dates) and 'y' (values).
    data = pd.DataFrame({'ds': dates, 'y': confirmed})

    prop = Prophet()
    prop.fit(data)
    future = prop.make_future_dataframe(periods=30)
    prop_forecast = prop.predict(future)

    # Render the interactive figure. It used to be assigned to `fig` and then
    # overwritten by the static plot below, so it was never displayed.
    py.init_notebook_mode()
    py.iplot(plot_plotly(prop, prop_forecast))

    # Static matplotlib version of the same forecast.
    prop.plot(prop_forecast, xlabel='Date', ylabel='Confirmed Cases')

## 2.2.3. Prediction using ARIMA <a id="11"></a>

In [10]:
def get_prediction_using_ARIMA(countryName, steps=30):
    """Fit an ARIMA(5, 1, 0) model on one country's confirmed cases and plot a forecast.

    Parameters
    ----------
    countryName : str
        Value to match in the 'Country/Region' column of the module-level
        country totals ``df1``.
    steps : int, optional
        Number of days to forecast beyond the last observed date (default 30,
        the horizon that was previously hard-coded).

    Raises
    ------
    IndexError
        If no row matches ``countryName``.

    Uses the module-level ``dates`` list as the observation dates. Draws the
    actual and predicted curves on a new figure; returns nothing.
    """
    confirmed = df1[df1['Country/Region'] == countryName].loc[:, '1/22/20':].values.tolist()[0]
    data = pd.DataFrame({'ds': dates, 'y': confirmed})

    arima = ARIMA(data['y'], order=(5, 1, 0))
    arima = arima.fit(trend='c', full_output=True, disp=True)
    forecast = arima.forecast(steps=steps)
    # Only the first element of the forecast result is plotted.
    pred = list(forecast[0])

    last_observed_date = data['ds'].max()
    prediction_dates = [
        last_observed_date + datetime.timedelta(days=offset)
        for offset in range(1, steps + 1)
    ]

    plt.figure(figsize=(15, 10))
    plt.xlabel("Dates", fontsize=20)
    plt.ylabel('Total cases', fontsize=20)
    # The title states the real horizon; it used to say "15 Days" while 30
    # steps were forecast.
    title = "Predicted Values for the next " + str(steps) + " Days for " + countryName
    plt.title(title, fontsize=20)

    plt.plot_date(y=pred, x=prediction_dates, linestyle='dashed', color='#ff9999', label='Predicted')
    plt.plot_date(y=data['y'], x=data['ds'], linestyle='-', color='blue', label='Actual')
    plt.legend();

# User interaction code

In [11]:
# Ask whether to draw the world-wide overview charts.
print("Do you want to see world wide covid-19 updates(YES/NO)?")
# Ignore surrounding whitespace and letter case so that ' yes' or 'Yes' is
# accepted as well as the exact 'YES'.
answer = input().strip().upper()
if answer == 'YES':
    get_world_updates()
    get_hotspot_country_details()
    get_death_rate_per_million()
    get_comparision_between_countries()

Do you want to see world wide covid-19 updates(YES/NO)?


 NO


In [None]:
# Show the numbered country menu and run every visualisation / forecast for
# the chosen entry.
print("Select a country from the list of hotspot countries to predict the cases for next 15 days")
for number, country in country_dict.items():
    print(number, ":", country)

# Strip whitespace so a reply such as ' 11' still matches a menu key.
answer = input().strip()
if answer in country_dict:
    selected_country = country_dict[answer]
    get_country_visualization(selected_country)
    get_prediction_using_growth_factor(selected_country)
    get_prediction_using_prophet(selected_country)
    get_prediction_using_ARIMA(selected_country)
else:
    # An unknown reply used to end in a bare KeyError from country_dict[answer].
    print("Unknown selection", repr(answer), "- please enter one of:", ", ".join(country_dict))