**COVID-19** Analysis

The **coronavirus pandemic** is an ongoing pandemic of coronavirus disease 2019 (COVID-19), caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The outbreak was first identified in Wuhan, Hubei, China, in December 2019, and was recognised as a pandemic by the World Health Organization (WHO) on 11 March 2020. As of 25 March 2020, more than 438,000 cases of COVID-19 have been reported in more than 190 countries and territories, resulting in more than 19,600 deaths and more than 111,000 recoveries.
[COVID-19 outbreak world map per capita (Wikipedia)](https://en.wikipedia.org/wiki/2019%E2%80%9320_coronavirus_pandemic#/media/File:COVID-19_Outbreak_World_Map_per_Capita.svg)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# Read the competition's training and test tables into DataFrames.
train, test = map(pd.read_csv, (
    '../input/covid19-global-forecasting-week-1/train.csv',
    '../input/covid19-global-forecasting-week-1/test.csv',
))

In [None]:
train.head()

In [None]:
test.tail()

In [None]:
train.dtypes

**Changing Date column to datetime**

In [None]:
# Convert the Date column of both frames to datetime values.
train['Date'] = train['Date'].pipe(pd.to_datetime)
test['Date'] = test['Date'].pipe(pd.to_datetime)

**Set the index as date**

In [None]:
# Build Date-indexed copies; `train` and `test` themselves are left unchanged.
new_train, new_test = (frame.set_index('Date') for frame in (train, test))

In [None]:
new_test.head()

In [None]:
new_train.head()

In [None]:
new_train.isnull().sum()

In [None]:
new_train.info()

*Dropping the Id columns*

In [None]:
# Remove the ForecastId column from the test frame.
new_test = new_test.drop(columns=["ForecastId"])

In [None]:
# Remove the Id column from the training frame.
new_train = new_train.drop(columns=["Id"])

**Filling missing values**

In [None]:
# Missing Province/State entries become the empty string.
new_train['Province/State'] = new_train['Province/State'].fillna('')

In [None]:
new_train.isnull().sum()

In [None]:
# Missing Province/State entries become the empty string.
new_test['Province/State'] = new_test['Province/State'].fillna('')

In [None]:
new_test.isnull().sum()

In [None]:
import plotly.express as px

In [None]:
# Creating a dataframe with the total number of cases for every country.
# NOTE(review): this assumes ConfirmedCases is a running (cumulative) count per
# location -- confirm against the dataset description. Under that assumption,
# summing the column over every date counts the same cases once per day, so we
# take each location's latest (maximum) value first and only then add up the
# locations that belong to the same country.
latest_per_location = new_train.groupby(['Country/Region', 'Province/State'])['ConfirmedCases'].max()
confirmiedcases = pd.DataFrame(latest_per_location.groupby(level='Country/Region').sum())
# Keep the country name available as a regular column as well as the index.
confirmiedcases['Country/Region'] = confirmiedcases.index

In [None]:
confirmiedcases.index

In [None]:
# Replace the country-name index with a 1-based running number. The length is
# taken from the frame itself so the cell keeps working when the number of
# countries in the data changes.
confirmiedcases.index = np.arange(1, len(confirmiedcases) + 1)

In [None]:
# Two-column view of the per-country totals: name first, then the case count.
global_confirmiedcases = confirmiedcases.loc[:, ['Country/Region', 'ConfirmedCases']]

In [None]:
# Take the 20 countries with the most confirmed cases, then reverse the order
# before handing the rows to the horizontal bar chart.
top20_reversed = (
    global_confirmiedcases
    .sort_values('ConfirmedCases', ascending=False)
    .iloc[:20]
    .iloc[::-1]
)
fig = px.bar(
    top20_reversed,
    x='ConfirmedCases',
    y='Country/Region',
    title='Confirmed Cases Worldwide',
    text='ConfirmedCases',
    height=900,
    orientation='h',
)
fig.show()

In [None]:
# One row per (date, country). A country can be split over several
# Province/State rows on the same date, so its value for that date is the SUM
# of those rows; taking the max would show only the largest province.
formated_gdf = new_train.groupby(['Date', 'Country/Region'])['ConfirmedCases'].sum()
formated_gdf = formated_gdf.reset_index()
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date'])
# The animation frames are keyed by this formatted date string.
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m/%d/%Y')
# Compress the marker size with a 0.3 power so large counts do not swamp the map.
formated_gdf['size'] = formated_gdf['ConfirmedCases'].pow(0.3)

fig = px.scatter_geo(formated_gdf, locations="Country/Region", locationmode='country names', 
                     color="ConfirmedCases", size='size', hover_name="Country/Region", 
                     range_color= [0, 1500], 
                     projection="natural earth", animation_frame="Date", 
                     title='CORONA: Spread Over Time From Jan 2020 to Mar 2020', color_continuous_scale="portland")
fig.show()

In [None]:
new_train.head()

In [None]:
new_test.head()

In [None]:
new_train["Country/Region"].unique()

**Creating Time Features**

In [None]:
def create_time_features(df):
    """
    Add calendar features derived from the datetime index of ``df``.

    The columns ``date``, ``hour``, ``dayofweek``, ``quarter``, ``month``,
    ``year``, ``dayofyear``, ``dayofmonth`` and ``weekofyear`` are written
    onto ``df`` itself (the frame is modified in place), so callers that only
    want the side effect may ignore the return value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds datetime values.

    Returns
    -------
    pandas.DataFrame
        The eight feature columns (everything added except ``date``).
    """
    df['date'] = df.index
    stamps = df['date'].dt
    df['hour'] = stamps.hour
    df['dayofweek'] = stamps.dayofweek
    df['quarter'] = stamps.quarter
    df['month'] = stamps.month
    df['year'] = stamps.year
    df['dayofyear'] = stamps.dayofyear
    df['dayofmonth'] = stamps.day
    # Series.dt.weekofyear is deprecated and was removed in pandas 2.0;
    # isocalendar().week is its replacement. It yields a nullable UInt32
    # column, so cast back to the plain int64 the old accessor produced.
    df['weekofyear'] = stamps.isocalendar().week.astype('int64')

    X = df[['hour','dayofweek','quarter','month','year',
           'dayofyear','dayofmonth','weekofyear']]
    return X

In [None]:
# Called for the side effect: the function writes the feature columns (and a
# helper 'date' column) onto the frame it receives.
create_time_features(new_train)
# As the last expression of the cell, the feature frame returned for new_test
# is also what the cell displays.
create_time_features(new_test)

In [None]:
new_train.head()

In [None]:
# The helper 'date' column added by create_time_features is no longer needed.
new_train = new_train.drop(columns="date")
new_test = new_test.drop(columns="date")

In [None]:
new_test.head()

**One-hot encoding (OHE) of Province/State & Country/Region**

In [None]:
# Replace Province/State with indicator columns named 'ps_<value>'; the
# indicators are appended after the remaining columns and the source column
# is removed. Train and test are encoded independently of each other.
new_train = pd.get_dummies(new_train, columns=['Province/State'], prefix='ps')
new_test = pd.get_dummies(new_test, columns=['Province/State'], prefix='ps')

In [None]:
# Replace Country/Region with indicator columns named 'cr_<value>'; the
# indicators are appended after the remaining columns and the source column
# is removed. Train and test are encoded independently of each other.
new_train = pd.get_dummies(new_train, columns=['Country/Region'], prefix='cr')
new_test = pd.get_dummies(new_test, columns=['Country/Region'], prefix='cr')

In [None]:
new_train.head()

In [None]:
# Regression target for the first experiment: the Fatalities column.
y_train = new_train["Fatalities"]

In [None]:
# Feature matrix: every column of new_train except the two target columns.
X_train = new_train.drop(columns=["Fatalities", "ConfirmedCases"])

**XGBoost**

In [None]:
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# XGBoost regressor with n_estimators=1000; every other hyperparameter is left
# at the library default.
reg = xgb.XGBRegressor(n_estimators=1000)

In [None]:
# Fit on the current y_train (Fatalities when the cells are run in order).
# NOTE(review): no eval_set is passed, so verbose=True presumably prints
# nothing -- confirm against the xgboost documentation.
reg.fit(X_train, y_train, verbose=True)

In [None]:
# Feature-importance chart for the model fitted above, limited to 20 features.
plot = plot_importance(reg, height=0.9, max_num_features=20)

In [None]:
# Second experiment target: confirmed cases. Read it from new_train, the frame
# X_train was derived from, so features and target share the same Date index
# (the Fatalities target above is taken from new_train as well).
y_train = new_train["ConfirmedCases"]

In [None]:
# Fresh regressor for the ConfirmedCases experiment; this rebinds `reg`, so the
# previously fitted model is no longer reachable under that name.
reg = xgb.XGBRegressor(n_estimators=1000)

In [None]:
# Fit on the current y_train (ConfirmedCases when the cells are run in order).
reg.fit(X_train, y_train, verbose=True)

In [None]:
# Feature-importance chart for the ConfirmedCases model, limited to 20 features.
plot = plot_importance(reg, height=0.9, max_num_features=20)

In [None]:
# Third experiment target: day-over-day relative change in confirmed cases.
# The change must be computed within one location's own series. Grouping by
# country alone would chain the rows of different provinces of the same
# country together and compare one province's count with another's.
# Province/State is blank (NaN) for many rows in `train`; fill it with '' so
# those rows still form a group instead of being dropped from the grouping.
location_keys = [train["Country/Region"], train["Province/State"].fillna("")]
y_train = train.groupby(location_keys)["ConfirmedCases"].pct_change(periods=1)

In [None]:
# Undefined growth values (NaN) are treated as zero change.
y_train = y_train.fillna(0)

In [None]:
# Positive-infinite growth values are likewise set to zero.
y_train = y_train.where(y_train != np.inf, 0)

In [None]:
# Fresh regressor for the growth-rate experiment; rebinds `reg` once more.
reg = xgb.XGBRegressor(n_estimators=1000)

In [None]:
# Fit on the current y_train (the cleaned percentage-change series when the
# cells are run in order).
reg.fit(X_train, y_train, verbose=True)

In [None]:
# Feature-importance chart for the growth-rate model, limited to 20 features.
plot = plot_importance(reg, height=0.9, max_num_features=20)

In [None]:
# Final ConfirmedCases model used for the submission.
# The target comes from new_train, the frame X_train was built from, so
# features and target share one index.
y_train = new_train["ConfirmedCases"]
confirmed_reg = xgb.XGBRegressor(n_estimators=1000)
confirmed_reg.fit(X_train, y_train, verbose=True)

# Train and test were one-hot encoded independently, so the test frame is not
# guaranteed to carry the same columns in the same order as X_train. Align it
# explicitly: indicator columns missing from the test set become 0 and columns
# the model never saw are dropped.
X_test = new_test.reindex(columns=X_train.columns, fill_value=0)
preds = confirmed_reg.predict(X_test)

# Case counts cannot be negative and are reported as whole numbers.
preds = np.round(np.clip(preds, 0, None), 0)

In [None]:
# preds is already a NumPy array at this point, so this conversion only makes
# a copy of it.
preds = np.array(preds)

In [None]:
preds

In [None]:
# Load the sample submission file; its ConfirmedCases and Fatalities columns
# are overwritten with model predictions in the cells below.
submission_new = pd.read_csv("../input/covid19-global-forecasting-week-1/submission.csv")

In [None]:
submission_new.head()

In [None]:
# pd.Series(preds) carries a default 0..n-1 index, and the assignment matches
# values to submission rows by index label.
# NOTE(review): assumes submission_new has the default integer index and as
# many rows as there are predictions -- otherwise unmatched rows become NaN.
submission_new["ConfirmedCases"]=pd.Series(preds)

In [None]:
# Final Fatalities model used for the submission.
# The target comes from new_train, the frame X_train was built from, so
# features and target share one index.
y_train = new_train["Fatalities"]
# NOTE(review): the name `confirmed_reg` is reused here although this model
# predicts fatalities; it is kept unchanged in case other cells refer to it.
confirmed_reg = xgb.XGBRegressor(n_estimators=1000)
confirmed_reg.fit(X_train, y_train, verbose=True)

# Train and test were one-hot encoded independently, so the test frame is not
# guaranteed to carry the same columns in the same order as X_train. Align it
# explicitly: indicator columns missing from the test set become 0 and columns
# the model never saw are dropped.
X_test = new_test.reindex(columns=X_train.columns, fill_value=0)
preds = confirmed_reg.predict(X_test)

# Fatality counts cannot be negative and are reported as whole numbers.
preds = np.round(np.clip(preds, 0, None), 0)
submission_new["Fatalities"]=pd.Series(preds)

In [None]:
submission_new

In [None]:
# Write the filled-in submission to submission.csv, leaving out the index column.
submission_new.to_csv('submission.csv',index=False)