# COVID-19 (Coronavirus Disease 2019)

<img src="https://harfordcountyhealth.com/wp-content/uploads/2020/01/home-banner.jpg">

Coronavirus disease (COVID-19) is an infectious disease caused by SARS-CoV-2, a coronavirus first identified in late 2019.
The disease causes respiratory illness (like the flu) with symptoms such as a cough, fever, and in more severe cases, difficulty breathing.

## Identification of Relevant Datasets

In [None]:
import numpy as np
import pandas as pd

# Read the two week-1 competition tables: train.csv into dfTrain, test.csv into dfTest.
dfTrain, dfTest = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/covid19-global-forecasting-week-1/train.csv",
        "../input/covid19-global-forecasting-week-1/test.csv",
    )
)

In [None]:
# Turn the Date column into an integer by stripping the '-' separators
# (assumes 'YYYY-MM-DD'-style strings, e.g. 2020-01-22 -> 20200122 — confirm against the CSV).
# Going through astype(str) keeps this cell re-runnable: once Date is already an
# integer the conversion is a no-op instead of failing on a missing .replace method.
dfTrain['Date'] = dfTrain['Date'].astype(str).str.replace('-', '', regex=False).astype(int)
dfTest['Date'] = dfTest['Date'].astype(str).str.replace('-', '', regex=False).astype(int)

## Data Cleansing

In [None]:
# Remove the Province/State column from the training frame.
# errors='ignore' makes the cell safe to re-run after the column is already gone.
dfTrain = dfTrain.drop(columns=['Province/State'], errors='ignore')

In [None]:
# Count the missing values in each remaining training column.
dfTrain.isna().sum()

In [None]:
# Preview the first rows of the training frame.
dfTrain.head()

In [None]:
# Show the dtype of every training column (Date should now be an integer).
dfTrain.dtypes

## Preprocessing and Preparing Data for Modeling 

In [None]:
# Assign the feature and target columns for training and testing.
# Features: location (Lat, Long) plus the integer-encoded Date, identical for both frames.
X_train = dfTrain.loc[:, ['Lat', 'Long', 'Date']]
X_test = dfTest.loc[:, ['Lat', 'Long', 'Date']]

# Targets are kept as single-column DataFrames, one per quantity being forecast.
y1_train = dfTrain.loc[:, ['ConfirmedCases']]
y2_train = dfTrain.loc[:, ['Fatalities']]

### Random Forest Classifier Model

In [None]:
# Random Forest classifier (200 trees) used for the forecast.
# NOTE(review): the targets are counts, so a classifier treats every distinct count
# as its own class; RandomForestRegressor may be the better fit — confirm intent.
from sklearn.ensemble import RandomForestClassifier

# Fixed seed so that Restart & Run All rebuilds the same forest and predictions.
model = RandomForestClassifier(n_estimators=200, random_state=42)

In [None]:
# Display the hyperparameters of the forest stored in `model` (the estimator that
# is actually fitted), rather than constructing a separate throwaway estimator
# whose settings could differ from it.
model.get_params()

In [None]:
# Fit the forest on the confirmed-case target and predict for every test row.
# The target is flattened to 1-D because scikit-learn emits a DataConversionWarning
# when it is handed a column-vector y.
model.fit(X_train, y1_train.to_numpy().ravel())
pred1_rf = pd.DataFrame({"ConfirmedCases_prediction": model.predict(X_test)})

In [None]:
# Preview the Random Forest confirmed-case predictions.
pred1_rf.head()

In [None]:
# Refit the same forest object on the fatalities target and predict for every test row.
# After this cell `model` holds the fatalities fit, not the confirmed-cases one.
# The target is flattened to 1-D because scikit-learn emits a DataConversionWarning
# when it is handed a column-vector y.
model.fit(X_train, y2_train.to_numpy().ravel())
pred2_rf = pd.DataFrame({"Fatalities_prediction": model.predict(X_test)})

In [None]:
# Preview the Random Forest fatality predictions.
pred2_rf.head()

### XGBoost Regressor Model

In [None]:
from xgboost import XGBRegressor
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

from sklearn.metrics import mean_absolute_error
# Gradient-boosted regressor: 1000 boosting rounds, learning rate 0.1,
# optimising squared error.
model2 = XGBRegressor(
    objective='reg:squarederror',
    learning_rate=0.1,
    n_estimators=1000,
)

In [None]:
# Treat missing confirmed-case targets as zero before fitting.
y1_train = y1_train.fillna(0)
model2.fit(X_train, y1_train)

# A squared-error regressor can output negative values, but a case count cannot be
# negative, so the predictions are floored at zero.
pred1_xgb = pd.DataFrame(
    {"ConfirmedCases_prediction": np.clip(model2.predict(X_test), 0, None)}
)

In [None]:
# Preview the XGBoost confirmed-case predictions.
pred1_xgb.head()

In [None]:
# Treat missing fatality targets as zero before fitting.
y2_train = y2_train.fillna(0)
# Refits the same model2 object; after this cell it holds the fatalities fit.
model2.fit(X_train, y2_train)

# A squared-error regressor can output negative values, but a fatality count cannot
# be negative, so the predictions are floored at zero.
pred2_xgb = pd.DataFrame(
    {"Fatalities_prediction": np.clip(model2.predict(X_test), 0, None)}
)

In [None]:
# Preview the XGBoost fatality predictions.
pred2_xgb.head()

In [None]:
# Load submission.csv from the competition input folder
# (presumably the submission template to be filled in — confirm).
submissionOriginal = pd.read_csv("../input/covid19-global-forecasting-week-1/submission.csv")

In [None]:
# Preview only the first rows of the submission frame instead of echoing all of it.
submissionOriginal.head()

In [None]:
# Copy the XGBoost fatality predictions into the submission frame (aligned on index).
# pred2_xgb is deliberately left intact: dropping its only column in place made this
# cell fail with a KeyError on re-run and discarded the predictions for no benefit.
# NOTE(review): ConfirmedCases is not filled in here — confirm it is populated from
# pred1_xgb elsewhere before the submission is written.
submissionOriginal["Fatalities"] = pred2_xgb["Fatalities_prediction"]