In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

## Confirmed Cases Reading & Preprocessing

In [69]:
# Load the confirmed-cases CSV into a DataFrame
confirmed_data = pd.read_csv(filepath_or_buffer='data/covid19_confirmed.csv')

In [70]:
# Keep only the rows whose 'Country/Region' is Sri Lanka
confirmed_data = confirmed_data[confirmed_data['Country/Region'].eq('Sri Lanka')]

In [71]:
# Remove the location metadata columns, leaving only the per-date columns
confirmed_data = confirmed_data.drop(
    ['Province/State', 'Country/Region', 'Lat', 'Long'],
    axis='columns',
)
confirmed_data

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20
202,0,0,0,0,0,1,1,1,1,1,...,368,420,460,523,588,619,649,663,690,705


In [72]:
# Reshape wide -> long: the column labels become 'Date',
# the flattened cell values become 'Confirmed'
confirmed_data = pd.DataFrame({
    'Date': confirmed_data.columns.to_numpy(),
    'Confirmed': confirmed_data.to_numpy().flatten(),
})

In [73]:
# Convert the 'Date' strings (month/day/two-digit year, e.g. '1/22/20') to datetimes.
# The format is passed explicitly so parsing does not depend on pandas' format
# inference, and a label that does not match raises instead of being guessed at.
confirmed_data['Date'] = pd.to_datetime(confirmed_data['Date'], format='%m/%d/%y')

In [74]:
# Use the parsed dates as the row index, then show the result
confirmed_data = confirmed_data.set_index('Date')
confirmed_data

Unnamed: 0_level_0,Confirmed
Date,Unnamed: 1_level_1
2020-01-22,0
2020-01-23,0
2020-01-24,0
2020-01-25,0
2020-01-26,0
...,...
2020-04-28,619
2020-04-29,649
2020-04-30,663
2020-05-01,690


## Recovered Cases Reading & Preprocessing 

In [75]:
# Load the recovered-cases CSV into a DataFrame
recovered_data = pd.read_csv(filepath_or_buffer='data/covid19_recovered.csv')

In [76]:
# Keep only the rows whose 'Country/Region' is Sri Lanka
recovered_data = recovered_data.loc[recovered_data['Country/Region'].eq('Sri Lanka')]

In [77]:
# Remove the location metadata columns, leaving only the per-date columns
recovered_data = recovered_data.drop(
    ['Province/State', 'Country/Region', 'Lat', 'Long'],
    axis='columns',
)
recovered_data

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20
200,0,0,0,0,0,0,0,0,0,0,...,107,109,118,120,126,134,136,154,162,172


In [78]:
# Reshape wide -> long: the column labels become 'Date',
# the flattened cell values become 'Recovered'
recovered_data = pd.DataFrame({
    'Date': recovered_data.columns.to_numpy(),
    'Recovered': recovered_data.to_numpy().flatten(),
})

In [79]:
# Convert the 'Date' strings (month/day/two-digit year, e.g. '1/22/20') to datetimes.
# The format is passed explicitly so parsing does not depend on pandas' format
# inference, and a label that does not match raises instead of being guessed at.
recovered_data['Date'] = pd.to_datetime(recovered_data['Date'], format='%m/%d/%y')

In [80]:
# Use the parsed dates as the row index, then show the result
recovered_data = recovered_data.set_index('Date')
recovered_data

Unnamed: 0_level_0,Recovered
Date,Unnamed: 1_level_1
2020-01-22,0
2020-01-23,0
2020-01-24,0
2020-01-25,0
2020-01-26,0
...,...
2020-04-28,134
2020-04-29,136
2020-04-30,154
2020-05-01,162


## Death Cases Reading & Preprocessing

In [81]:
# Load the deaths CSV into a DataFrame and show it
death_data = pd.read_csv(filepath_or_buffer='data/covid19_deaths.csv')
death_data

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20
0,,Afghanistan,33.000000,65.000000,0,0,0,0,0,0,...,42,43,47,50,57,58,60,64,68,72
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,27,27,27,28,28,30,30,31,31,31
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,407,415,419,425,432,437,444,450,453,459
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,37,40,40,40,40,41,42,42,43,44
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,,Western Sahara,24.215500,-12.885800,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
262,,Sao Tome and Principe,0.186360,6.613081,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
263,,Yemen,15.552727,48.516388,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2,2,2
264,,Comoros,-11.645500,43.333300,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [82]:
# Keep only the rows whose 'Country/Region' is Sri Lanka
death_data = death_data.loc[death_data['Country/Region'].eq('Sri Lanka')]

In [83]:
# Remove the location metadata columns, leaving only the per-date columns
death_data = death_data.drop(
    ['Province/State', 'Country/Region', 'Lat', 'Long'],
    axis='columns',
)
death_data

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20,4/29/20,4/30/20,5/1/20,5/2/20
202,0,0,0,0,0,0,0,0,0,0,...,7,7,7,7,7,7,7,7,7,7


In [84]:
# Reshape wide -> long: the column labels become 'Date',
# the flattened cell values become 'Deaths'
death_data = pd.DataFrame({
    'Date': death_data.columns.to_numpy(),
    'Deaths': death_data.to_numpy().flatten(),
})

In [85]:
# Convert the 'Date' strings (month/day/two-digit year, e.g. '1/22/20') to datetimes.
# The format is passed explicitly so parsing does not depend on pandas' format
# inference, and a label that does not match raises instead of being guessed at.
death_data['Date'] = pd.to_datetime(death_data['Date'], format='%m/%d/%y')

In [86]:
# Use the parsed dates as the row index, then show the result
death_data = death_data.set_index('Date')
death_data

Unnamed: 0_level_0,Deaths
Date,Unnamed: 1_level_1
2020-01-22,0
2020-01-23,0
2020-01-24,0
2020-01-25,0
2020-01-26,0
...,...
2020-04-28,7
2020-04-29,7
2020-04-30,7
2020-05-01,7


### Joining DataFrames

In [112]:
# Combine the three per-date series into a single table.
# The series are aligned on their shared 'Date' index rather than pasted together
# by position, so a date that is missing or shifted in one source surfaces as NaN
# in that column instead of silently attaching counts to the wrong day.
covid19_data = (
    pd.concat(
        [
            confirmed_data['Confirmed'],
            recovered_data['Recovered'],
            death_data['Deaths'].rename('Death'),  # combined table names this column 'Death'
        ],
        axis=1,
    )
    .rename_axis('Date')
    .reset_index()  # keep 'Date' as a regular column with a default integer index
)
# set_index returns a new frame, so this line only displays a date-indexed view;
# covid19_data itself is left with 'Date' as a column.
covid19_data.set_index('Date')

Unnamed: 0_level_0,Confirmed,Recovered,Death
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-22,0,0,0
2020-01-23,0,0,0
2020-01-24,0,0,0
2020-01-25,0,0,0
2020-01-26,0,0,0
...,...,...,...
2020-04-28,619,134,7
2020-04-29,649,136,7
2020-04-30,663,154,7
2020-05-01,690,162,7


In [114]:
# Report, per column, whether it contains any missing value
covid19_data.isnull().any()

Date         False
Confirmed    False
Recovered    False
Death        False
dtype: bool

In [115]:
# Print a summary of the combined frame: index type, per-column non-null counts
# and dtypes, and memory usage
covid19_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       102 non-null    datetime64[ns]
 1   Confirmed  102 non-null    int64         
 2   Recovered  102 non-null    int64         
 3   Death      102 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 3.3 KB
