## Coronavirus (Covid-19) Visualization & Prediction

In [2]:
# required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import random
import time
import math
import datetime
import operator

from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

import warnings
# Silence UserWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings(action = 'ignore', category = UserWarning)

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# matplotlib 3.8 removed the old names, so pick whichever name this install
# provides and fall back to the default style if neither is present.
poster_style = next(
    (name for name in ('seaborn-v0_8-poster', 'seaborn-poster') if name in plt.style.available),
    'default',
)
plt.style.use(poster_style)

In [3]:
# Source files from the CSSEGISandData/COVID-19 GitHub repository:
# two global time series plus the daily reports dated 07-12-2022.
confirmed_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
# recoveries_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
latest_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-12-2022.csv'
us_medical_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/07-12-2022.csv'

# Download each CSV into its own DataFrame.
confirmed_df = pd.read_csv(confirmed_url)
deaths_df = pd.read_csv(deaths_url)
latest_data = pd.read_csv(latest_url)
us_medical_data = pd.read_csv(us_medical_url)

In [3]:
# Preview the first rows of the global confirmed-cases time series.
confirmed_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/4/22,7/5/22,7/6/22,7/7/22,7/8/22,7/9/22,7/10/22,7/11/22,7/12/22,7/13/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,182793,182793,182979,183084,183221,183235,183265,183268,183272,183285
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,282690,282690,283811,284758,285731,286732,287984,288176,289391,290954
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,266173,266173,266181,266202,266228,266246,266257,266274,266303,266328
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,44177,44177,44671,44671,44671,44671,44671,44671,44671,44671
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,101320,101320,101320,101320,101320,101320,101320,101320,101320,101320


In [11]:
# Preview the first 5 rows of the global deaths time series.
deaths_df.head(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/4/22,7/5/22,7/6/22,7/7/22,7/8/22,7/9/22,7/10/22,7/11/22,7/12/22,7/13/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,7725,7725,7725,7727,7728,7728,7728,7728,7728,7728
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,3502,3502,3506,3508,3510,3512,3514,3514,3516,3517
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,6875,6875,6875,6875,6875,6875,6875,6875,6875,6875
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,153,153,153,153,153,153,153,153,153,153
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,1900,1900,1900,1900,1900,1900,1900,1900,1900,1900


In [7]:
# Inspect 5 randomly drawn rows of the 07-12-2022 daily report
# (no random_state is passed, so the rows shown differ between runs).
latest_data.sample(5)

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
737,1123.0,Tallapoosa,Alabama,US,2022-07-13 04:20:58,32.866983,-85.798331,12023,238,,,"Tallapoosa, Alabama, US",29784.229693,1.979539
3276,47063.0,Hamblen,Tennessee,US,2022-07-13 04:20:58,36.2199,-83.265862,20402,364,,,"Hamblen, Tennessee, US",31419.595281,1.784139
3858,55003.0,Ashland,Wisconsin,US,2022-07-13 04:20:58,46.319569,-90.678371,3667,39,,,"Ashland, Wisconsin, US",23563.809279,1.06354
3995,,,Montserrat,United Kingdom,2022-07-13 04:20:58,16.742498,-62.187366,1023,8,,,"Montserrat, United Kingdom",20464.092819,0.782014
2966,41055.0,Sherman,Oregon,US,2022-07-13 04:20:58,45.415016,-120.682783,353,6,,,"Sherman, Oregon, US",19831.460674,1.699717


In [10]:
# Show the last 5 rows of the US daily report for 07-12-2022.
us_medical_data.tail(5)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,FIPS,Incident_Rate,Total_Test_Results,People_Hospitalized,Case_Fatality_Ratio,UID,ISO3,Testing_Rate,Hospitalization_Rate
53,Virginia,US,2022-07-13 04:31:10,37.7693,-78.17,1902133,20755,,,51.0,22284.913196,14295761.0,,1.091143,84000051.0,USA,167485.550674,
54,Washington,US,2022-07-13 04:31:10,47.4009,-121.4905,1692004,13362,,,53.0,22219.668746,,,0.789714,84000053.0,USA,,
55,West Virginia,US,2022-07-13 04:31:10,38.4912,-80.9545,540192,7091,,,54.0,30142.170257,6561001.0,,1.312681,84000054.0,USA,366097.256531,
56,Wisconsin,US,2022-07-13 04:31:10,44.2685,-89.6165,1744108,14842,,,55.0,29954.963852,17071346.0,,0.850979,84000055.0,USA,293199.47637,
57,Wyoming,US,2022-07-13 04:31:10,42.756,-107.3025,167386,1845,,,56.0,28921.537289,1454012.0,,1.102243,84000056.0,USA,251229.268141,


In [12]:
# (rows, columns) of each loaded frame, in load order.
confirmed_df.shape, deaths_df.shape, latest_data.shape, us_medical_data.shape

((285, 908), (285, 908), (4012, 14), (58, 18))

In [13]:
# Column dtypes and memory footprint of the confirmed-cases frame;
# memory_usage='deep' asks pandas to measure object columns as well.
confirmed_df.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Columns: 908 entries, Province/State to 7/13/22
dtypes: float64(2), int64(904), object(2)
memory usage: 2.0 MB


In [15]:
# Column dtypes and memory footprint of the deaths frame;
# memory_usage='deep' asks pandas to measure object columns as well.
deaths_df.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Columns: 908 entries, Province/State to 7/13/22
dtypes: float64(2), int64(904), object(2)
memory usage: 2.0 MB


In [16]:
# Per-column non-null counts and dtypes of the 07-12-2022 daily report;
# memory_usage='deep' asks pandas to measure object columns as well.
latest_data.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4012 entries, 0 to 4011
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   FIPS                 3268 non-null   float64
 1   Admin2               3272 non-null   object 
 2   Province_State       3835 non-null   object 
 3   Country_Region       4012 non-null   object 
 4   Last_Update          4012 non-null   object 
 5   Lat                  3921 non-null   float64
 6   Long_                3921 non-null   float64
 7   Confirmed            4012 non-null   int64  
 8   Deaths               4012 non-null   int64  
 9   Recovered            0 non-null      float64
 10  Active               0 non-null      float64
 11  Combined_Key         4012 non-null   object 
 12  Incident_Rate        3918 non-null   float64
 13  Case_Fatality_Ratio  3969 non-null   float64
dtypes: float64(7), int64(2), object(5)
memory usage: 1.6 MB


In [17]:
# Per-column non-null counts and dtypes of the US daily report;
# memory_usage='deep' asks pandas to measure object columns as well.
us_medical_data.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58 entries, 0 to 57
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Province_State        58 non-null     object 
 1   Country_Region        58 non-null     object 
 2   Last_Update           58 non-null     object 
 3   Lat                   56 non-null     float64
 4   Long_                 56 non-null     float64
 5   Confirmed             58 non-null     int64  
 6   Deaths                58 non-null     int64  
 7   Recovered             0 non-null      float64
 8   Active                0 non-null      float64
 9   FIPS                  58 non-null     float64
 10  Incident_Rate         56 non-null     float64
 11  Total_Test_Results    38 non-null     float64
 12  People_Hospitalized   0 non-null      float64
 13  Case_Fatality_Ratio   58 non-null     float64
 14  UID                   58 non-null     float64
 15  ISO3                  58 

In [11]:
# Column labels of the confirmed-cases frame (for a DataFrame, .keys() and
# .columns return the same Index).
cols = confirmed_df.columns
cols

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       ...
       '7/4/22', '7/5/22', '7/6/22', '7/7/22', '7/8/22', '7/9/22', '7/10/22',
       '7/11/22', '7/12/22', '7/13/22'],
      dtype='object', length=908)

In [12]:
# The first four columns are Province/State, Country/Region, Lat and Long;
# everything from the fifth column through the last one is a per-date count.
first_date, last_date = cols[4], cols[-1]
confirmed = confirmed_df.loc[:, first_date:last_date]
deaths = deaths_df.loc[:, first_date:last_date]

In [13]:
dates = confirmed.columns

# Sum every date column across all rows in one vectorized pass instead of
# looping over the columns in Python. `deaths[dates]` selects the deaths
# columns in the same order as `dates`, so the two arrays line up by position.
confirmed_by_date = confirmed.sum(axis = 0).to_numpy()
deaths_by_date = deaths[dates].sum(axis = 0).to_numpy()

# Keep the results as plain lists, one entry per date, as downstream cells expect.
world_cases = list(confirmed_by_date)
total_deaths = list(deaths_by_date)

# calculate rates: deaths divided by confirmed cases for each date
mortality_rate = list(deaths_by_date / confirmed_by_date)

In [14]:
world_cases[-1], total_deaths[-1], mortality_rate[-1]

(559534585, 6363300, 0.011372487368229437)

In [29]:
def daily_increase(data):
    """Return the element-to-element change of ``data`` as a list.

    The first entry is ``data[0]`` itself (there is no previous value to
    subtract); every later entry is ``data[k] - data[k - 1]``. An empty
    input yields an empty list.
    """
    return [
        data[k] - data[k - 1] if k else data[0]
        for k in range(len(data))
    ]

def moving_average(data, window_size):
    """Return the forward-looking moving average of ``data`` as a list.

    Entry ``i`` of the result is the mean of ``data[i:i + window_size]``.
    For the last positions, where fewer than ``window_size`` elements remain,
    the slice is truncated automatically, so the mean is taken over the
    remaining tail ``data[i:]`` only.

    The previous implementation averaged the *entire* series for those tail
    positions, which made the final ``window_size`` entries all equal to the
    global mean.

    Parameters
    ----------
    data : sequence of numbers supporting ``len`` and slicing
    window_size : int
        Number of elements in each averaging window.

    Returns
    -------
    list
        One average per element of ``data``.
    """
    # Slicing past the end just shortens the window, so no special-casing
    # of the tail is needed.
    return [np.mean(data[i:i + window_size]) for i in range(len(data))]

# Number of elements in each moving-average window
window = 7

# Day-over-day changes of the worldwide totals
world_daily_increase = daily_increase(world_cases)
world_daily_death = daily_increase(total_deaths)

# Moving averages of the worldwide totals
world_confirmed_avg = moving_average(world_cases, window_size = window)
world_death_avg = moving_average(total_deaths, window_size = window)

# Moving averages of the day-over-day changes
world_daily_increase_avg = moving_average(world_daily_increase, window_size = window)
world_daily_death_avg = moving_average(world_daily_death, window_size = window)

In [43]:
# Day index 0..len(dates)-1 as a column vector, one row per date column.
days_since_1_22 = np.arange(len(dates)).reshape((-1, 1))

# Turn the per-date totals into column vectors of shape (n, 1).
world_cases = np.array(world_cases).reshape((-1, 1))
total_deaths = np.array(total_deaths).reshape((-1, 1))

In [45]:
# Number of days to extend the day index beyond the observed dates.
days_in_future = 10

# Day indices covering the observed dates plus the forecast horizon.
future_forcast = np.arange(len(dates) + days_in_future)

# Day indices of the observed dates only. Slicing by the number of observed
# dates (instead of a hard-coded -10) keeps this in sync when days_in_future
# changes, and also works for days_in_future = 0, where [:-0] would be empty.
adjusted_dates = future_forcast[:len(dates)]

In [47]:
# Calendar date matching day index 0
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')

# One 'MM/DD/YYYY' label per entry of future_forcast, counting days from start_date
future_forcast_dates = [
    (start_date + datetime.timedelta(days = offset)).strftime('%m/%d/%Y')
    for offset in range(len(future_forcast))
]

* **TODO:** continue later: https://www.kaggle.com/code/mrkizmaz/coronavirus-covid-19-visualization-prediction/edit