In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Any results you write to the current directory are saved as output.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Directory that holds the dataset CSV files. Kept in one place so the
# notebook can be pointed at another location by editing a single line.
DATA_DIR = '/kaggle/input/coronavirusdataset'

# One DataFrame per CSV file; the variable names match the file names.
patient = pd.read_csv(os.path.join(DATA_DIR, 'patient.csv'))
case = pd.read_csv(os.path.join(DATA_DIR, 'case.csv'))
trend = pd.read_csv(os.path.join(DATA_DIR, 'trend.csv'))
# NOTE(review): `time` would shadow the standard-library module of the same
# name if that module were imported in this notebook.
time = pd.read_csv(os.path.join(DATA_DIR, 'time.csv'))
route = pd.read_csv(os.path.join(DATA_DIR, 'route.csv'))


In [None]:
# Column names, dtypes and non-null counts of the patient table.
patient.info()

In [None]:
# Summary statistics for the columns pandas selects by default in describe().
patient.describe()

In [None]:
# Preview the first rows of the patient table.
patient.head()

In [None]:
# Number of missing values in each patient column.
patient.isna().sum()

In [None]:
# (rows, columns) of the patient table.
patient.shape

In [None]:
# Keep an untouched copy of the patient table: the cells that follow add and
# overwrite columns on `patient` itself.
# NOTE(review): `data` is not referenced again in the visible part of the notebook.
data = patient.copy()

In [None]:
# Truncate birth years to whole numbers, then blank out anything that is not a
# positive year. Missing values are temporarily encoded as 0 so the integer
# cast succeeds; because they are turned back into NaN, the column ends up
# floating point whenever any value is missing.
birth_years = patient['birth_year'].fillna(0.0).astype(int)
patient['birth_year'] = birth_years.where(birth_years > 0, np.nan)

In [None]:
# Age is measured against a fixed reference year so results do not drift with
# the date on which the notebook is executed.
reference_year = 2020
patient['age'] = reference_year - patient['birth_year']

In [None]:
import math
def age_group(age):
    """Return the ten-year bracket that contains ``age`` as a label.

    Parameters
    ----------
    age : int or float
        Age in years. NaN and negative values are treated as unknown.

    Returns
    -------
    str
        A label of the form ``"20-29"`` or ``'Unknown'`` when the age is
        missing or negative.

    Every age in the same decade maps to exactly the same label. Previously
    multiples of ten produced ``"20 - 29"`` (with spaces) while all other ages
    produced ``"20-29"``, which split one bracket into two categories.
    """
    # `not age >= 0` (rather than `age < 0`) also catches NaN, for which every
    # comparison is False.
    if not age >= 0:
        return 'Unknown'
    lower = int(age // 10) * 10
    return f"{lower}-{lower + 9}"

In [None]:
# Label every patient with the bracket returned by age_group() for their age.
patient['age_group'] = patient['age'].map(age_group)

In [None]:
# Convert the three event-date columns to pandas datetimes so they can be
# subtracted from one another.
for date_column in ('confirmed_date', 'released_date', 'deceased_date'):
    patient[date_column] = pd.to_datetime(patient[date_column])

In [None]:
# Time elapsed between confirmation and each possible outcome.
confirmed_on = patient['confirmed_date']
patient['release_time'] = patient['released_date'].sub(confirmed_on)
patient['death_time'] = patient['deceased_date'].sub(confirmed_on)

# Row-wise minimum of the two outcome durations, also expressed in whole days.
outcome_durations = patient[['release_time', 'death_time']]
patient['duration_since_confirmed'] = outcome_durations.min(axis=1)
patient["duration_days"] = patient["duration_since_confirmed"].dt.days

In [None]:
# Fraction of all patients currently in each state.
total_patients = len(patient)
state_column = patient['state']
recovered_patients = len(patient[state_column == 'released']) / total_patients
isolated_patients = len(patient[state_column == 'isolated']) / total_patients
deceased_patients = len(patient[state_column == 'deceased']) / total_patients

# Report the fractions as percentages.
for message, share in (
    ('The percentage of recovery is ', recovered_patients),
    ('The percentage of isolated is ', isolated_patients),
    ('The percentage of deceased is ', deceased_patients),
):
    print(message + str(share * 100))

In [None]:
# Pie chart of how many patients are in each state; the legend is anchored to
# the right of the pie.
k = patient['state'].value_counts()
k.plot.pie(figsize=(20, 10), legend=True)
plt.legend(loc='best', bbox_to_anchor=(1.5, 0.5))

In [None]:
# Pie chart of the recorded infection reasons; the legend is anchored to the
# right of the pie.
a = patient['infection_reason'].value_counts()
a.plot.pie(figsize=(20, 10), legend=True)
plt.legend(loc='best', bbox_to_anchor=(1.5, 0.5))

**Split the Data according to the state of the patient**

In [None]:
# One sub-table per patient state.
released = patient.loc[patient['state'] == 'released']
isolated = patient.loc[patient['state'] == 'isolated']
deceased = patient.loc[patient['state'] == 'deceased']

In [None]:
# Combined "<state>_<sex>" label; rows missing either part stay missing.
patient['state_by_gender'] = patient['state'].str.cat(patient['sex'], sep='_')

In [None]:
# Age per country (seaborn's default bar estimate), split by patient state.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(data=patient, x='country', y='age', hue='state', ax=ax)
ax.legend(loc='best')

In [None]:
# Age per country (seaborn's default bar estimate), split by sex.
fig, ax = plt.subplots(figsize=(15, 10))
sns.barplot(data=patient, x='country', y='age', hue='sex', ax=ax)
ax.legend(loc='best')

In [None]:
# Number of patients in each state, split by sex.
fig, ax = plt.subplots(figsize=(15, 10))
sns.countplot(x='state', hue='sex', data=patient, ax=ax)

In [None]:
# Number of patients in each state, split by infection reason.
fig, ax = plt.subplots(figsize=(15, 10))
sns.countplot(x='state', hue='infection_reason', data=patient, ax=ax)

In [None]:
# Age distribution within each state, split by sex.
fig, ax = plt.subplots(figsize=(15, 10))
sns.violinplot(x='state', y='age', hue='sex', data=patient, ax=ax)
plt.show()

In [None]:
# Age distribution for each sex.
fig, ax = plt.subplots(figsize=(15, 10))
sns.violinplot(x='sex', y='age', data=patient, ax=ax)
plt.show()

In [None]:
# Age distribution within each country, split by sex.
fig, ax = plt.subplots(figsize=(15, 10))
sns.violinplot(x='country', y='age', hue='sex', data=patient, ax=ax)
plt.show()

**Cumulative count of confirmed patients to date**

In [None]:
# Number of patient ids recorded for each confirmation date.
daily_count = patient.groupby('confirmed_date')['patient_id'].count()

In [None]:
# Running total of the per-date counts.
accumulated_count = daily_count.cumsum()

In [None]:
# Line chart of the running total of confirmed cases over time.
fig, ax = plt.subplots(figsize=(15, 10))
accumulated_count.plot(ax=ax)
ax.set_title('Accumulated Count of Confirmed Cases')

In [None]:
# Overlay the age density of each outcome group on the same axes.
# NOTE(review): `shade=` is the spelling this notebook was written with; newer
# seaborn releases name the same option `fill=` - confirm against the
# installed version before changing it.
for state_label, state_frame in (
    ('deceased', deceased),
    ('released', released),
    ('isolated', isolated),
):
    sns.kdeplot(data=state_frame['age'], label=state_label, shade=True)

# The curves are only identifiable through their labels, so request the legend
# explicitly instead of relying on seaborn to draw one.
plt.xlabel('age')
plt.title('Age distribution by patient state')
plt.legend(loc='best')

In [None]:
# Bar chart of the sex of deceased patients.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title('Sex')
deceased['sex'].value_counts().plot.bar(ax=ax)

In [None]:
# Patients per age bracket, split by the combined state/sex label.
fig, ax = plt.subplots(figsize=[18, 7])
sns.countplot(x='age_group', hue='state_by_gender', data=patient, ax=ax)
ax.legend(loc='best')

In [None]:
# Scatter of region against age, coloured by patient state.
sns.set_style("whitegrid")
state_grid = sns.FacetGrid(patient, hue='state', height=10)
state_grid.map(plt.scatter, 'age', 'region')
state_grid.add_legend()
plt.title('Region by age and state')
plt.show()

In [None]:
# Deceased patients per sex, split by the `disease` column.
fig, ax = plt.subplots(figsize=(15, 10))
sns.countplot(x='sex', hue='disease', data=deceased, ax=ax)

In [None]:
# disease = 1 means that the patient had an underlying disease.
# Patients per state, split by that flag.
fig, ax = plt.subplots(figsize=[12, 7])
sns.countplot(x='state', hue='disease', data=patient, ax=ax)

**Route Dataset**

In [None]:
# Preview the first rows of the route table.
route.head()

In [None]:
# Column names, dtypes and non-null counts of the route table.
route.info()

In [None]:
# Summary statistics for the columns pandas selects by default in describe().
route.describe()

In [None]:
# Number of missing values in each route column.
route.isna().sum()

In [None]:
# Working table for the clustering: record id plus coordinates.
# An explicit copy is taken because a label column is added to `cluster`
# later on; without it pandas may emit SettingWithCopyWarning for writing
# into a slice of `route`.
cluster = route.loc[:, ['id', 'latitude', 'longitude']].copy()
cluster.head(5)

In [None]:
import matplotlib.dates as mdates
import plotly.express as px
from datetime import date, timedelta
from sklearn.cluster import KMeans
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot
import plotly.offline as py
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import statsmodels.api as sm
from keras.models import Sequential
from keras.layers import LSTM,Dense
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Elbow plot: KMeans score for 1..10 clusters.
K_clusters = range(1, 11)
# A fixed random_state makes the curve reproducible between runs.
kmeans = [KMeans(n_clusters=i, random_state=42) for i in K_clusters]
Y_axis = route[['latitude']]
X_axis = route[['longitude']]
# Score the models on both coordinates. The original fitted on latitude only
# and never used X_axis, so the curve did not describe the two-dimensional
# clustering that is run afterwards.
coordinates = route[['latitude', 'longitude']]
score = [model.fit(coordinates).score(coordinates) for model in kmeans]
plt.plot(K_clusters, score)
plt.xlabel('Number of Clusters')
plt.ylabel('Score')
plt.show()

In [None]:
# Cluster the route points into four groups by coordinates.
# Columns are selected by name rather than position so that adding the label
# column cannot change which features are used.
coordinate_columns = ['latitude', 'longitude']
# A fixed random_state keeps the cluster assignment stable between runs.
kmeans = KMeans(n_clusters=4, init='k-means++', random_state=42)
# Fit once and reuse the resulting labels; the original fitted the model
# twice and then predicted again on the same data.
labels = kmeans.fit_predict(cluster[coordinate_columns])
cluster['cluster_label'] = labels
# Column 0 of `centers` is latitude and column 1 is longitude.
centers = kmeans.cluster_centers_

In [None]:
# Route points coloured by cluster label, with the cluster centres overlaid
# as translucent black dots on the same axes.
cluster_ax = cluster.plot.scatter(x='latitude', y='longitude', c=labels, s=50, cmap='viridis')
center_latitudes, center_longitudes = centers[:, 0], centers[:, 1]
cluster_ax.scatter(center_latitudes, center_longitudes, c='black', s=100, alpha=0.5)

In [None]:
import folium
# Base map positioned at the given latitude/longitude.
southkorea_map = folium.Map(location=[36.55, 126.983333], zoom_start=7, tiles='Stamen Toner')

# Every route record becomes a red circle whose popup shows the city.
marker_style = dict(radius=5, color='red', fill_color='red', fill_opacity=0.7)
for lat, lon, city in zip(route['latitude'], route['longitude'], route['city']):
    marker = folium.CircleMarker(
        [lat, lon],
        popup=('City: ' + str(city) + '<br>'),
        **marker_style,
    )
    marker.add_to(southkorea_map)
southkorea_map

In [None]:
# Number of route records per city.
fig, ax = plt.subplots(figsize=(15, 10))
ax.set_title('Number of Patients in each City')
route['city'].value_counts().plot.bar(ax=ax)

In [None]:
# Number of route records per province.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title('Number patients in Province/State')
route['province'].value_counts().plot.bar(ax=ax)

In [None]:
# Number of route records per value of the `visit` column.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title('Visit')
route['visit'].value_counts().plot.bar(ax=ax)

**Time from confirmation to release or death based on gender**

In [None]:
# Days from confirmation to outcome per sex, for released and deceased patients.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(
    data=patient,
    x="sex",
    y="duration_days",
    hue='state',
    hue_order=["released", "deceased"],
    ax=ax,
)
ax.set_title("Time from confirmation to release or death")

In [None]:
# Days from confirmation to outcome per age bracket, for released and
# deceased patients.
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(
    data=patient,
    x="age_group",
    y="duration_days",
    hue='state',
    hue_order=["released", "deceased"],
    ax=ax,
)
ax.set_title("Time from confirmation to release or death")

**Trends for the different diseases (trend dataset)**

In [None]:
# Summary statistics for the columns pandas selects by default in describe().
trend.describe()

In [None]:
# Column names, dtypes and non-null counts of the trend table.
trend.info()

In [None]:
# One two-column table (date + value) per disease keyword.
# Each is an explicit copy because the tables are modified afterwards;
# writing into a bare slice of `trend` triggers SettingWithCopyWarning.
trend_cold = trend[['date', 'cold']].copy()
trend_flu = trend[['date', 'flu']].copy()
trend_pneumonia = trend[['date', 'pneumonia']].copy()
trend_coronavirus = trend[['date', 'coronavirus']].copy()

In [None]:
def _indexed_by_date(frame, column):
    """Return ``frame[column]`` as a one-column table indexed by parsed date.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a ``date`` column and the requested value column.
    column : str
        Name of the value column to keep.

    Returns
    -------
    pandas.DataFrame
        A new table whose only column is ``column`` and whose index is the
        ``date`` column converted with ``pd.to_datetime``.
    """
    # Work on a copy so the source table is never modified and no
    # SettingWithCopyWarning is raised.
    selected = frame[['date', column]].copy()
    selected['date'] = pd.to_datetime(selected['date'])
    return selected.set_index('date')


# Rebuild every series from `trend`, so this cell can be re-run safely. The
# in-place version failed on a second run because `date` was already dropped.
trend_cold = _indexed_by_date(trend, 'cold')
trend_flu = _indexed_by_date(trend, 'flu')
trend_pneumonia = _indexed_by_date(trend, 'pneumonia')
trend_coronavirus = _indexed_by_date(trend, 'coronavirus')

In [None]:
# Decompose the cold series and plot only its trend component.
decomposition = seasonal_decompose(trend_cold)
trend_cld = decomposition.trend
fig, ax = plt.subplots(figsize=(18, 8))
ax.plot(trend_cld, label='Trend')
ax.set_title('Trend of Cold')
ax.legend(loc='best')

In [None]:
# Decompose the flu series and plot only its trend component.
fig, ax = plt.subplots(figsize=(18, 8))
decomposition = seasonal_decompose(trend_flu)
trend_fl = decomposition.trend
ax.plot(trend_fl, label='Trend')
ax.set_title('Trend of flu')
ax.legend(loc='best')

In [None]:
# Decompose the pneumonia series and plot only its trend component.
fig, ax = plt.subplots(figsize=(18, 8))
decomposition = seasonal_decompose(trend_pneumonia)
trend_pneu = decomposition.trend
ax.plot(trend_pneu, label='Trend')
ax.set_title('Trend of Pneumonia')
ax.legend(loc='best')

In [None]:
# Decompose the coronavirus series and plot only its trend component.
fig, ax = plt.subplots(figsize=(18, 8))
decomposition = seasonal_decompose(trend_coronavirus)
trend_corona = decomposition.trend
ax.plot(trend_corona, label='Trend')
ax.set_title('Trend of Coronavirus')
ax.legend(loc='best')