In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from fbprophet import Prophet
import plotly.graph_objects as go
import warnings 

warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Loading and Understanding the Data

In [None]:
# Read the city-wise COVID-19 records, parsing the 'Date' column as datetimes.
cases_csv = '/kaggle/input/pakistan-corona-virus-citywise-data/PK COVID-19-3jun.csv'
df = pd.read_csv(cases_csv, parse_dates=['Date'])

In [None]:
# Preview the first rows of the loaded frame.
df.head()

### Getting Information about the Data

In [None]:
# Print the column dtypes and non-null counts.
df.info()

#### Checking Missing Values

In [None]:
# Count the missing values in each column.
df.isnull().sum()

Only travel history has null values

Some records already have their travel history set to 'Unknown', so we set the missing values to 'Unknown' as well.

In [None]:
# List the distinct values present in the travel-history column.
df['Travel_history'].unique()

In [None]:
# Fill missing travel history with the 'Unknown' label; fillna states the intent
# directly instead of building an array of NaNs to pass to replace().
df['Travel_history'] = df['Travel_history'].fillna('Unknown')

Sorting the DataFrame by date.

In [None]:
# Order the records chronologically (oldest first).
df = df.sort_values(by='Date', ascending=True)

A couple of province names appear with inconsistent capitalization, so we have to correct them first.

In [None]:
# List the distinct province names to spot inconsistent spellings.
df['Province'].unique()

In [None]:
# Normalise the inconsistent capitalisation of province names.
# The result is assigned back rather than using replace(..., inplace=True) on the
# selected column: that pattern is chained assignment, which does not update `df`
# when pandas Copy-on-Write is active.
df['Province'] = df['Province'].replace({
    'khyber Pakhtunkhwa': 'Khyber Pakhtunkhwa',
    'islamabad Capital Territory': 'Islamabad Capital Territory',
})

Checking if there's any need to clean Travel History and City Data.

In [None]:
# List the distinct travel-history values to check whether further cleaning is needed.
df['Travel_history'].unique()

In [None]:
# List the distinct city names to check whether further cleaning is needed.
df['City'].unique()

## New Cases, Recoveries and Deaths Per Day

In [None]:
# Line chart of the national per-day sums of Cases, Recovered and Deaths.
sns.set(font_scale=2.7)

plt.figure(figsize=(20, 15))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel("Dates", fontsize=20)
plt.ylabel('Total cases', fontsize=20)
plt.title("New Case, Recovered, Death Per Day in Pakistan", fontsize=30)

# Aggregate once per date and reuse the result for all three lines.
daily_totals = df.groupby('Date')[['Cases', 'Recovered', 'Deaths']].sum()

# (column, line style, colour) in the same drawing order as the legend below.
daily_line_specs = (
    ('Cases', '--', 'black'),
    ('Recovered', 'dashdot', 'green'),
    ('Deaths', 'dashdot', 'red'),
)
for column, dash, colour in daily_line_specs:
    plt.plot(daily_totals.index, daily_totals[column].values,
             marker='.', ls=dash, lw=2, color=colour)

plt.legend(["Cases", 'Recovered', 'Deaths'], loc="upper left", fontsize=15)
plt.show()

## Plotting Cumulative Cases

In [None]:
# Add a running total (over the current row order) for each count column,
# stored as Cum_Cases, Cum_Deaths and Cum_Recovered.
for count_column in ('Cases', 'Deaths', 'Recovered'):
    df['Cum_' + count_column] = df[count_column].cumsum()

In [None]:
# Line chart of the cumulative national totals per date.
plt.figure(figsize=(20, 15))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel("Dates", fontsize=20)
plt.ylabel('Total cases', fontsize=20)
plt.title("Total Confirmed, Recovered, Death Cases in Pakistan", fontsize=30)

# The row-level Cum_* columns are running totals over individual records, so
# summing them per date would count the running total once for every record
# reported on that day. Aggregate to daily totals first, then accumulate.
cumulative_totals = (
    df.groupby('Date')[['Cases', 'Recovered', 'Deaths']]
      .sum()
      .sort_index()
      .cumsum()
)

# (column, line style, colour) in the same drawing order as the legend below.
cumulative_line_specs = (
    ('Cases', '--', 'black'),
    ('Recovered', 'dashdot', 'green'),
    ('Deaths', 'dashdot', 'red'),
)
for column, dash, colour in cumulative_line_specs:
    plt.plot(cumulative_totals.index, cumulative_totals[column].values,
             marker='o', ls=dash, lw=2, color=colour)

plt.legend(["Cum_Cases", 'Cum_Recovered', 'Cum_Deaths'], loc="upper left", fontsize=15)
plt.show()

Setting short forms for the province names.

In [None]:
def short(x):
    """Return the short code for a province name.

    Any value that is not one of the seven province names listed below
    maps to 'FATA'.
    """
    province_codes = {
        'Islamabad Capital Territory': 'ISB',
        'Sindh': 'SD',
        'Gilgit-Baltistan': 'GB',
        'Baluchistan': 'BL',
        'Punjab': 'PJ',
        'Khyber Pakhtunkhwa': 'KPK',
        'Azad Jummu Kashmir': 'AJK',
    }
    return province_codes.get(x, 'FATA')
    

# Store the short province code for every record.
df['Province_Acr'] = df['Province'].map(short)

## Province-Wise Cases

In [None]:
# Overlaid bar chart of total Cases, Recovered and Deaths per province code.
plt.figure(figsize=(15, 10))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel("Province", fontsize=20)
plt.ylabel('Total cases', fontsize=20)

# Aggregate once per province and reuse the result for all three bar series.
province_totals = df.groupby('Province_Acr')[['Cases', 'Recovered', 'Deaths']].sum()

# Each series is drawn largest-first, in the same order as the legend below.
for column in ('Cases', 'Recovered', 'Deaths'):
    ranked = province_totals[column].sort_values(ascending=False)
    plt.bar(ranked.index, ranked.values)

plt.legend(["Cases", 'Recovered', 'Deaths'], loc="upper right", fontsize=15)

plt.show()

## Cases and Travel History Relation

In [None]:
# Bubble chart of travel history against province, with bubble size set by 'Cases'.
# relplot is a figure-level function that creates its own figure (sized through
# `height` and `aspect`), so no plt.figure() call is made here; one would only
# leave an extra empty figure in the output.
sns.set(font_scale=1)
sns.relplot(x="Province_Acr", y="Travel_history", size="Cases",
            sizes=(10, 1500), alpha=.5, color='r',
            height=8, aspect=1.6, data=df).tight_layout()
plt.show()

## Case Growth Rate

In [None]:
# Day-over-day growth of the national daily case count.
# pct_change must run on one value per date; applied to the raw records it would
# compare consecutive rows, which belong to different cities.
daily_cases = df.groupby('Date')['Cases'].sum().sort_index()
# A zero-case day followed by a non-zero day yields +/-inf, which cannot be drawn.
daily_growth = daily_cases.pct_change().replace([np.inf, -np.inf], np.nan)

# Keep the row-level column: each record carries the growth rate of its date.
df['Case_Growth_Rate'] = df['Date'].map(daily_growth)

plt.figure(figsize=(15, 8))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlabel("Date", fontsize=20)
plt.ylabel('Daily case growth rate', fontsize=20)

x = daily_growth.index
y = daily_growth.values

plt.bar(x, y, color='Salmon', width=1.2)

plt.show()

## Mortality Rate

In [None]:
# Row-level running balance: running confirmed minus running recovered and deaths.
# (Subtracting the per-record Recovered/Deaths counts from a running total mixes
# daily and cumulative quantities.)
df['Active_Cases'] = df['Cum_Cases'] - df['Cum_Recovered'] - df['Cum_Deaths']

# Per-date summary. Columns are selected with a list: tuple-style selection on a
# groupby raises in pandas 2.x.
daily_totals = df.groupby('Date')[['Cases', 'Recovered', 'Deaths']].sum().sort_index()
# Accumulate the daily totals; summing row-level running totals per date would
# count them once per record of that day.
running_totals = daily_totals.cumsum()

corona_data = daily_totals.assign(
    Active_Cases=running_totals['Cases'] - running_totals['Recovered'] - running_totals['Deaths'],
    Cum_Cases=running_totals['Cases'],
)[['Cases', 'Recovered', 'Active_Cases', 'Deaths', 'Cum_Cases']]

# Rates are cumulative outcome counts over cumulative confirmed cases, in percent.
# Dates with no confirmed cases yet give NaN instead of a division by zero.
confirmed_so_far = running_totals['Cases'].where(running_totals['Cases'] > 0)
corona_data['Mortality Rate'] = (running_totals['Deaths'] / confirmed_so_far) * 100
corona_data['Recovery Rate'] = (running_totals['Recovered'] / confirmed_so_far) * 100

# Same layout as before: 'Date' as a regular column, newest date first.
corona_data = corona_data.reset_index().sort_values('Date', ascending=False)

In [None]:
# Line chart of the 'Mortality Rate' column of corona_data over time.
plt.figure(figsize=(15, 9))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Label the axes and title the figure so it can be read on its own.
plt.xlabel('Date', fontsize=20)
plt.ylabel('Mortality Rate (%)', fontsize=20)
plt.title('Mortality Rate in Pakistan', fontsize=30)

plt.plot(corona_data['Date'], corona_data['Mortality Rate'],
         marker='o', ls='dashdot', lw=2, color='Crimson')

plt.show()

## Recovery Rate

In [None]:
# Line chart of the 'Recovery Rate' column of corona_data over time.
plt.figure(figsize=(15, 9))
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Label the axes and title the figure so it can be read on its own.
plt.xlabel('Date', fontsize=20)
plt.ylabel('Recovery Rate (%)', fontsize=20)
plt.title('Recovery Rate in Pakistan', fontsize=30)

plt.plot(corona_data['Date'], corona_data['Recovery Rate'],
         marker='o', ls='dashdot', lw=2, color='Indigo')

plt.show()

### Loading City Coordinates

In [None]:
# Load the per-city coordinates and attach them to every case record.
cord = pd.read_csv('/kaggle/input/pakistan-corona-virus-citywise-data/covid_cases_city_coordinates.csv')
# Keep one coordinate row per city: a repeated city would make the left join
# duplicate case records and inflate every total computed afterwards.
cord = cord.drop_duplicates(subset='City')
# merge already returns a new frame, so no extra copy is needed.
df = df.merge(cord, how='left', on='City')

## Pakistan Corona Case Time Lapse

In [None]:
# Animated density map with one frame per date.
# The animation frame key is the date rendered as a string.
df['Date_S'] = df['Date'].astype(str)

fig = px.density_mapbox(
    df,
    lat="Latitude (generated)",
    lon="Longitude (generated)",
    hover_name="City",
    hover_data=["Cum_Cases", "Cum_Deaths", "Cum_Recovered"],
    animation_frame="Date_S",
    color_continuous_scale="Portland",
    radius=20,
    zoom=4.5,
    height=750,
)

# Title and font, base map and its centre, and zero margins, set in one call.
title_font = dict(family="Courier New, monospace", size=18, color="#7f7f7f")
zero_margin = {"r": 0, "t": 0, "l": 0, "b": 0}
fig.update_layout(
    title='Pakistan Corona Virus Cases Time Lapse - Confirmed, Deaths, Recovered',
    font=title_font,
    mapbox_style="open-street-map",
    mapbox_center_lon=70,
    mapbox_center_lat=31,
    margin=zero_margin,
)

fig.show()

## Province Wise - Daily Cases vs Daily Recovery

In [None]:
# Large font scale for the big facet panels.
sns.set(font_scale=2.7)
# One panel per province, wrapped two panels per row.
g = sns.FacetGrid(df, col="Province", height=11, aspect=1.2, col_wrap=2, margin_titles=True)
# In every panel, draw the 'Cases' and 'Recovered' columns against 'Date'.
g.map(sns.lineplot,'Date', 'Cases', color='red', label='New Case', ls='--', lw=4)
g.map(sns.lineplot,'Date', 'Recovered', color='DarkOliveGreen', label='New Recovered', ls='-.', lw=4)
# Rotate the date tick labels so they do not overlap.
g.set_xticklabels(rotation=90)
g.tight_layout()
g.set_ylabels('Total Cases')
g.add_legend()

# Reserve space above the panels, then place the overall title in it.
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Daily Cases vs Daily Recovery', x=0.45, y=0.95, fontsize=50)

plt.show()

## Forecasting

Forecasting of Active Cases for the Next 30 Days.

In [None]:
# Forecast the national active-case count 30 days ahead with Prophet.
# Active cases per date = cumulative confirmed - cumulative recovered - cumulative
# deaths, built from daily totals. Summing the row-level 'Active_Cases' column per
# date would count a running total once for every record reported that day.
daily_totals = df.groupby('Date')[['Cases', 'Recovered', 'Deaths']].sum().sort_index()
running_totals = daily_totals.cumsum()
active_cases = running_totals['Cases'] - running_totals['Recovered'] - running_totals['Deaths']

# Prophet expects a frame with a 'ds' (date) and a 'y' (value) column.
d_new = active_cases.reset_index()
d_new.columns = ['ds', 'y']

m = Prophet()
m.fit(d_new)

future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)

fig = go.Figure()

fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'],
                         mode='lines+markers', name='Predicted Cases', marker_color='DarkSlateGray'))
# The last expression returns the figure, which the notebook renders as the cell output.
fig.add_trace(go.Scatter(x=d_new['ds'], y=d_new['y'],
                         mode='lines+markers', name='Actual Cases', marker_color='Tomato'))

Forecasting of Deaths for Next 30 Days.

In [None]:
# Forecast the national per-day death count 30 days ahead with Prophet.
# Prophet expects a frame with a 'ds' (date) and a 'y' (value) column.
d_new = df.groupby('Date')['Deaths'].sum().reset_index()
d_new.columns = ['ds', 'y']

m = Prophet()
m.fit(d_new)

future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)

fig = go.Figure()

# Legend entries name the plotted quantity (deaths), not cases.
fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'],
                         mode='lines+markers', name='Predicted Deaths', marker_color='FireBrick'))
# The last expression returns the figure, which the notebook renders as the cell output.
fig.add_trace(go.Scatter(x=d_new['ds'], y=d_new['y'],
                         mode='lines+markers', name='Actual Deaths', marker_color='DarkBlue'))

Forecasting of Cured for Next 30 Days

In [None]:
# Forecast the national per-day recovered count 30 days ahead with Prophet.
# Prophet expects a frame with a 'ds' (date) and a 'y' (value) column.
d_new = df.groupby('Date')['Recovered'].sum().reset_index()
d_new.columns = ['ds', 'y']

m = Prophet()
m.fit(d_new)

future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)

fig = go.Figure()

# Legend entries name the plotted quantity (recoveries), not cases.
fig.add_trace(go.Scatter(x=forecast['ds'], y=forecast['yhat'],
                         mode='lines+markers', name='Predicted Recovered', marker_color='Green'))
# The last expression returns the figure, which the notebook renders as the cell output.
fig.add_trace(go.Scatter(x=d_new['ds'], y=d_new['y'],
                         mode='lines+markers', name='Actual Recovered', marker_color='Gold'))

#### Do let me know if you find this notebook helpful.... Thanks