This is based on data up to May 31, 2021. Here are the datasets used:
1. [Coronavirus dataset from Our World in Data](http://ourworldindata.org/coronavirus/country/nepal)
2. [Mobility dataset from Google Mobility Report](http://www.google.com/covid19/mobility/)
3. [Recovery dataset from Johns Hopkins University](https://github.com/CSSEGISandData/COVID-19)
4. [Vaccine data from Our World in Data](http://github.com/owid/covid-19-data/blob/master/public/data/vaccinations/country_data/Nepal.csv)

## [I have written an article on what I found visualizing COVID-19 in Nepal. You can click here to read it.](https://komalniraula.medium.com/what-i-found-visualizing-covid-19-in-nepal-8d80ad56e669)

This notebook contains more visualizations and analysis than are presented in the article. It also includes a prediction model.

## Importing files

In [None]:
# Print the full path of every file found under Kaggle's read-only input directory.
import os

input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)


## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px

## Reading the first Dataset

In [None]:
# Load the Global Mobility Report (every country) into `df`.
df = pd.read_csv('/kaggle/input/visualizing-covid19-in-nepal/Global_Mobility_Report.csv')
# `head` is a method: call it so only the first rows are printed, instead of
# the bound-method repr that `print(df.head)` would emit.
print(df.head())
# Column names, shown as the cell output.
list(df)

## Getting Nepal Data

In [None]:
# Keep only the rows of the mobility report whose country is Nepal.
is_nepal = df['country_region'] == 'Nepal'
df3 = df[is_nepal]
# Report how many values are missing in each column.
missing_per_column = df3.isnull().sum()
print(missing_per_column)
df3

## Feature Preprocessing

In [None]:
# Columns that are not used anywhere later in the notebook.
unused_columns = [
    'country_region', 'country_region_code', 'sub_region_1', 'sub_region_2',
    'metro_area', 'iso_3166_2_code', 'census_fips_code', 'place_id',
]
df3 = df3.drop(columns=unused_columns)
# Discard the final row of the frame.
df3 = df3.iloc[:-1]
df3

In [None]:
# Shorten the verbose "<place>_percent_change_from_baseline" column names.
short_names = {
    "retail_and_recreation_percent_change_from_baseline": "retail_recreation",
    "grocery_and_pharmacy_percent_change_from_baseline": "grocery_pharmacy",
    "parks_percent_change_from_baseline": "parks",
    "transit_stations_percent_change_from_baseline": "transit_stations",
    "workplaces_percent_change_from_baseline": "workplaces",
    "residential_percent_change_from_baseline": "residential",
}
df3 = df3.rename(columns=short_names)
# Parse the dates and use them as a chronologically sorted index.
df3['date'] = pd.to_datetime(df3['date'])
df3 = df3.set_index('date').sort_index()
df3

## Visualizing the data

In [None]:
# Draw every mobility column in its own subplot using matplotlib's dark theme.
plt.style.use('dark_background')
subplot_options = dict(subplots=True, legend=True, figsize=(10, 8))
df3.plot(**subplot_options)
plt.show()

## Visualizing in same graph

In [None]:
!pip install mplcyberpunk 
import mplcyberpunk

plt.style.use("cyberpunk")
df3.plot(title='Change/in Mobility', xlabel = 'Dates', figsize=(10,8))
mplcyberpunk.add_glow_effects()
plt.show()

## Reading second Dataset

In [None]:
# Load the recovery dataset into `dfr` and display it.
recovery_csv = '/kaggle/input/visualizing-covid19-in-nepal/recovery_rate.csv'
dfr = pd.read_csv(recovery_csv)
dfr

## Feature Preprocessing

In [None]:
# Keep Nepal's row(s), drop the location columns, then transpose so that the
# remaining column labels become the row index.
nepal_rows = dfr['Country/Region'] == 'Nepal'
dfr = dfr[nepal_rows].drop(columns=['Lat', 'Long', 'Province/State'])
dfr = dfr.transpose()
dfr

## Feature Engineering

In [None]:
# Move the transposed labels out of the index into an ordinary column.
# Row 0 then holds the 'Country/Region' label rather than a date, so it is
# skipped; the last four rows are discarded too (presumably to stop at the
# May 31, 2021 cut-off stated in the introduction -- confirm).
dfr = dfr.reset_index().iloc[1:-4, :].copy()

# Name the two columns by position. The value column's label is the row number
# Nepal had in the CSV (177 in the original file); hard-coding it breaks as
# soon as the upstream file gains or loses a row. A length mismatch here means
# the filter matched more than one row.
dfr.columns = ['date', 'Recovery']

dfr['date'] = pd.to_datetime(dfr['date'])
# The transpose left the counts as generic objects; make them numeric so the
# diff below and later aggregations work on real numbers.
dfr['Recovery'] = pd.to_numeric(dfr['Recovery'])
dfr = dfr.set_index('date')

# Day-over-day change of the cumulative count; the first day has no
# predecessor and is set to 0.
dfr['daily_recovery'] = dfr['Recovery'].diff().fillna(0)
dfr

## Reading Third Dataset

In [None]:
# Load the full OWID dataset into `dfn1`, then keep Nepal's rows in `dfn`.
owid_csv = '/kaggle/input/visualizing-covid19-in-nepal/owid-covid-data.csv'
dfn1 = pd.read_csv(owid_csv)
dfn = dfn1.loc[dfn1['location'] == 'Nepal']
dfn

## Feature Preprocessing

In [None]:
# Columns removed from the OWID frame. The original list named
# 'icu_patients_per_million' and 'new_vaccinations_smoothed' twice; each label
# now appears once.
owid_unused_columns = [
    'icu_patients', 'gdp_per_capita', 'extreme_poverty', 'cardiovasc_death_rate',
    'diabetes_prevalence', 'female_smokers', 'male_smokers', 'handwashing_facilities',
    'hospital_beds_per_thousand', 'life_expectancy', 'human_development_index',
    'new_deaths_smoothed_per_million', 'new_cases_smoothed', 'new_deaths_smoothed',
    'iso_code', 'new_cases_smoothed_per_million', 'continent', 'location',
    'icu_patients_per_million', 'hosp_patients', 'hosp_patients_per_million',
    'weekly_icu_admissions', 'weekly_icu_admissions_per_million',
    'weekly_hosp_admissions', 'weekly_hosp_admissions_per_million',
    'population', 'population_density', 'median_age', 'aged_65_older',
    'aged_70_older', 'new_vaccinations_smoothed_per_million',
    'new_vaccinations_smoothed', 'tests_units', 'total_vaccinations',
    'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
    'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred', 'new_tests_smoothed',
    'new_tests_smoothed_per_thousand', 'reproduction_rate',
]
dfn = dfn.drop(columns=owid_unused_columns)

# Index by parsed date, in chronological order.
dfn['date'] = pd.to_datetime(dfn.date)
dfn = dfn.set_index('date')
dfn.sort_index(inplace=True)

# Bring in the recovery series; pandas aligns them on the date index, so dates
# absent from `dfr` become NaN.
dfn['daily_recovery'] = dfr['daily_recovery']
dfn['recovery'] = dfr['Recovery']

# Discard the last four rows.
dfn = dfn.iloc[:-4, :]
dfn

## Reading Fourth Dataset

In [None]:
# Load Nepal's vaccination data into `dfv` and display it.
vaccine_csv = '/kaggle/input/visualizing-covid19-in-nepal/Nepal_vaccine.csv'
dfv = pd.read_csv(vaccine_csv)
dfv

## Feature Preprocessing

In [None]:
# Drop the descriptive columns, then index the frame by parsed, sorted dates.
dfv = dfv.drop(columns=['location', 'source_url', 'vaccine'])
dfv['date'] = pd.to_datetime(dfv['date'])
dfv = dfv.set_index('date').sort_index()
dfv

## Visualization

In [None]:
# Area chart of cumulative cases and cumulative tests.
dff = dfn[['total_cases', 'total_tests']]
fig = px.area(dff)

# Horizontal legend anchored just above the plot, right-aligned.
legend_above_right = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(
    title_text="Total cases vs Total tests",
    legend=legend_above_right,
)
# Blank out the legend title.
fig.update_layout(legend_title_text='')

fig.show()

In [None]:
# Area chart of daily new cases and daily new tests.
dff = dfn[['new_cases', 'new_tests']]
fig = px.area(dff)

# Horizontal legend anchored just above the plot, right-aligned.
legend_above_right = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(
    title_text="New cases vs New tests",
    legend=legend_above_right,
)
# Blank out the legend title.
fig.update_layout(legend_title_text='')

fig.show()

In [None]:
# Area chart of fully vaccinated people and total vaccinations.
dfv1 = dfv[['people_fully_vaccinated', 'total_vaccinations']]
fig = px.area(dfv1)

# Horizontal legend anchored just above the plot, right-aligned.
legend_above_right = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(
    title_text="Vaccination",
    legend=legend_above_right,
)
# Blank out the legend title.
fig.update_layout(legend_title_text='')

fig.show()

## Visualizing Mobility & New Cases

In [None]:
def mobility(mobility_name, title):
    """Plot one mobility series against daily new cases on a dual-axis chart.

    Reads the module-level frames ``df3`` (mobility) and ``dfn`` (case data).

    Args:
        mobility_name: Column of ``df3`` drawn on the primary y-axis.
        title: Label used as the mobility trace name and inside the chart title.

    Returns:
        The result of ``fig.show()``.
    """
    mobility_trace = go.Scatter(x=df3.index, y=df3[mobility_name], name=title)
    cases_trace = go.Scatter(x=dfn.index, y=dfn['new_cases'], name="New Cases")

    # One plot cell with a secondary y-axis so both series keep their own scale.
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(mobility_trace, row=1, col=1, secondary_y=False)
    fig.add_trace(cases_trace, row=1, col=1, secondary_y=True)

    # Horizontal legend anchored just above the plot, right-aligned.
    legend_above_right = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    fig.update_layout(
        title_text="Change in Mobility in {} vs New Cases".format(title),
        legend=legend_above_right,
    )
    fig.update_layout(legend_title_text='')

    # Axis titles
    fig.update_xaxes(title_text="date")
    fig.update_yaxes(title_text="<b>percent change in mobility</b>", secondary_y=False)
    fig.update_yaxes(title_text="<b>new cases</b>", secondary_y=True)

    return fig.show()

In [None]:
# Chart the retail & recreation mobility change against daily new cases.
mobility('retail_recreation', 'Retail & Recreational Area')

In [None]:
# Chart the residential mobility change against daily new cases.
mobility('residential', 'Residential Area')

In [None]:
# Chart the transit-station mobility change against daily new cases.
mobility('transit_stations', 'Transit Station')

In [None]:
# Chart the parks mobility change against daily new cases.
mobility('parks', 'Parks')

In [None]:
# Display the mobility values recorded for 2021-04-12.
df3.loc['2021-04-12']

In [None]:
# Chart the workplaces mobility change against daily new cases.
mobility('workplaces', 'Workplaces')

In [None]:
# Chart the grocery & pharmacy mobility change against daily new cases.
mobility('grocery_pharmacy', 'Grocery & Pharmacy')

In [None]:
# Line chart comparing daily recoveries with daily new cases over time.
compared_columns = ["daily_recovery", 'new_cases']
fig = px.line(
    dfn,
    x=dfn.index,
    y=compared_columns,
    title='Daily Recovery vs New Cases',
)
fig.show()

## Number of days when recovery was higher than cases

In [None]:
# Daily surplus of recoveries over new cases: positive means more recoveries
# than new cases on that day.
dfrc = pd.DataFrame()
dfrc['rec_vs_cas'] = dfn['daily_recovery'] - dfn['new_cases']

# Keep only days with a real, non-zero difference. A day where either figure is
# missing yields NaN, and `NaN > 0` is False, so it used to be counted as a day
# with more cases than recoveries. Ties (difference of 0) are excluded as before.
has_difference = dfrc['rec_vs_cas'].notna() & (dfrc['rec_vs_cas'] != 0)
dfrc = dfrc[has_difference].copy()

dfrc["rec_vs_cas_count"] = np.where(dfrc["rec_vs_cas"] > 0, 1, 0)
dfrc['rec_vs_cas_count'].value_counts()
#0 = Days when cases were higher than recovery
#1 = Days when recovery was higher than cases

## Creating new dataset with monthly total

In [None]:
# Monthly column name -> daily column of `dfn` that is summed per calendar month.
monthly_sources = {
    'total_test': 'new_tests',
    'total_cases': 'new_cases',
    'total_deaths': 'new_deaths',
    'total_recovery': 'daily_recovery',
}
dfm = pd.DataFrame()
for monthly_name, daily_name in monthly_sources.items():
    dfm[monthly_name] = dfn[daily_name].resample('M').sum()
dfm

## Visualizing Monthly Report

In [None]:
# Bar chart of the monthly totals.
monthly_columns = ["total_deaths", "total_cases", "total_recovery", "total_test"]
fig = px.bar(dfm, x=dfm.index, y=monthly_columns, title="Monthly Report")

# Horizontal legend anchored just above the plot, right-aligned, without a title.
legend_above_right = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(legend=legend_above_right)
fig.update_layout(legend_title_text='')
fig.show()

## Feature Scaling Monthly Report

In [None]:
# Min-max scale each monthly column independently so that series of very
# different magnitude can be compared on one axis.
x = dfm.values  # plain numpy array, one column per monthly total
min_max_scaler = MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)

# Label all four scaled columns. Only the first three used to be renamed,
# which left the scaled recoveries under the integer label 3. The first column
# keeps the name 'total_tests' (it is 'total_test' in dfm) because the chart
# in the following cell selects it by that name.
dfmt = pd.DataFrame(
    x_scaled,
    index=dfm.index,
    columns=['total_tests', 'total_cases', 'total_deaths', 'total_recovery'],
)
dfmt

## Visualizing Scaled Monthly Report

In [None]:
# Bar chart of the scaled monthly deaths, cases and tests.
scaled_columns = ["total_deaths", "total_cases", "total_tests"]
fig = px.bar(dfmt, x=dfmt.index, y=scaled_columns, title="Monthly Report")
fig.show()

## Visualizing Stringency Index vs New Cases

In [None]:
# Dual-axis chart: stringency index on the primary axis, daily new cases on
# the secondary axis.
stringency_trace = go.Scatter(x=dfn.index, y=dfn['stringency_index'], name='stringency_index')
new_cases_trace = go.Scatter(x=dfn.index, y=dfn['new_cases'], name="new_cases")

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(stringency_trace, row=1, col=1, secondary_y=False)
fig.add_trace(new_cases_trace, row=1, col=1, secondary_y=True)

fig.update_layout(title_text="Stringency Index Vs New Cases")

# Axis titles
fig.update_xaxes(title_text="date")
axis_titles = ((False, "<b>stringency index</b>"), (True, "<b>new cases</b>"))
for on_secondary, axis_title in axis_titles:
    fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

fig.show()

## Visualizing Stringency Index vs Mobility Change in Parks

In [None]:
# Dual-axis chart: stringency index on the primary axis, mobility change in
# parks on the secondary axis.
stringency_trace = go.Scatter(x=dfn.index, y=dfn['stringency_index'], name='stringency_index')
parks_trace = go.Scatter(x=df3.index, y=df3['parks'], name="percent change in mobility of parks")

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(stringency_trace, row=1, col=1, secondary_y=False)
fig.add_trace(parks_trace, row=1, col=1, secondary_y=True)

# Horizontal legend anchored just above the plot, right-aligned, without a title.
legend_above_right = dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
fig.update_layout(
    title_text="Stringency Index Vs Mobility Change in Parks",
    legend=legend_above_right,
)
fig.update_layout(legend_title_text='')

# Axis titles
fig.update_xaxes(title_text="date")
axis_titles = (
    (False, "<b>stringency index</b>"),
    (True, "<b>percent change in mobility in parks</b>"),
)
for on_secondary, axis_title in axis_titles:
    fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

fig.show()

## Prediction Model

### Feature Preprocessing

In [None]:
# Join the mobility and case frames side by side on their date index and
# replace every missing value with 0.
df_main = pd.concat([df3, dfn], axis=1).fillna(0)

# diff(periods=-1) subtracts the NEXT row from the current one, so a positive
# value means the count is lower on the following day. The last row has no
# successor and gets NaN.
change_columns = (('new_cases', 'case_change'), ('new_deaths', 'death_change'))
for source_column, change_column in change_columns:
    df_main[change_column] = df_main[source_column].diff(periods=-1)


### Feature Engineering

In [None]:
# Binary targets: 0 when the count falls on the following day (change > 0),
# 1 otherwise, i.e. when it rises or stays the same.
# NOTE(review): the final row's change is NaN, so it is also labelled 1 even
# though no next-day value exists -- confirm whether that row should be kept.
case_falls_next_day = df_main["case_change"] > 0
death_falls_next_day = df_main["death_change"] > 0
df_main["case_prediction"] = np.where(case_falls_next_day, 0, 1)
df_main["death_prediction"] = np.where(death_falls_next_day, 0, 1)
df_main

### Creating train & test set

In [None]:
from sklearn.model_selection import train_test_split

# Same-day mobility and epidemic figures used as model inputs.
feature_names = [
    'grocery_pharmacy',
    'parks',
    'transit_stations',
    'residential',
    'new_cases',
    'new_deaths',
    'daily_recovery',
]
X = df_main[feature_names]
y = df_main["case_prediction"] #Couldn't get better accuracy while predicting cases
y2 = df_main["death_prediction"] #Will be using this

# Split on the death target with a fixed seed.
# NOTE(review): train_test_split shuffles rows by default, so the split does
# not respect time order -- confirm this is intended.
splits = train_test_split(X, y2, random_state=0)
X_train, X_test, y_train, y_test = splits

### Using the Gradient Boosting algorithm to predict whether deaths will increase or decrease the next day

In [None]:
#Got best result (76% accuracy) on test set using Gradient Boosting Classifier.
#Tried many algorithms, but this (76%) was best
from sklearn.ensemble import GradientBoostingClassifier

# Fit with default hyper-parameters and predict the held-out rows.
clf = GradientBoostingClassifier()
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)

# Mean accuracy on each split, printed with two decimals.
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print('Accuracy of Gradient Boosting classifier on training set: {:.2f}'.format(train_accuracy))
print('Accuracy of Gradient Boosting classifier on test set: {:.2f}'.format(test_accuracy))