# **Step 1 : Importing Packages**

In [1]:
# storing and analysis
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# interactive plots
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# geographical plotting
# import folium
# import geopandas as gpd

ModuleNotFoundError: No module named 'plotly'

In [None]:
# offline plotly
# Import plotly's offline helpers and call init_notebook_mode once, before any
# figure is drawn.
# NOTE(review): `plot` and `iplot` are imported here but are not called in the
# cells visible in this notebook.
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

In [None]:
# color palette shared by the charts below
cnf = '#393e46' # confirmed - dark grey
dth = '#ff2e63' # death - red
rec = '#21bf73' # recovered - green
act = '#fe9801' # active case - orange

# Step 2 : Reading Data

In [None]:
# list the files available in the input dataset directory
! ls ../input/covid19-corona-virus-india-dataset

In [None]:
# daily (nation-level) figures
# ============================

# load the nation-level daily CSV into a frame
daily = pd.read_csv('../input/covid19-corona-virus-india-dataset/nation_level_daily.csv')

# append the year 2020 to every date string before parsing it
# (presumably the file stores dates without a year -- confirm against the CSV)
daily['Date'] = pd.to_datetime(daily['Date'] + ' 2020')

# active = confirmed minus deceased minus recovered
daily['Total Active'] = (
    daily['Total Confirmed']
    - daily['Total Deceased']
    - daily['Total Recovered']
)

# ratio columns, each scaled to "per 100" and rounded to two decimals
daily['Deaths / 100 Cases'] = (
    daily['Total Deceased'].div(daily['Total Confirmed']).mul(100).round(2)
)
daily['Recovered / 100 Cases'] = (
    daily['Total Recovered'].div(daily['Total Confirmed']).mul(100).round(2)
)
daily['Deaths / 100 Recovered'] = (
    daily['Total Deceased'].div(daily['Total Recovered']).mul(100).round(2)
)

# show the first few rows as the cell output
daily.head()

# Step 3 : Data Visualisation

In [None]:
def plot_daily(col, hue):
    """Show a bar chart of one column of `daily` plotted against "Date".

    col -- name of the `daily` column to draw; also used as the chart title
    hue -- colour applied to every bar
    """
    bar_options = dict(x="Date", y=col, title=col,
                       color_discrete_sequence=[hue])
    fig = px.bar(daily, **bar_options)
    # blank out both axis titles and keep the column name as the chart title
    fig.update_layout(title=col, xaxis_title="", yaxis_title="")
    fig.show()

In [None]:
# bar chart of the 'Total Confirmed' column, in black
plot_daily('Total Confirmed', '#000000')

In [None]:
# bar chart of the 'Daily Confirmed' column, in black
plot_daily('Daily Confirmed', '#000000')

In [None]:
# bar chart of the derived 'Total Active' column, in black
plot_daily('Total Active', '#000000')

In [None]:
# bar chart of the 'Total Deceased' column, in the palette's death colour
plot_daily('Total Deceased', dth)

In [None]:
# bar chart of the 'Daily Deceased' column, in the palette's death colour
plot_daily('Daily Deceased', dth)

In [None]:
# bar chart of the 'Total Recovered' column, in the palette's recovered colour
plot_daily('Total Recovered', rec)

In [None]:
# bar chart of the 'Daily Recovered' column, in the palette's recovered colour
plot_daily('Daily Recovered', rec)

In [None]:
def plot_daily(col, hue):
    """Show a scatter plot of one `tests_dbd` column against "Update Time Stamp".

    NOTE(review): this re-uses the name `plot_daily`, so once this cell runs it
    replaces the bar-chart helper of the same name defined earlier. `tests_dbd`
    is not defined in the cells visible here -- confirm it is loaded before
    this function is called.

    col -- name of the `tests_dbd` column to draw; also used as the chart title
    hue -- colour applied to every marker
    """
    # plot from a copy of the source frame
    temp = tests_dbd.copy()
    # temp = temp[~temp[col].isna()]
    scatter_options = dict(x="Update Time Stamp", y=col, title=col,
                           color_discrete_sequence=[hue])
    fig = px.scatter(temp, **scatter_options)
    # blank out both axis titles and keep the column name as the chart title
    fig.update_layout(title=col, xaxis_title="", yaxis_title="")
    fig.show()

In [None]:
# stacked bar chart of the three "Total ..." series

# keep only the date and the three series to stack
temp = daily.loc[:, ['Date', 'Total Active', 'Total Deceased', 'Total Recovered']]

# reshape to long form: one row per (Date, Case) pair with its Count
temp = pd.melt(temp, id_vars="Date",
               value_vars=['Total Recovered', 'Total Deceased', 'Total Active'],
               var_name='Case', value_name='Count')
temp.head()

# draw; the colour list is written in the same order as value_vars above
fig_2 = px.bar(temp, x="Date", y="Count", color='Case',
               title='Cases over time',
               color_discrete_sequence=[rec, dth, act])
fig_2.show()

In [None]:
# stacked bar chart of the three "Daily ..." series

# keep only the date and the three series to stack
temp = daily[['Date', 'Daily Confirmed', 'Daily Deceased', 'Daily Recovered']]

# reshape to long form: one row per (Date, Case) pair with its Count
temp = temp.melt(value_vars=['Daily Recovered', 'Daily Deceased', 'Daily Confirmed'],
                 id_vars="Date", var_name='Case', value_name='Count')

# draw; the colour list is written in the same order as value_vars above.
# The third series is 'Daily Confirmed', so it takes the palette's confirmed
# colour (cnf) rather than the active-case colour (act).
fig_2 = px.bar(temp, x="Date", y="Count", color='Case',
               title='Daily cases over time', color_discrete_sequence = [rec, dth, cnf])
fig_2.show()

In [None]:
# Daily statistics
# ================

# restrict to rows whose 'Total Confirmed' value is above 100
temp = daily.loc[daily['Total Confirmed'] > 100]

# one line figure per ratio column; only the first trace of each is used below
fig_c = px.line(temp, x="Date", y="Deaths / 100 Cases",
                color_discrete_sequence=['#000000'])
fig_d = px.line(temp, x="Date", y="Recovered / 100 Cases",
                color_discrete_sequence=['#649d66'])
fig_r = px.line(temp, x="Date", y="Deaths / 100 Recovered",
                color_discrete_sequence=['#ff677d'])

# 1 x 3 grid, one titled panel per ratio
panel_titles = ('No. of Deaths to 100 Cases',
                'No. of Recovered to 100 Cases',
                'No. of Deaths to 100 Recovered')
fig = make_subplots(rows=1, cols=3, shared_xaxes=False,
                    subplot_titles=panel_titles)

# copy the first trace of each line figure into its own column
fig.add_trace(fig_c.data[0], row=1, col=1)
fig.add_trace(fig_d.data[0], row=1, col=2)
fig.add_trace(fig_r.data[0], row=1, col=3)

In [None]:
# long-form frame: one row per (Date, variable) pair for the two series
temp = pd.melt(daily.loc[:, ['Date', 'Total Active', 'Total Recovered']],
               id_vars='Date',
               value_vars=['Total Active', 'Total Recovered'])
temp.head()

# one line per series, told apart by both colour and dash pattern
fig_c = px.line(temp, x="Date", y="value",
                color='variable', line_dash='variable',
                color_discrete_sequence=[dth, rec])
fig_c.update_layout(title='Active vs Recovered cases',
                    xaxis_title='', yaxis_title='')
fig_c.show()

In [None]:
from fbprophet import Prophet
# Per-date sums of each "Total ..." series, as two-column frames (Date, series).
# The column is selected before aggregating so only that column is summed,
# instead of summing every column of `daily` three times and keeping one.
confirmed = daily.groupby('Date')['Total Confirmed'].sum().reset_index()
deaths = daily.groupby('Date')['Total Deceased'].sum().reset_index()
recovered = daily.groupby('Date')['Total Recovered'].sum().reset_index()

In [None]:
# relabel the two columns of `confirmed` as 'ds' and 'y'
# (presumably the names the Prophet model imported above expects -- confirm)
confirmed.columns = ['ds','y']
# show the last few rows as the cell output
confirmed.tail()

In [None]:
# Bar chart of the 'Total Deceased' column against the date.
plt.figure(figsize = (20,10))
plt.bar(daily['Date'],daily['Total Deceased'])
plt.xlabel('Date')
plt.ylabel('Total Deaths')
# the bars plot the 'Total Deceased' column (see the y label), not a rate,
# so the title names what is actually drawn
plt.title('Total Deaths over Time')
# render explicitly so the cell does not echo the Text object returned above
plt.show()

# Step 3 : Training the Data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# features: every column of `daily` except the last, as an array
X = daily.iloc[:,:-1].values
# target: the last column of `daily`
# NOTE(review): if the cells above ran in order, the last column is
# 'Deaths / 100 Recovered' and X still contains the 'Date' column -- confirm
# that this is the intended feature/target choice.
Y = daily.iloc[:,-1].values

In [None]:
# inspect the feature array
X

In [None]:
# inspect the target array
Y

In [None]:
# dimensions of the feature array
X.shape

In [None]:
# dimensions of the target array
Y.shape

In [None]:
# hold out half of the rows as a test set; random_state fixes the split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.5,random_state = 0)

In [None]:
# dimensions of the training features
X_train.shape

In [None]:
# dimensions of the test features
X_test.shape

In [None]:
# dimensions of the training target
Y_train.shape

In [None]:
# dimensions of the test target
Y_test.shape

In [None]:
# Take an independent copy: a plain `train_df = daily` only adds a second name
# for the same object, so any later in-place change made through train_df
# could also alter `daily`.
train_df = daily.copy()

In [None]:
from fbprophet import Prophet
# create a Prophet model with its default arguments
# NOTE(review): this instance is not fitted in the cells visible here, and
# `model` is rebound to a LogisticRegression further down.
model = Prophet()

In [None]:
# Before renaming the columns
print(train_df.columns)

# Relabel "Date" -> 'ds' and "Total Deceased" -> 'y'.
# rename() without inplace=True returns a new frame, so the object train_df
# was built from (and any other name bound to it) keeps its original column
# labels, and re-running this cell is harmless.
train_df = pd.DataFrame(train_df).rename(columns={"Date": 'ds', "Total Deceased": 'y'})

# After renaming the columns
print(train_df.columns)

In [None]:
# display the training frame as the cell output
train_df

# Step 4 : Prediction Model

In [None]:
# summary statistics of train_df
train_df.describe()

In [None]:
from sklearn.model_selection import train_test_split
# 80/20 split with a fixed seed.
# Both spellings of the target names are bound: the earlier 50/50 split used
# Y_train / Y_test and later cells still read Y_train, so assigning only
# y_train / y_test would leave the new X_train paired with a stale Y_train
# from the previous split.
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.2,random_state=123)
y_train,y_test = Y_train,Y_test

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# create a LinearRegression estimator with its default arguments
# NOTE(review): it is not fitted in the cells visible here.
linear_regression_model = LinearRegression()

In [None]:
# inspect the current training features
X_train

In [None]:
# inspect the full feature array
X

In [None]:
import time
from datetime import datetime

In [None]:
# try to cast every column of train_df to int and bind the result to A;
# errors='ignore' asks astype not to raise when the cast fails
# NOTE(review): presumably the frame comes back un-cast in that case -- confirm
A = train_df.astype(int,copy = True, errors = 'ignore')

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def f(X_train):
    """Convert a single numeric value to a built-in int (truncating toward zero).

    Uses the built-in ``int``: the ``np.int`` alias was only ever another name
    for it, was deprecated in NumPy 1.20 and was removed in NumPy 1.24, where
    calling it raises AttributeError.
    """
    return int(X_train)

# element-wise version of f for array inputs
f2 = np.vectorize(f)

# NOTE(review): this rebinds X_train to an evenly spaced grid (start 1, step 0.1,
# stop 15.1), replacing the split produced by train_test_split -- confirm this
# is intended.
X_train = np.arange(1, 15.1, 0.1)

In [None]:
# rebind train_df to a version without any row that has a missing value
train_df = train_df.dropna()

In [None]:
# inspect A (built from train_df before the dropna above)
A

In [None]:
# inspect the current X_train
X_train

In [None]:
# inspect the current Y_train
Y_train

In [None]:
# how many trailing rows to leave out
n = 3

# wrap the current X_train in a frame ...
df = pd.DataFrame(X_train)

# ... and keep everything except its last n rows
df1 = df.iloc[:-n]

# plain-text dump of the trimmed frame
print(df1)

In [None]:
# how many trailing rows to leave out
n = 3

# wrap the current Y_train in a frame ...
df = pd.DataFrame(Y_train)

# ... and keep everything except its last n rows
df2 = df.iloc[:-n]

# plain-text dump of the trimmed frame
print(df2)

In [None]:
# dimensions of the trimmed X_train frame
df1.shape

In [None]:
# dimensions of the trimmed Y_train frame
df2.shape

In [None]:
# display the trimmed X_train frame
df1

In [None]:
# display the trimmed Y_train frame
df2

In [None]:
from sklearn.impute import SimpleImputer
# imputer with default arguments, fitted on df1 and applied to it
my_imputer = SimpleImputer()
data_with_imputed_values1 = my_imputer.fit_transform(df1)

In [None]:
# NOTE(review): this cell repeats the import and builds a second imputer,
# rebinding `my_imputer`; this one is fitted on df2 and applied to it
from sklearn.impute import SimpleImputer
my_imputer = SimpleImputer()
data_with_imputed_values = my_imputer.fit_transform(df2)

In [None]:
# show both imputed results (df1's first, then df2's)
display(data_with_imputed_values1,data_with_imputed_values)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score
# synthetic data: 1000 samples, 2 features, 2 centres, fixed seed
# NOTE(review): this rebinds X, which earlier held the features taken from `daily`
X, y = make_blobs(n_samples=1000, centers=2, n_features=2, random_state=2)
# build the classifier and train it; fit() hands back the fitted estimator
model = LogisticRegression(solver='lbfgs').fit(X, y)
# predict labels for the same samples the model was trained on
yhat = model.predict(X)
# accuracy of those predictions against the true labels
acc = accuracy_score(y, yhat)
print(acc)

In [None]:
# define input: a single sample with two feature values
new_input = [[2.12309797, -1.41131072]]

In [None]:
# get prediction for new input from the currently bound `model`
new_output = model.predict(new_input)

In [None]:
# make a single prediction with the model
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_blobs
# synthetic data: 1000 samples, 2 features, 2 centres, fixed seed
X, y = make_blobs(n_samples=1000, centers=2, n_features=2, random_state=2)
# build the classifier and train it; fit() hands back the fitted estimator
model = LogisticRegression(solver='lbfgs').fit(X, y)
# one sample with two feature values
new_input = [[2.12309797, -1.41131072]]
# predicted label for that sample
new_output = model.predict(new_input)
# print the sample next to its prediction
print(new_input, new_output)

In [None]:
# predict on the whole training set and print the first ten results
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_blobs
# synthetic data: 1000 samples, 2 features, 2 centres, fixed seed
X, y = make_blobs(n_samples=1000, centers=2, n_features=2, random_state=2)
# build the classifier
model = LogisticRegression(solver='lbfgs')
# train it on the synthetic data
model.fit(X, y)
# predict a label for every training sample
yhat = model.predict(X)
# print the first ten samples, each next to its predicted label
for sample, label in zip(X[:10], yhat[:10]):
    print(sample, label)