In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [None]:
# Apply seaborn's dark-grid theme to every figure drawn below.
# `set_theme` is the preferred entry point; `sns.set` is only an alias for it.
sns.set_theme(style='darkgrid')

# **LOADING DATA**

In [None]:
# Read the Kerala confirmed-cases CSV from the Kaggle input directory into a DataFrame.
covid_india = pd.read_csv(
    '/kaggle/input/covid19-confirmed-cases-kerala/Confirmed Covid 19 Cases Kerala.csv'
)

# **DATA DESCRIPTION**

In [None]:
# Peek at the five most recent rows of the frame.
covid_india.tail(n=5)

In [None]:
# (number of rows, number of columns) of the loaded frame.
covid_india.shape

In [None]:
# Convert 'Date' to datetime so it can be compared against date strings and used
# with the `.dt` accessor further down.
# NOTE(review): no explicit format is given, so pandas infers it — confirm the
# CSV's day/month order is parsed as intended.
covid_india = covid_india.assign(Date=pd.to_datetime(covid_india['Date']))

In [None]:
# Store the case counts as 64-bit integers.
# NOTE(review): this cast raises if the column contains missing values; the NaN
# check only runs in a later cell.
covid_india['Confirmed'] = covid_india['Confirmed'].astype(np.int64)

In [None]:
# Verify the column types after the datetime / int64 conversions above.
covid_india.dtypes

In [None]:
# Count missing values per column.
covid_india.isnull().sum()

# **Exploratory data analysis**

In [None]:
# Boolean mask for rows dated 2021-01-01 through 2021-05-28 (both ends inclusive).
mask_month = covid_india['Date'].between('2021-01-01', '2021-05-28')

In [None]:
# Keep only the rows selected by the date mask.
filter_covid = covid_india[mask_month]

In [None]:
# Scatter of confirmed cases against date for the window held in `filter_covid`.
plt.figure(figsize=[15,5])
sns.scatterplot(x='Date', y='Confirmed', data=filter_covid)
# A bare `sns.scatterplot` expression used to sit here; it did nothing and was removed.
# The title now spells the state's name as it appears in the dataset path (Kerala).
plt.title('Cases in January to May 2021 - Kerala - India');

In [None]:
plt.figure(figsize=[12,8])
# One bar per calendar year taken from 'Date'. seaborn's barplot aggregates y with
# its default estimator (the mean), so each bar is the average 'Confirmed' value
# over that year's rows, not a yearly total.
sns.barplot(x=covid_india['Date'].dt.year, y='Confirmed', data=covid_india)
# Title corrected to spell the state's name as in the dataset path (Kerala).
plt.title('Confirmed cases 2020 and 2021 - Kerala - India')
plt.xlabel('Year');

In [None]:
# Change between the last two rows, computed as latest minus previous so that an
# increase in 'Confirmed' comes out positive (previous minus latest flips the sign).
# Assumes rows are in chronological order — TODO confirm against the CSV.
new_cases = covid_india.iloc[-1] - covid_india.iloc[-2]

In [None]:
# Show only the 'Confirmed' entry of the row-to-row difference.
print(new_cases.loc['Confirmed'])

# **CONFIRMED CASES PREDICTION FOR KERALA - INDIA**

In [None]:
# Regression target: the 'Confirmed' column.
y = covid_india.loc[:, 'Confirmed']

In [None]:
# Turn the target into a single-column 2-D array of shape (n_samples, 1).
y = np.reshape(np.array(y), (-1, 1))

In [None]:
# Confirm the target is a single column: (n_samples, 1).
np.shape(y)

In [None]:
# Feature: the row position 0..n-1 (one step per row of the frame), as a column vector.
X = np.arange(covid_india.shape[0]).reshape((-1, 1))

In [None]:
# Confirm the feature matrix is a single column: (n_samples, 1).
np.shape(X)

In [None]:
# Same index as X, extended by 14 extra steps (the two-week forecast horizon).
forecast = np.arange(covid_india.shape[0] + 14).reshape((-1, 1))

In [None]:
# Should be 14 rows longer than X.
np.shape(forecast)

# **Splitting the data into train/test sets**

In [None]:
# 80/20 split without shuffling: the first 80% of rows train the model and the
# final 20% are held out, preserving row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# **POLYNOMIAL REGRESSION**

In my tests, degree = 5 got the best results.

In [None]:
# Expand the single index feature into polynomial terms up to degree 5.
# The transformer is fitted on the training rows only and then reused for the test rows.
poly = PolynomialFeatures(degree=5)
X_Train_poly = poly.fit(X_train).transform(X_train)
X_test_poly = poly.transform(X_test)

In [None]:
# (n_test_rows, number of generated polynomial columns).
np.shape(X_test_poly)

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least squares on the polynomial-expanded training features.
regressor = LinearRegression()
regressor.fit(X=X_Train_poly, y=y_train)

# **PREDICTION**

In [None]:
# Predictions for the held-out test rows.
poly_test_pred = regressor.predict(X=X_test_poly)

In [None]:
# Compare predictions with the held-out values. Both series are plotted against
# the test rows' index values (X_test) and labelled so the figure stands alone.
plt.figure(figsize=[14,8])
plt.plot(X_test.ravel(), poly_test_pred.ravel(), linestyle='dashed', label='Predicted')
plt.plot(X_test.ravel(), y_test.ravel(), label='Actual')
plt.title('Polynomial regression on the test set')
plt.xlabel('Row index')
plt.ylabel('Confirmed')
plt.legend();

In [None]:
# Error metrics on the held-out rows. Arguments follow sklearn's (y_true, y_pred)
# order; both metrics are symmetric, so the values match the swapped order.
mae = mean_absolute_error(y_test, poly_test_pred)
mse = mean_squared_error(y_test, poly_test_pred)
print('MAE:', mae)
print('MSE:', mse)
# The square root of the MSE is the RMSE, so it is labelled as such.
print('RMSE:', np.sqrt(mse))

# **PREDICT THE CASES FOR THE NEXT TWO WEEKS**

In [None]:
# Polynomial features for every index in `forecast` (observed rows plus the 14
# extra steps), built with the transformer already fitted on the training rows.
x_train_all = poly.transform(X=forecast)
np.shape(x_train_all)

In [None]:
# Model output over the whole forecast range, including the extrapolated tail.
poly_pred_all = regressor.predict(X=x_train_all)

In [None]:
# Observed values over the observed index range, with the model's curve overlaid
# across the full forecast range (dashed).
plt.figure(figsize=[14,8])
# The first len(y) entries of `forecast` are exactly the indices that have observations.
plt.plot(forecast[:len(y)], y, label='Cases')
plt.plot(forecast, poly_pred_all, linestyle='dashed', label='Predictions')
plt.title('Cases of Covid-19')
plt.xlabel('days since 2020-01-31')
plt.ylabel('Number of Cases')
plt.legend();