# New Section

In [12]:
import pandas as pd
import numpy as np

from datetime import timedelta

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [15]:
# Reading COVID-19 data from CSV
covid = pd.read_csv("C:/Users/sures/Downloads/covid/covid_19_data.csv")

covid

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
156287,156288,11/15/2020,Zaporizhia Oblast,Ukraine,2020-11-16 05:25:57,18484.0,164.0,3021.0
156288,156289,11/15/2020,Zeeland,Netherlands,2020-11-16 05:25:57,5041.0,86.0,0.0
156289,156290,11/15/2020,Zhejiang,Mainland China,2020-11-16 05:25:57,1291.0,1.0,1279.0
156290,156291,11/15/2020,Zhytomyr Oblast,Ukraine,2020-11-16 05:25:57,22225.0,368.0,12266.0


In [16]:
covid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156292 entries, 0 to 156291
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   SNo              156292 non-null  int64  
 1   ObservationDate  156292 non-null  object 
 2   Province/State   111976 non-null  object 
 3   Country/Region   156292 non-null  object 
 4   Last Update      156292 non-null  object 
 5   Confirmed        156292 non-null  float64
 6   Deaths           156292 non-null  float64
 7   Recovered        156292 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 9.5+ MB


In [17]:
covid.columns

Index(['SNo', 'ObservationDate', 'Province/State', 'Country/Region',
       'Last Update', 'Confirmed', 'Deaths', 'Recovered'],
      dtype='object')

In [18]:
#Converting "Observation Date" into Datetime format
covid["ObservationDate"]=pd.to_datetime(covid["ObservationDate"])

In [19]:
# Grouping COVID-19 data by country and observation date
grouped_country = covid.groupby(["Country/Region", "ObservationDate"]).agg({"Confirmed": 'sum',
                                                                            "Recovered": 'sum',
                                                                            "Deaths": 'sum'})

grouped_country

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Recovered,Deaths
Country/Region,ObservationDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Azerbaijan,2020-02-28,1.0,0.0,0.0
"('St. Martin',)",2020-03-10,2.0,0.0,0.0
Afghanistan,2020-02-24,1.0,0.0,0.0
Afghanistan,2020-02-25,1.0,0.0,0.0
Afghanistan,2020-02-26,1.0,0.0,0.0
...,...,...,...,...
occupied Palestinian territory,2020-03-12,0.0,0.0,0.0
occupied Palestinian territory,2020-03-14,0.0,0.0,0.0
occupied Palestinian territory,2020-03-15,0.0,0.0,0.0
occupied Palestinian territory,2020-03-16,0.0,0.0,0.0


In [20]:
# Calculating active cases and applying log transformations
grouped_country["Active Cases"] = grouped_country["Confirmed"] - grouped_country["Recovered"] - grouped_country["Deaths"]
grouped_country["log_confirmed"] = np.log(grouped_country["Confirmed"])
grouped_country["log_active"] = np.log(grouped_country["Active Cases"])

grouped_country

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Recovered,Deaths,Active Cases,log_confirmed,log_active
Country/Region,ObservationDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Azerbaijan,2020-02-28,1.0,0.0,0.0,1.0,0.000000,0.000000
"('St. Martin',)",2020-03-10,2.0,0.0,0.0,2.0,0.693147,0.693147
Afghanistan,2020-02-24,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-25,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-26,1.0,0.0,0.0,1.0,0.000000,0.000000
...,...,...,...,...,...,...,...
occupied Palestinian territory,2020-03-12,0.0,0.0,0.0,0.0,-inf,-inf
occupied Palestinian territory,2020-03-14,0.0,0.0,0.0,0.0,-inf,-inf
occupied Palestinian territory,2020-03-15,0.0,0.0,0.0,0.0,-inf,-inf
occupied Palestinian territory,2020-03-16,0.0,0.0,0.0,0.0,-inf,-inf


In [21]:
# Grouping data by observation date
datewise = covid.groupby(["ObservationDate"]).agg({"Confirmed":'sum',
                                                   "Recovered":'sum',
                                                   "Deaths":'sum'})

datewise

Unnamed: 0_level_0,Confirmed,Recovered,Deaths
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-22,555.0,28.0,17.0
2020-01-23,653.0,30.0,18.0
2020-01-24,941.0,36.0,26.0
2020-01-25,1438.0,39.0,42.0
2020-01-26,2118.0,52.0,56.0
...,...,...,...
2020-11-11,52126793.0,33926351.0,1284472.0
2020-11-12,52740580.0,34149223.0,1293954.0
2020-11-13,53365400.0,34450332.0,1302556.0
2020-11-14,53927158.0,34726418.0,1311192.0


In [22]:
# Calculating additional information for analysis
datewise["Days Since"] = datewise.index - datewise.index.min()

datewise

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Days Since
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-22,555.0,28.0,17.0,0 days
2020-01-23,653.0,30.0,18.0,1 days
2020-01-24,941.0,36.0,26.0,2 days
2020-01-25,1438.0,39.0,42.0,3 days
2020-01-26,2118.0,52.0,56.0,4 days
...,...,...,...,...
2020-11-11,52126793.0,33926351.0,1284472.0,294 days
2020-11-12,52740580.0,34149223.0,1293954.0,295 days
2020-11-13,53365400.0,34450332.0,1302556.0,296 days
2020-11-14,53927158.0,34726418.0,1311192.0,297 days


In [23]:
print("Basic Information")
print("Totol number of countries with Disease Spread: ",len(covid["Country/Region"].unique()))
print("Total number of Confirmed Cases around the World: ",datewise["Confirmed"].iloc[-1])
print("Total number of Recovered Cases around the World: ",datewise["Recovered"].iloc[-1])
print("Total number of Deaths Cases around the World: ",datewise["Deaths"].iloc[-1])
print("Total number of Active Cases around the World: ",(datewise["Confirmed"].iloc[-1]-datewise["Recovered"].iloc[-1]-datewise["Deaths"].iloc[-1]))
print("Total number of Closed Cases around the World: ",datewise["Recovered"].iloc[-1]+datewise["Deaths"].iloc[-1])
print("Number of Confirmed Cases in last 24 hours: ",datewise["Confirmed"].iloc[-1]-datewise["Confirmed"].iloc[-2])
print("Number of Recovered Cases in last 24 hours: ",datewise["Recovered"].iloc[-1]-datewise["Recovered"].iloc[-2])
print("Number of Death Cases in last 24 hours: ",datewise["Deaths"].iloc[-1]-datewise["Deaths"].iloc[-2])

Basic Information
Totol number of countries with Disease Spread:  226
Total number of Confirmed Cases around the World:  54370186.0
Total number of Recovered Cases around the World:  34955148.0
Total number of Deaths Cases around the World:  1317139.0
Total number of Active Cases around the World:  18097899.0
Total number of Closed Cases around the World:  36272287.0
Number of Confirmed Cases in last 24 hours:  443028.0
Number of Recovered Cases in last 24 hours:  228730.0
Number of Death Cases in last 24 hours:  5947.0


In [24]:
# Plotting distribution of active cases over time
fig = px.bar(x=datewise.index, y=datewise["Confirmed"] - datewise["Recovered"] - datewise["Deaths"])
fig.update_layout(title="Distribution of Number of Active Cases", xaxis_title="Date", yaxis_title="Number of Cases",)
fig.show()

In [14]:
# Analyzing COVID-19 data for India
india_data = covid[covid["Country/Region"] == "India"]

india_data

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
430,431,2020-01-30,,India,1/30/20 16:00,1.0,0.0,0.0
491,492,2020-01-31,,India,1/31/2020 23:59,1.0,0.0,0.0
547,548,2020-02-01,,India,1/31/2020 8:15,1.0,0.0,0.0
607,608,2020-02-02,,India,2020-02-02T06:03:08,2.0,0.0,0.0
672,673,2020-02-03,,India,2020-02-03T21:43:02,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...
60885,60886,2020-07-10,Kerala,India,2020-07-11 04:34:26,6950.0,27.0,3820.0
60910,60911,2020-07-10,Ladakh,India,2020-07-11 04:34:26,1064.0,1.0,917.0
60926,60927,2020-07-10,Madhya Pradesh,India,2020-07-11 04:34:26,16657.0,638.0,12481.0
60932,60933,2020-07-10,Maharashtra,India,2020-07-11 04:34:26,238461.0,9893.0,132625.0


In [15]:
datewise_india = india_data.groupby(["ObservationDate"]).agg({"Confirmed": 'sum', "Recovered": 'sum', "Deaths": 'sum'})

datewise_india

Unnamed: 0_level_0,Confirmed,Recovered,Deaths
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-30,1.0,0.0,0.0
2020-01-31,1.0,0.0,0.0
2020-02-01,1.0,0.0,0.0
2020-02-02,2.0,0.0,0.0
2020-02-03,3.0,0.0,0.0
...,...,...,...
2020-07-06,719664.0,439934.0,20159.0
2020-07-07,742417.0,456831.0,20642.0
2020-07-08,767296.0,476378.0,21129.0
2020-07-09,793802.0,495513.0,21604.0


In [16]:
# Displaying India-specific information
print(datewise_india.iloc[-1])
print("Total Active Cases: ", datewise_india["Confirmed"].iloc[-1] - datewise_india["Recovered"].iloc[-1] - datewise_india["Deaths"].iloc[-1])
print("Total Closed Cases: ", datewise_india["Recovered"].iloc[-1] + datewise_india["Deaths"].iloc[-1])

Confirmed    543857.0
Recovered    338805.0
Deaths        17380.0
Name: 2020-07-10 00:00:00, dtype: float64
Total Active Cases:  187672.0
Total Closed Cases:  356185.0


In [26]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go

# Example data (replace with your actual data)
datewise_india = pd.DataFrame({
    "Confirmed": np.random.randint(1000, 10000, size=100),
    "Recovered": np.random.randint(500, 8000, size=100),
    "Deaths": np.random.randint(100, 2000, size=100)
}, index=pd.date_range(start="2020-01-01", periods=100))

# Plotting growth of different types of cases in India
fig = go.Figure()

fig.add_trace(go.Scatter(x=datewise_india.index, y=datewise_india["Confirmed"],
                        mode='lines+markers',
                        name='Confirmed Cases'))
fig.add_trace(go.Scatter(x=datewise_india.index, y=datewise_india["Recovered"],
                        mode='lines+markers',
                        name='Recovered Cases'))
fig.add_trace(go.Scatter(x=datewise_india.index, y=datewise_india["Deaths"],
                        mode='lines+markers',
                        name='Death Cases'))
fig.update_layout(title="Growth of Different Types of Cases in India",
                  xaxis_title="Date", yaxis_title="Number of Cases",
                  legend=dict(x=0, y=1, traceorder="normal"))

fig.show()


In [27]:
# Feature engineering for machine learning models
datewise["Days Since"] = datewise.index - datewise.index[0]
datewise["Days Since"] = datewise["Days Since"].dt.days

datewise

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Days Since
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-22,555.0,28.0,17.0,0
2020-01-23,653.0,30.0,18.0,1
2020-01-24,941.0,36.0,26.0,2
2020-01-25,1438.0,39.0,42.0,3
2020-01-26,2118.0,52.0,56.0,4
...,...,...,...,...
2020-11-11,52126793.0,33926351.0,1284472.0,294
2020-11-12,52740580.0,34149223.0,1293954.0,295
2020-11-13,53365400.0,34450332.0,1302556.0,296
2020-11-14,53927158.0,34726418.0,1311192.0,297


In [28]:
# Splitting data into training and validation sets
train_ml = datewise.iloc[:int(datewise.shape[0] * 0.95)]
valid_ml = datewise.iloc[int(datewise.shape[0] * 0.95):]
model_scores = []

In [31]:
# Linear Regression model
lin_reg = LinearRegression()
lin_reg.fit(np.array(train_ml["Days Since"]).reshape(-1, 1), np.array(train_ml["Confirmed"]).reshape(-1, 1))

# Making predictions using Linear Regression
prediction_valid_linreg = lin_reg.predict(np.array(valid_ml["Days Since"]).reshape(-1, 1))

# Calculating and displaying RMSE & R2 for Linear Regression
model_scores.append(np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_linreg)))
print("Root Mean Square Error for Linear Regression: ", np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_linreg)))

Root Mean Square Error for Linear Regression:  14366505.585641913


In [32]:
# Plotting Linear Regression predictions
plt.figure(figsize=(11,6))

prediction_linreg = lin_reg.predict(np.array(datewise["Days Since"]).reshape(-1,1))

linreg_output=[]

for i in range(prediction_linreg.shape[0]):
    linreg_output.append(prediction_linreg[i][0])

fig=go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                    mode='lines+markers',name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=linreg_output,
                    mode='lines',name="Linear Regression Best Fit Line",
                    line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Linear Regression Prediction",
                 xaxis_title="Date",yaxis_title="Confirmed Cases",legend=dict(x=0,y=1,traceorder="normal"))
fig.show()

<Figure size 1100x600 with 0 Axes>

In [23]:
poly = PolynomialFeatures(degree = 8)

train_poly=poly.fit_transform(np.array(train_ml["Days Since"]).reshape(-1,1))
valid_poly=poly.fit_transform(np.array(valid_ml["Days Since"]).reshape(-1,1))
y=train_ml["Confirmed"]

linreg=LinearRegression()
linreg.fit(train_poly,y)

prediction_poly=linreg.predict(valid_poly)
rmse_poly=np.sqrt(mean_squared_error(valid_ml["Confirmed"],prediction_poly))
model_scores.append(rmse_poly)
print("Root Mean Squared Error for Polynomial Regression: ",rmse_poly)

Root Mean Squared Error for Polynomial Regression:  1335106.8355698513


In [34]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Example DataFrame (replace with your actual data)
datewise = pd.DataFrame({
    "Days Since": np.arange(100),  # Example feature
    "Confirmed": np.random.randint(1000, 10000, size=100)
}, index=pd.date_range(start="2020-01-01", periods=100))

# Define Polynomial Features and Linear Regression
poly = PolynomialFeatures(degree=3)  # Change degree as needed
linreg = LinearRegression()

# Transform the feature and fit the model
comp_data = poly.fit_transform(np.array(datewise["Days Since"]).reshape(-1, 1))
linreg.fit(comp_data, datewise["Confirmed"])

# Predict using the Polynomial Regression model
predictions_poly = linreg.predict(comp_data)

# Plotting Polynomial Regression predictions
fig = go.Figure()

fig.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                        mode='lines+markers', name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=predictions_poly,
                        mode='lines', name="Polynomial Regression Best Fit",
                        line=dict(color='black', dash='dot')))

fig.update_layout(title="Confirmed Cases Polynomial Regression Prediction",
                  xaxis_title="Date", yaxis_title="Confirmed Cases",
                  legend=dict(x=0, y=1, traceorder="normal"))

fig.show()


In [None]:
#Intializing SVR Model
svm=SVR(C=1,degree=6,kernel='poly',epsilon=0.01)

#Fitting model on the training data
svm.fit(np.array(train_ml["Days Since"]).reshape(-1,1),np.array(train_ml["Confirmed"]).reshape(-1,1))

prediction_valid_svm=svm.predict(np.array(valid_ml["Days Since"]).reshape(-1,1))

model_scores.append(np.sqrt(mean_squared_error(valid_ml["Confirmed"],prediction_valid_svm)))
print("Root Mean Square Error for Support Vectore Machine: ",np.sqrt(mean_squared_error(valid_ml["Confirmed"],prediction_valid_svm)))

Root Mean Square Error for Support Vectore Machine:  10286744.183962515


In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVR  # Support Vector Regressor (use SVC for classification)

# Example data (replace with your actual data)
datewise = pd.DataFrame({
    "Days Since": np.arange(100),  # Days since some start date
    "Confirmed": np.random.randint(100, 1000, size=100)  # Replace with actual confirmed cases data
})

# Initialize and train the SVM model
svm = SVR(kernel='rbf')  # You can choose different kernels, e.g., 'linear', 'poly', 'rbf'
X = np.array(datewise["Days Since"]).reshape(-1, 1)
y = datewise["Confirmed"]
svm.fit(X, y)

# Predict using the trained SVM model
prediction_svm = svm.predict(X)

# Plotting SVM predictions
plt.figure(figsize=(11, 6))

fig = go.Figure()
fig.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                         mode='lines+markers', name="Train Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=datewise.index, y=prediction_svm,
                         mode='lines', name="Support Vector Machine Best fit Kernel",
                         line=dict(color='black', dash='dot')))
fig.update_layout(title="Confirmed Cases Support Vector Machine Regressor Prediction",
                  xaxis_title="Date", yaxis_title="Confirmed Cases", 
                  legend=dict(x=0, y=1, traceorder="normal"))
fig.show()


<Figure size 1100x600 with 0 Axes>

In [None]:
# Random Forest Regression model
random_forest = RandomForestRegressor(n_estimators=100, random_state=42)
random_forest.fit(np.array(train_ml["Days Since"]).reshape(-1, 1), np.array(train_ml["Confirmed"]).reshape(-1, 1))

# Making predictions using Random Forest
prediction_valid_rf = random_forest.predict(np.array(valid_ml["Days Since"]).reshape(-1, 1))

# Calculating and displaying RMSE & R2 for Random Forest
model_scores.append(np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_rf)))
print("Root Mean Square Error for Random Forest: ", np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_rf)))

Root Mean Square Error for Random Forest:  5300808.968526851


In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor  # or RandomForestClassifier if using classification

# Example data (replace with your actual data)
datewise = pd.DataFrame({
    "Days Since": np.arange(100),  # Days since some start date
    "Confirmed": np.random.randint(100, 1000, size=100)  # Replace with actual confirmed cases data
})

# Initialize and train the Random Forest model
random_forest = RandomForestRegressor()  # Use RandomForestClassifier if you're doing classification
X = np.array(datewise["Days Since"]).reshape(-1, 1)
y = datewise["Confirmed"]
random_forest.fit(X, y)

# Predict using the trained Random Forest model
prediction_rf = random_forest.predict(X)

# Plotting Random Forest predictions
plt.figure(figsize=(11, 6))

fig_rf = go.Figure()
fig_rf.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                            mode='lines+markers', name="Train Data for Confirmed Cases"))
fig_rf.add_trace(go.Scatter(x=datewise.index, y=prediction_rf,
                            mode='lines', name="Random Forest Predictions",
                            line=dict(color='black', dash='dot')))
fig_rf.update_layout(title="Confirmed Cases Random Forest Prediction",
                     xaxis_title="Date", yaxis_title="Confirmed Cases", 
                     legend=dict(x=0, y=1, traceorder="normal"))
fig_rf.show()


<Figure size 1100x600 with 0 Axes>

In [None]:
# Decision Tree Regression model
decision_tree = DecisionTreeRegressor(random_state=42)
decision_tree.fit(np.array(train_ml["Days Since"]).reshape(-1, 1), np.array(train_ml["Confirmed"]).reshape(-1, 1))

# Making predictions using Decision Tree
prediction_valid_dt = decision_tree.predict(np.array(valid_ml["Days Since"]).reshape(-1, 1))

# Calculating and displaying RMSE & R2 for Decision Tree
model_scores.append(np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_dt)))
print("Root Mean Square Error for Decision Tree: ", np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_valid_dt)))

Root Mean Square Error for Decision Tree:  4995982.630595267


In [6]:
# Plotting Decision Tree predictions
plt.figure(figsize=(11, 6))

prediction_dt = decision_tree.predict(np.array(datewise["Days Since"]).reshape(-1, 1))

fig_dt = go.Figure()
fig_dt.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                            mode='lines+markers', name="Train Data for Confirmed Cases"))
fig_dt.add_trace(go.Scatter(x=datewise.index, y=prediction_dt,
                            mode='lines', name="Decision Tree Predictions",
                            line=dict(color='black', dash='dot')))
fig_dt.update_layout(title="Confirmed Cases Decision Tree Prediction",
                     xaxis_title="Date", yaxis_title="Confirmed Cases", legend=dict(x=0, y=1, traceorder="normal"))
fig_dt.show()

<Figure size 1100x600 with 0 Axes>

In [None]:
new_date = []
new_prediction_lr = []
new_prediction_poly=[]
new_prediction_svm = []
new_prediction_dt = []
new_prediction_rf = []

for i in range(1, 31):
    new_date.append(datewise.index[-1] + timedelta(days=i))

    # Linear Regression predictions
    new_prediction_lr.append(lin_reg.predict(np.array(datewise["Days Since"].max() + i).reshape(-1, 1))[0][0])

    new_date_poly=poly.fit_transform(np.array(datewise["Days Since"].max() + i).reshape(-1,1))
    new_prediction_poly.append(linreg.predict(new_date_poly)[0])

    # Support Vector Machine predictions
    new_prediction_svm.append(svm.predict(np.array(datewise["Days Since"].max() + i).reshape(-1, 1))[0])

    # Decision Tree predictions
    new_prediction_dt.append(decision_tree.predict(np.array(datewise["Days Since"].max() + i).reshape(-1, 1))[0])

    # Random Forest predictions
    new_prediction_rf.append(random_forest.predict(np.array(datewise["Days Since"].max() + i).reshape(-1, 1))[0])

# Displaying predictions in a DataFrame
pd.set_option('display.float_format', lambda x: '%.6f' % x)
model_predictions = pd.DataFrame(zip(new_date, new_prediction_lr, new_prediction_poly, new_prediction_svm, new_prediction_dt, new_prediction_rf),
                                  columns=["Dates", "Linear Regression Prediction", "Polynomial Regression Prediction", "SVM Prediction", "Decision Tree Prediction", "Random Forest Prediction"])

model_predictions

Unnamed: 0,Dates,Linear Regression Prediction,Polynomial Regression Prediction,SVM Prediction,Decision Tree Prediction,Random Forest Prediction
0,2020-11-16,37417973.690946,60797959.872493,69927483.212344,46087107.0,45739687.88
1,2020-11-17,37573075.145143,62085800.071848,71232010.372132,46087107.0,45739687.88
2,2020-11-18,37728176.59934,63433086.70339,72558461.784956,46087107.0,45739687.88
3,2020-11-19,37883278.053537,64842553.850702,73907131.237419,46087107.0,45739687.88
4,2020-11-20,38038379.507734,66317030.496476,75278315.458859,46087107.0,45739687.88
5,2020-11-21,38193480.961931,67859442.915507,76672314.140929,46087107.0,45739687.88
6,2020-11-22,38348582.416128,69472817.109641,78089429.957252,46087107.0,45739687.88
7,2020-11-23,38503683.870325,71160281.285165,79529968.583135,46087107.0,45739687.88
8,2020-11-24,38658785.324522,72925068.373089,80994238.715349,46087107.0,45739687.88
9,2020-11-25,38813886.778719,74770518.592778,82482552.091973,46087107.0,45739687.88


In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go

# Example historical data (replace with your actual data)
datewise = pd.DataFrame({
    "Confirmed": np.random.randint(100, 1000, size=100)  # Replace with your actual confirmed cases data
}, index=pd.date_range(start="2020-01-01", periods=100))

# Example dates for predictions (replace with the actual dates you used for prediction)
new_date = pd.date_range(start="2020-04-01", periods=30)  # Replace with your actual prediction dates

# Example predictions (replace with your actual model predictions)
new_prediction_lr = np.random.randint(100, 1000, size=30)
new_prediction_svm = np.random.randint(100, 1000, size=30)
new_prediction_dt = np.random.randint(100, 1000, size=30)
new_prediction_rf = np.random.randint(100, 1000, size=30)

# Plotting Historical and Predicted Data for Linear Regression, SVM, Decision Tree, and Random Forest
fig_combined = go.Figure()

# Historical data
fig_combined.add_trace(go.Scatter(x=datewise.index, y=datewise["Confirmed"],
                                  mode='lines+markers', name="Historical Data", 
                                  line=dict(color='black', width=2)))

# Linear Regression predictions
fig_combined.add_trace(go.Scatter(x=new_date, y=new_prediction_lr,
                                  mode='lines', name="Linear Regression Predictions",
                                  line=dict(color='red', dash='dot')))

# SVM predictions
fig_combined.add_trace(go.Scatter(x=new_date, y=new_prediction_svm,
                                  mode='lines', name="SVM Predictions",
                                  line=dict(color='green', dash='dot')))

# Decision Tree predictions
fig_combined.add_trace(go.Scatter(x=new_date, y=new_prediction_dt,
                                  mode='lines', name="Decision Tree Predictions",
                                  line=dict(color='blue', dash='dot')))

# Random Forest predictions
fig_combined.add_trace(go.Scatter(x=new_date, y=new_prediction_rf,
                                  mode='lines', name="Random Forest Predictions",
                                  line=dict(color='purple', dash='dot')))

# Updating layout
fig_combined.update_layout(title="Historical and Predicted Data for Different Models",
                           xaxis_title="Date", yaxis_title="Confirmed Cases",
                           legend=dict(x=0, y=1, traceorder="normal"))

# Displaying the plot
fig_combined.show()


In [2]:
import pickle
from sklearn.ensemble import RandomForestClassifier

# Define or load the rfc variable
rfc = RandomForestClassifier()

# Now you can serialize the object
with open('covid.pkl', 'wb') as f:
    pickle.dump(rfc, f)


# New Section