# Analysis of Covid Cases and Vaccine Doses in the Philippines Using Regression Techniques


#### by Group 2: Chua, Hernandez, Orga, Ramos


## 1. Importing modules and Excel Sheet

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import warnings
from collections import UserDict
from glob import glob
import matplotlib.pyplot as plt
from IPython.display import Image
%matplotlib inline
import matplotlib.dates as mpl_dates
import math




In [None]:
# Load the "LinearRegression" sheet of the project workbook into a DataFrame.
df_cases = pd.read_excel("./Nummets Project.xlsx" , sheet_name="LinearRegression")

In [None]:
# Show the (rows, columns) dimensions of the loaded sheet.
df_cases.shape

In [None]:
# Display the loaded DataFrame for inspection.
df_cases

## 2. Data Cleaning

In [None]:
# Cast the mapping column to int and then to str so that it can be
# concatenated into a date string in the next step.
df_cases["Months Numerical Mapping"] = df_cases["Months Numerical Mapping"].astype(int).astype(str) 
df_cases

In [None]:
# Build a "2022-01-DD" date string for every row, zero-padding one-digit
# values to two digits.
#
# This is done with a single vectorized expression on purpose: the earlier
# row-by-row loop used chained assignment (df["col"][i] = ...), which pandas
# flags with SettingWithCopyWarning and which, with copy-on-write enabled,
# writes to a temporary object and leaves the column filled with "null".
#
# NOTE(review): the source column is named "Months Numerical Mapping" but its
# value is placed in the day position of January 2022 -- confirm this is the
# intended mapping.
df_cases["Numerical Mapping"] = (
    "2022-01-" + df_cases["Months Numerical Mapping"].astype(str).str.zfill(2)
)

df_cases

In [None]:
# Inspect the generated date strings.
df_cases["Numerical Mapping"]

In [None]:
# Parse the date strings into a datetime column named "Date".
df_cases["Date"] =   pd.to_datetime(df_cases["Numerical Mapping"]) 

In [None]:
# Inspect the parsed "Date" column.
df_cases["Date"]

In [None]:
# Display the DataFrame, which now includes the "Date" column.
df_cases

In [None]:
# Drop the original "Months" column and the intermediate "Numerical Mapping"
# string column in place.
df_cases.drop(columns=["Months", "Numerical Mapping"], inplace=True)

In [None]:
# Convert the mapping column back to integers (it was cast to str earlier to
# build the date strings) so it can serve as the numeric x variable in the
# regression section.
df_cases["Months Numerical Mapping"] = df_cases["Months Numerical Mapping"].astype(int)
df_cases

## 3. Data Visualization

In [None]:
# This code was adapted from the Kaggle notebook by aleksandrmorozov123:
# https://www.kaggle.com/code/aleksandrmorozov123/time-series-forecasting-with-python/notebook
# Modified on 15/12/2022 for the project purposes of Group 2

# Render floats with a thousands separator and two decimals.
# The previous spec "{:, .2f}" (note the space after the comma) is not a valid
# format specifier and raises ValueError as soon as pandas renders a float.
pd.options.display.float_format = "{:,.2f}".format
np.set_printoptions(precision=2)
warnings.filterwarnings("ignore")

# Lag plots are used to check whether a time series is random: random data
# should not exhibit any structure in the lag plot.
from pandas.plotting import lag_plot

# `sns` was used here without ever being imported, which raises NameError.
# Seaborn only provides the plot theme, so treat it as optional: apply the
# theme when seaborn is installed, otherwise keep matplotlib's default style.
try:
    import seaborn as sns
except ImportError:
    sns = None
if sns is not None:
    sns.set()

# With lag = 1 the plot is essentially data[:-1] vs. data[1:].

# Lag plot for the "Covid Cases 2022" column.
print("Covid Cases Lag Plot")
lag_plot(df_cases["Covid Cases 2022"], lag=1)


In [None]:
# Increasing the lag (here to 3) is used to check the data for seasonality.
print("Covid Cases Lag Plot")
lag_plot (df_cases ["Covid Cases 2022"], lag = 3)

In [None]:
# Create the lag plot for the "Vax Doses" column. No lag argument is passed,
# so lag_plot's default lag applies.
print("Vaccine Doses Lag Plot")
lag_plot (df_cases ["Vax Doses"])

In [None]:
# Code adapted from GeeksforGeeks:
# https://www.geeksforgeeks.org/linear-regression-python-implementation/
# Modified on 15/12/2022 for the project purposes of Group 2

def estimate_coef(x, y):
    """Estimate the coefficients of the least-squares line y = b_0 + b_1 * x.

    Parameters
    ----------
    x, y : array-like of numbers
        Observations of the independent and dependent variable. Lists,
        NumPy arrays and pandas Series are accepted; both must contain the
        same number of elements. Values are paired by position.

    Returns
    -------
    tuple
        ``(b_0, b_1)``: the intercept and the slope of the fitted line.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in size, or all x values are equal
        (the slope is undefined in that case).
    """
    # Work on flat float arrays so plain Python sequences are supported and
    # integer inputs cannot overflow in the products below.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    if x.size != y.size:
        raise ValueError("x and y must contain the same number of observations")
    if x.size == 0:
        raise ValueError("x and y must not be empty")

    # mean of x and y vector
    m_x = np.mean(x)
    m_y = np.mean(y)

    # Cross-deviation and deviation about x, computed from centered values.
    # This is algebraically equal to sum(x*y) - n*m_x*m_y but avoids the
    # cancellation error of subtracting two large, nearly equal numbers.
    dev_x = x - m_x
    SS_xy = np.sum(dev_x * (y - m_y))
    SS_xx = np.sum(dev_x * dev_x)

    if SS_xx == 0:
        raise ValueError("all x values are identical; the slope is undefined")

    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)
  
def plot_regression_line(x, y, b, xlabel='x', ylabel='y'):
    """Draw the observations as a scatter plot together with the fitted line.

    Parameters
    ----------
    x, y : array-like of numbers
        Observed points. Lists, NumPy arrays and pandas Series are accepted.
    b : sequence
        Regression coefficients; ``b[0]`` is the intercept and ``b[1]`` the
        slope, as returned by ``estimate_coef``.
    xlabel, ylabel : str, optional
        Axis labels. They default to ``'x'`` and ``'y'``, the labels that
        were previously hard-coded.
    """
    # Convert up front so the arithmetic below also works for plain lists
    # (a float times a list raises TypeError).
    x = np.asarray(x)
    y = np.asarray(y)

    # plotting the actual points as scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)

    # predicted response vector
    y_pred = b[0] + b[1] * x

    # plotting the regression line
    plt.plot(x, y_pred, color="g")

    # putting labels
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # function to show plot
    plt.show()
  
def main():
    """Fit and plot a linear regression of each series against the month index.

    Reads the month mapping, the Covid case counts and the vaccine doses from
    ``df_cases``, then for each series prints the estimated coefficients,
    prints a heading, and shows the scatter plot with the fitted line.
    """
    # Two-line coefficient report; the whitespace inside the literal is part
    # of the printed output and is kept exactly as before.
    report = (
        "Estimated coefficients:\nb_0 = {}  "
        "          \nb_1 = {}"
    )

    # Pull every column up front so a missing column fails before any plot.
    months = df_cases["Months Numerical Mapping"]
    cases = df_cases["Covid Cases 2022"]
    doses = df_cases["Vax Doses"]

    runs = (
        (cases, "Linear Regression Plot of Covid Cases"),
        (doses, "Linear Regression Plot of Vaccine Doses"),
    )

    for series, heading in runs:
        coef = estimate_coef(months, series)
        print(report.format(coef[0], coef[1]))

        print(heading)
        plot_regression_line(months, series, coef)


if __name__ == "__main__":
    main()

#### Observations
1. The lag plot shows that the Covid Cases series is random (it has no visible structure), so a linear regression fitted to it is unreliable.
2. The Vaccine Doses series is not random: its lag plot shows a linear pattern. When fitted with a linear regression model, the series shows a downward trend.