In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [15]:
# Read the shark-attack records from the CSV next to the notebook
data = pd.read_csv('shark_attacks.csv')

# Build the design inputs: four predictor columns as a 2-D array,
# and the attack counts as the 1-D regression target
X = data.loc[:, ['Year', 'Month', 'Temperature', 'IceCreamSales']].values
y = data.loc[:, 'SharkAttacks'].values

In [16]:
# Define the MLR class
class MLR:
  """Multiple linear regression fitted by ordinary least squares.

  After ``fit`` the learned parameters are available as:
    intercept -- the constant term of the model
    coef      -- one slope per feature column, in input column order
  Both are ``None`` until ``fit`` has been called.
  """

  def __init__(self):
    self.coef = None
    self.intercept = None

  def fit(self, X_train, y_train):
    """Estimate the intercept and coefficients from training data.

    Parameters:
      X_train -- array-like of shape (n_samples, n_features)
      y_train -- array-like with n_samples target values

    The parameters are stored on the instance; nothing is returned.
    """
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    # Prepend a column of ones so the first fitted parameter is the intercept.
    design = np.insert(X_train, 0, 1, axis=1)

    # Solve the least-squares problem directly rather than inverting X^T X:
    # an explicit inverse fails (or is wildly inaccurate) when features are
    # collinear or badly scaled, whereas lstsq returns the minimum-norm
    # solution in those cases and the same answer otherwise.
    betas, *_ = np.linalg.lstsq(design, y_train, rcond=None)
    self.intercept = betas[0]
    self.coef = betas[1:]

  def predict(self, X_test):
    """Return predictions for X_test (n_samples, n_features) using the fitted parameters."""
    y_pred = np.dot(np.asarray(X_test, dtype=float), self.coef) + self.intercept
    return y_pred

In [17]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Train the regression on all four predictors (IceCreamSales included),
# then predict the held-out rows
mlr = MLR()
mlr.fit(X_train=X_train, y_train=y_train)
y_pred = mlr.predict(X_test=X_test)

In [19]:

# Score the four-predictor model (IceCreamSales included) on the held-out rows
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

In [20]:
# Repeat the experiment with IceCreamSales dropped from the predictors.
# Same test fraction and seed as the split used for the four-predictor model.
X1 = data.loc[:, ['Year', 'Month', 'Temperature']].values
y = data.loc[:, 'SharkAttacks'].values
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X1,
    y,
    test_size=0.2,
    random_state=42,
)

mlr1 = MLR()
mlr1.fit(X_train=X_train1, y_train=y_train1)
y_pred1 = mlr1.predict(X_test=X_test1)

In [21]:

# Score the three-predictor model (IceCreamSales excluded) on its held-out rows
mse1, r21 = (
    mean_squared_error(y_true=y_test1, y_pred=y_pred1),
    r2_score(y_true=y_test1, y_pred=y_pred1),
)

In [25]:
# Report both models' metrics, cross-checking the library values against the
# textbook formulas. The hand-computed numbers get their own names so they no
# longer overwrite `mse` (which must keep describing the IceCreamSales model)
# and are actually used instead of being computed and discarded.
def _manual_mse_r2(y_true, y_hat):
  """Return (MSE, R^2) computed directly from the residuals."""
  squared_errors = (y_true - y_hat) ** 2
  total_ss = np.sum((y_true - np.mean(y_true)) ** 2)
  return np.mean(squared_errors), 1 - (np.sum(squared_errors) / total_ss)

# Including IceCreamSales
mse_manual, r2_manual = _manual_mse_r2(y_test, y_pred)
assert np.isclose(mse_manual, mse), "manual MSE disagrees with mean_squared_error"
assert np.isclose(r2_manual, r2), "manual R^2 disagrees with r2_score"

print("Metrics for model including IceCreamSales data:")
print("Mean Squared Error (MSE):", mse)
print("R-squared (R^2) score:", r2)

# Excluding IceCreamSales
mse1_manual, r21_manual = _manual_mse_r2(y_test1, y_pred1)
assert np.isclose(mse1_manual, mse1), "manual MSE disagrees with mean_squared_error"
assert np.isclose(r21_manual, r21), "manual R^2 disagrees with r2_score"

print("Metrics for model excluding IceCreamSales data:")
print("Mean Squared Error (MSE):", mse1)
print("R-squared (R^2) score:", r21)


Metrics for model including IceCreamSales data:
Mean Squared Error (MSE): 37.675376276640534
R-squared (R^2) score: 0.5000374807627369
Metrics for model excluding IceCreamSales data:
Mean Squared Error (MSE): 43.38138077715389
R-squared (R^2) score: 0.4243172447149657
