In [1]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [2]:
# Read the student-performance CSV into a DataFrame and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='/content/Student_Performance.csv')
df.head(n=5)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [3]:
# Independent (deep) copy of the loaded frame; preview its first five rows.
lr_df = df.copy(deep=True)
lr_df.head(n=5)

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [15]:
# Features are every column except the last; the target is the last column
# ("Performance Index" in the preview above).
Y = df.iloc[:, -1]

# Encode the Yes/No column as 1/0. Building a new frame with assign/drop
# (instead of adding a column to a slice of `df` and dropping in place) avoids
# pandas' SettingWithCopyWarning and keeps this cell safe to re-run.
mapper = {'Yes': 1, 'No': 0}
X = (
    df.iloc[:, :-1]
    .assign(**{
        'Extracurricular Activities_encoded':
            lambda frame: frame['Extracurricular Activities'].map(mapper)
    })
    .drop(columns='Extracurricular Activities')
)
# .map() turns any label missing from `mapper` into NaN; fail here with a
# clear message rather than later inside LinearRegression.fit.
assert X['Extracurricular Activities_encoded'].notna().all(), \
    "Unexpected value in 'Extracurricular Activities' (expected 'Yes' or 'No')"

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

lr = LinearRegression()
lr.fit(X_train, Y_train)

Y_pred = lr.predict(X_test)

print(f"Predictions: {Y_pred}")

# Each metric gets its own name (previously both were stored in `accuracy`).
r2 = r2_score(Y_test, Y_pred)
print("R2_Score:", r2)

mse = mean_squared_error(Y_test, Y_pred)
print("MSE:", mse)

rmse = math.sqrt(mse)
print("RMSE:", rmse)

Predictions: [54.71185392 22.61551294 47.90314471 ... 16.79341955 63.34327368
 45.94262301]
R2_Score: 0.9889832909573145
MSE: 4.082628398521851
RMSE: 2.020551508505005


In [5]:
# Fitted coefficients of the scikit-learn model, one per column of X_train.
lr.coef_

array([2.85248393, 1.0169882 , 0.47694148, 0.19183144, 0.60861668])

In [6]:
# Fitted intercept (constant term) of the scikit-learn model.
lr.intercept_

np.float64(-33.921946215556396)

# Building my own multiple linear regression class

In [12]:
class multi_regression:
  """Ordinary least-squares multiple linear regression via the normal equations.

  Both attributes are ``None`` until ``fit`` has been called:
    coef_: one coefficient per feature column, in the column order given to ``fit``.
    intercept_: the constant term.
  """

  def __init__(self):
    self.coef_ = None
    self.intercept_ = None

  def fit(self, X_train, Y_train):
    """Estimate the intercept and coefficients from training data.

    Args:
      X_train: 2-D array-like of numeric features, one row per sample.
      Y_train: array-like of targets, one entry per row of ``X_train``.

    Raises:
      numpy.linalg.LinAlgError: if X'X is singular (for example, perfectly
        collinear feature columns).
    """
    features = np.asarray(X_train, dtype=float)
    targets = np.asarray(Y_train, dtype=float)
    # Prepend a column of ones so the first solved weight is the intercept.
    design = np.insert(features, 0, 1, axis=1)
    # Solve (X'X) b = X'y directly instead of forming the explicit inverse:
    # same normal-equation solution, with less floating-point error.
    betas = np.linalg.solve(np.dot(design.T, design), np.dot(design.T, targets))
    self.intercept_ = betas[0]
    self.coef_ = betas[1:]

  def predict(self, y_test):
    """Predict targets for a feature matrix.

    Args:
      y_test: despite its name (kept so existing callers do not break), this
        is the 2-D array-like of FEATURES to predict for, with the same
        columns, in the same order, as the matrix passed to ``fit``.

    Returns:
      Array of predictions, one per row of the input.

    Raises:
      ValueError: if called before ``fit``.
    """
    if self.coef_ is None or self.intercept_ is None:
      raise ValueError("multi_regression.predict called before fit")
    # Use the argument itself; the previous version ignored it and read the
    # notebook-global X_test.
    features = np.asarray(y_test, dtype=float)
    y_pred = self.intercept_ + np.dot(features, self.coef_)
    return y_pred

In [16]:
# Fit the hand-written model on the same train split used for the
# scikit-learn model and score it on the same held-out rows.
mlr = multi_regression()
mlr.fit(X_train, Y_train)
# predict() needs the held-out FEATURES; Y_test is only used for scoring.
Y_pred = mlr.predict(X_test)
print(f"Predictions: {Y_pred}")

# Each metric gets its own name (previously both were stored in `accuracy`).
r2 = r2_score(Y_test, Y_pred)
print("R2_Score:", r2)

mse = mean_squared_error(Y_test, Y_pred)
print("MSE:", mse)

rmse = math.sqrt(mse)
print("RMSE:", rmse)


Predictions: [54.71185392 22.61551294 47.90314471 ... 16.79341955 63.34327368
 45.94262301]
R2_Score: 0.9889832909573144
MSE: 4.082628398521872
RMSE: 2.0205515085050103


array([54.71185392, 22.61551294, 47.90314471, ..., 16.79341955,
       63.34327368, 45.94262301])