## Importing libraries

In [85]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from LRScratch import LinearRegression

## Loading and Exploring data

In [86]:
# Location of the raw CSV.
# NOTE(review): this is an absolute path (presumably a Colab runtime location) — adjust when running elsewhere.
DATA_PATH = '/content/Student_Performance.csv'

# Read the student-performance table into a DataFrame
data = pd.read_csv(DATA_PATH)

In [87]:
# Preview the loaded DataFrame (the notebook renders a truncated view with an ellipsis row)
data

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,Yes,4,2,23.0
9996,7,64,Yes,8,5,58.0
9997,6,83,Yes,8,5,74.0
9998,9,97,Yes,7,0,95.0


In [88]:
# Count missing entries per column (isna is the pandas alias of isnull)
missing_per_column = data.isna().sum()
print(missing_per_column)

Hours Studied                       0
Previous Scores                     0
Extracurricular Activities          0
Sleep Hours                         0
Sample Question Papers Practiced    0
Performance Index                   0
dtype: int64


In [89]:
# List the column labels to confirm the exact names referenced in later cells
data.columns

Index(['Hours Studied', 'Previous Scores', 'Extracurricular Activities',
       'Sleep Hours', 'Sample Question Papers Practiced', 'Performance Index'],
      dtype='object')

In [90]:
# Inspect per-column dtypes; 'Extracurricular Activities' is the only non-numeric (object) column
data.dtypes

Unnamed: 0,0
Hours Studied,int64
Previous Scores,int64
Extracurricular Activities,object
Sleep Hours,int64
Sample Question Papers Practiced,int64
Performance Index,float64


In [91]:
# Encode the binary categorical column as integers (Yes -> 1, No -> 0).
# The identity entries (1 -> 1, 0 -> 0) make this cell safe to re-run: without them,
# mapping an already-encoded column would turn every value into NaN, because
# Series.map yields NaN for values that are missing from the dict.
data['Extracurricular Activities'] = data['Extracurricular Activities'].map({'Yes': 1, 'No': 0, 1: 1, 0: 0})

# Fail loudly on any label outside the mapping instead of passing NaN on to the model
assert data['Extracurricular Activities'].notna().all(), "unexpected value in 'Extracurricular Activities'"

In [92]:
# Re-display the frame to confirm 'Extracurricular Activities' now holds 1/0 instead of Yes/No
data

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91.0
1,4,82,0,4,2,65.0
2,8,51,1,7,2,45.0
3,5,52,1,5,2,36.0
4,7,75,0,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,1,4,2,23.0
9996,7,64,1,8,5,58.0
9997,6,83,1,8,5,74.0
9998,9,97,1,7,0,95.0


## Performing train-test split

In [93]:
# using sklearn to perform train-test split
from sklearn.model_selection import train_test_split

TARGET_COLUMN = 'Performance Index'

# Feature matrix: every column except the target, converted to a NumPy array
feature_frame = data.drop(TARGET_COLUMN, axis=1)
X = feature_frame.values

# Target vector, converted to a NumPy array
y = data[TARGET_COLUMN].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Making model and predictions

In [94]:
# Construct the LinearRegression imported from LRScratch with the training split and a learning rate of 0.0001
model = LinearRegression(X_train,y_train,lr=0.0001)
# Run gradient descent; the two returned values are presumably the learned parameters
# and the per-iteration cost history — TODO confirm against LRScratch
theta_optimal, costfn_list = model.gradient_descent()
# Predict targets for the held-out test features
y_pred = model.predict(X_test)

## Model evaluation

In [95]:
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error between the true and predicted test targets (lower is better)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# R-squared, i.e. the coefficient of determination (not a root-mean-squared quantity)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 35.849364205915585
R-squared: 0.9032628061459554
