<a href="https://colab.research.google.com/github/rattlesczck/StudentPerformance/blob/main/Academicperformance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Load dataset
df = pd.read_csv("/content/drive/MyDrive/AcademicPerformance/StudentData.csv")

# Preprocessing steps
df['Gender'].replace({'Female': 'F', 'Male': 'M'}, inplace=True)
df['Roll'].fillna(0, inplace=True)
df['Roll no.'].fillna(0, inplace=True)
df['Gender'].fillna('Unknown', inplace=True)
df['Roll_no'] = (df['Roll'] + df['Roll no.']).astype(int)
df.drop(columns=['Roll', 'Roll no.'], inplace=True)
df['1st'].fillna(df['1st'].mean(), inplace=True)
df['2nd'].fillna(df['2nd'].mean(), inplace=True)
df['3rd'].fillna(df['3rd'].mean(), inplace=True)
df['4th'].fillna(df['4th'].mean(), inplace=True)
df['5th'].fillna(df['5th'].mean(), inplace=True)

# Features and target
X = df[['1st', '2nd', '3rd', '4th']]  # Features
y = df['5th']  # Target (5th semester score)

# Feature Scaling (Standardization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=1/3, random_state=21)

# Linear Regression (Standard)
model_lr = LinearRegression()
model_lr.fit(X_train, y_train)
y_pred_lr = model_lr.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

# Ridge Regression
model_ridge = Ridge(alpha=1.0)
model_ridge.fit(X_train, y_train)
y_pred_ridge = model_ridge.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

# Lasso Regression
model_lasso = Lasso(alpha=0.1)
model_lasso.fit(X_train, y_train)
y_pred_lasso = model_lasso.predict(X_test)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)

# Random Forest Regression
model_rf = RandomForestRegressor(n_estimators=100, random_state=21)
model_rf.fit(X_train, y_train)
y_pred_rf = model_rf.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

# Evaluate all models
print("Linear Regression - R^2: {:.4f}, MSE: {:.4f}".format(r2_lr, mse_lr))
print("Ridge Regression - R^2: {:.4f}, MSE: {:.4f}".format(r2_ridge, mse_ridge))
print("Lasso Regression - R^2: {:.4f}, MSE: {:.4f}".format(r2_lasso, mse_lasso))
print("Random Forest Regression - R^2: {:.4f}, MSE: {:.4f}".format(r2_rf, mse_rf))

# Model Evaluation Results
df_results = pd.DataFrame({
    'Actual': y_test,
    'Predicted_LR': y_pred_lr,
    'Predicted_Ridge': y_pred_ridge,
    'Predicted_Lasso': y_pred_lasso,
    'Predicted_RF': y_pred_rf,
    'Difference_LR': y_test - y_pred_lr,
    'Difference_Ridge': y_test - y_pred_ridge,
    'Difference_Lasso': y_test - y_pred_lasso,
    'Difference_RF': y_test - y_pred_rf
}).reset_index(drop=True)

print(df_results.head())


Linear Regression - R^2: 0.7243, MSE: 0.2584
Ridge Regression - R^2: 0.7231, MSE: 0.2595
Lasso Regression - R^2: 0.7071, MSE: 0.2745
Random Forest Regression - R^2: 0.7185, MSE: 0.2638
   Actual  Predicted_LR  Predicted_Ridge  Predicted_Lasso  Predicted_RF  \
0    7.00      7.148605         7.147410         7.190106      7.354228   
1    8.21      7.635667         7.633923         7.627011      8.087800   
2    8.93      8.745476         8.743960         8.581064      8.894400   
3    8.38      8.904392         8.900124         8.730327      8.985700   
4    8.65      8.048572         8.049331         7.971544      8.103900   

   Difference_LR  Difference_Ridge  Difference_Lasso  Difference_RF  
0      -0.148605         -0.147410         -0.190106      -0.354228  
1       0.574333          0.576077          0.582989       0.122200  
2       0.184524          0.186040          0.348936       0.035600  
3      -0.524392         -0.520124         -0.350327      -0.605700  
4       0.6014

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Gender'].replace({'Female': 'F', 'Male': 'M'}, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Roll'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting 