In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    BayesianRidge,
    HuberRegressor
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    ExtraTreesRegressor,
    BaggingRegressor
)
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.dummy import DummyRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



In [2]:
# Load the student-performance dataset from the project's DATA directory.
df = pd.read_csv(filepath_or_buffer='DATA/StudentsPerformance.csv')

In [5]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,total score
0,female,group B,bachelor's degree,standard,none,72,72,74,218
1,female,group C,some college,standard,completed,69,90,88,247
2,female,group B,master's degree,standard,none,90,95,93,278
3,male,group A,associate's degree,free/reduced,none,47,57,44,148
4,male,group C,some college,standard,none,76,78,75,229


In [4]:
# Overall score = element-wise sum of the three subject scores.
df['total score'] = (
    df['math score']
    .add(df['reading score'])
    .add(df['writing score'])
)

In [7]:
# Split the frame into the feature matrix X and the regression target y.
#
# 'Unnamed: 0' (visible in the df.head() preview) looks like a row counter that
# was saved into the CSV. Left in X it would be standard-scaled and handed to
# every model as if it were a real feature, so it is removed here.
# errors='ignore' keeps the cell working when the CSV has no such column.
#
# NOTE(review): X still contains 'math score', 'reading score' and
# 'writing score', and the target is exactly their sum. That is target leakage:
# a plain linear model reproduces y perfectly (R² = 1.00, MAE = 0.00), so the
# model comparison below says nothing about predictive power. Decide what the
# real prediction task is (e.g. predict the total from the demographic columns
# only) and drop the component scores from X accordingly.
X = (
    df
    .drop(columns=['total score'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
y = df['total score']

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Show (rows, columns) of the training and test feature sets.
(X_train.shape, X_test.shape)

((800, 8), (200, 8))

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# One-hot encode all categorical (object-dtype) columns and standard-scale
# every remaining column.
cat_feature = X.select_dtypes(include='object').columns
num_feature = X.select_dtypes(exclude='object').columns

# drop='first' removes one dummy column per categorical feature.
category_transformer = OneHotEncoder(drop='first')
numerical_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('OneHotEncoder', category_transformer, cat_feature),
        ('StandardScaler', numerical_transformer, num_feature),
    ],
)

In [12]:
# Fit the preprocessor on the training split only and replace X_train with its
# encoded/scaled form. X_train is overwritten, so re-running this cell would
# feed the already-transformed data back into the preprocessor.
X_train = preprocessor.fit_transform(X=X_train)

In [13]:
# Apply the already-fitted preprocessor to the test split (no re-fitting), and
# replace X_test with the transformed result.
X_test = preprocessor.transform(X=X_test)

In [None]:
### Feature engineering is complete up to this point.

#### Next, define the regression models (random forest among them) and train them.

In [26]:
# Seed shared by every estimator that uses randomness internally, so the
# scores printed below are the same on every run. 42 matches the seed already
# used for the train/test split.
RANDOM_STATE = 42

# Candidate regressors, keyed by the display name used when reporting scores.
# All estimators use their default hyper-parameters apart from the seed.
models = {
    # Linear models
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet": ElasticNet(),
    "Bayesian Ridge": BayesianRidge(),
    "Huber Regressor": HuberRegressor(),
    # Tree-based models and ensembles (randomised, hence seeded)
    "Decision Tree": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest": RandomForestRegressor(random_state=RANDOM_STATE),
    "Extra Trees": ExtraTreesRegressor(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingRegressor(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostRegressor(random_state=RANDOM_STATE),
    "Bagging Regressor": BaggingRegressor(random_state=RANDOM_STATE),
    # Kernel / instance-based models
    "Support Vector Regressor": SVR(),
    "KNN Regressor": KNeighborsRegressor(),
    "Gaussian Process": GaussianProcessRegressor(),
    # Online learner; shuffles the training data, hence seeded
    "Passive Aggressive": PassiveAggressiveRegressor(random_state=RANDOM_STATE),
    # Baseline that ignores the features
    "Dummy Regressor": DummyRegressor()
}

In [30]:
def performance_metrix(name, model, X_train, y_train, X_test, y_test):
    """Print and return train/test regression scores for an already-fitted model.

    Parameters
    ----------
    name : label printed in the report header.
    model : object exposing ``predict``; it is not fitted here.
    X_train, y_train : features and targets the train scores are computed on.
    X_test, y_test : features and targets the test scores are computed on.

    Returns
    -------
    dict
        MAE, RMSE and R2 for each split, under the keys ``"Train MAE"``,
        ``"Train RMSE"``, ``"Train R2"``, ``"Test MAE"``, ``"Test RMSE"``
        and ``"Test R2"``.
    """

    def _score(actual, predicted):
        # (MAE, RMSE, R2) for one split; RMSE is the square root of the MSE.
        return (
            mean_absolute_error(actual, predicted),
            np.sqrt(mean_squared_error(actual, predicted)),
            r2_score(actual, predicted),
        )

    print(f"\n✅ {name} trained successfully and here is the score:")

    # Predict on both splits before scoring either of them.
    fitted_on_train = model.predict(X_train)
    fitted_on_test = model.predict(X_test)

    train_mae, train_rmse, train_r2 = _score(y_train, fitted_on_train)
    test_mae, test_rmse, test_r2 = _score(y_test, fitted_on_test)

    print(f"📊 Train: MAE={train_mae:.2f}, RMSE={train_rmse:.2f}, R²={train_r2:.2f}")
    print(f"📊 Test : MAE={test_mae:.2f}, RMSE={test_rmse:.2f}, R²={test_r2:.2f}")

    report = {
        "Train MAE": train_mae,
        "Train RMSE": train_rmse,
        "Train R2": train_r2,
        "Test MAE": test_mae,
        "Test RMSE": test_rmse,
        "Test R2": test_r2
    }
    return report

In [32]:
# Fit every candidate model on the training split and score it on both splits.
# The per-model scores are kept in `all_metrics`; previously `metrics` was
# overwritten on each pass, so only the last model's numbers survived the loop.
all_metrics = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    metrics = performance_metrix(name, model, X_train, y_train, X_test, y_test)
    all_metrics[name] = metrics

# One row per model, best test R2 first, for a side-by-side comparison.
metrics_summary = (
    pd.DataFrame.from_dict(all_metrics, orient='index')
    .sort_values('Test R2', ascending=False)
)
metrics_summary



✅ Linear Regression trained successfully and here is the score:
📊 Train: MAE=0.00, RMSE=0.00, R²=1.00
📊 Test : MAE=0.00, RMSE=0.00, R²=1.00

✅ Ridge Regression trained successfully and here is the score:
📊 Train: MAE=0.02, RMSE=0.02, R²=1.00
📊 Test : MAE=0.02, RMSE=0.03, R²=1.00

✅ Lasso Regression trained successfully and here is the score:
📊 Train: MAE=0.84, RMSE=1.06, R²=1.00
📊 Test : MAE=0.87, RMSE=1.10, R²=1.00

✅ ElasticNet trained successfully and here is the score:
📊 Train: MAE=5.61, RMSE=7.02, R²=0.97
📊 Test : MAE=5.82, RMSE=7.39, R²=0.97

✅ Bayesian Ridge trained successfully and here is the score:
📊 Train: MAE=0.00, RMSE=0.00, R²=1.00
📊 Test : MAE=0.00, RMSE=0.00, R²=1.00

✅ Huber Regressor trained successfully and here is the score:
📊 Train: MAE=1.64, RMSE=5.58, R²=0.98
📊 Test : MAE=1.90, RMSE=6.01, R²=0.98

✅ Decision Tree trained successfully and here is the score:
📊 Train: MAE=0.00, RMSE=0.00, R²=1.00
📊 Test : MAE=3.31, RMSE=4.87, R²=0.99

✅ Random Forest trained succes