In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

In [26]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score,root_mean_squared_error , f1_score
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

In [27]:
data = pd.read_csv('data/stud.csv')
data.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,standard,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [28]:
X = data.drop(columns=['math_score'],axis=1)
y = data['math_score']

In [29]:
num_data = X.select_dtypes(exclude='object').columns
cat_data = X.select_dtypes(include='object').columns

In [30]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

In [31]:
num_transform = StandardScaler()
cat_transform = OneHotEncoder()

preproccess = ColumnTransformer([
    ('OneHotEncoder',cat_transform,cat_data),
    ('StandardScaler',num_transform,num_data)
])

In [32]:
X = preproccess.fit_transform(X)

In [33]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [34]:
def evaluat_model(a,b):
    mae = mean_absolute_error(a,b)
    mse = mean_squared_error(a,b)
    rmse = root_mean_squared_error(a,b)
    r2 = r2_score(a,b)
    return mae,mse,rmse,r2

In [35]:
models = {
    "Linear Regression": LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "K-Neighbors Regressor": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "XGBRegressor": XGBRegressor(n_estimators=100, max_depth=5), 
    "CatBoosting Regressor": CatBoostRegressor(verbose=False),
    "AdaBoost Regressor": AdaBoostRegressor()
}
models_list=[]
r2_list=[]

for name,model in models.items():
    train_model = model.fit(X_train,y_train)    
    y_pred = train_model.predict(X_test)
    
    print(f"{name} Accuracy : ")
    mae , mse ,rmse ,r2 = evaluat_model(y_test,y_pred)
    print("MSE : ",mse)
    print("r2_score : ",r2)
    # print(f1_score(y_test,y_train))
    print()

Linear Regression Accuracy : 
MSE :  29.081572227186207
r2_score :  0.8804891779218315

Lasso Accuracy : 
MSE :  42.558198094074925
r2_score :  0.8251069371127824

Ridge Accuracy : 
MSE :  29.040203575167975
r2_score :  0.8806591825409883

K-Neighbors Regressor Accuracy : 
MSE :  52.90259999999999
r2_score :  0.7825965815506307

Decision Tree Accuracy : 
MSE :  60.075
r2_score :  0.7531215788383584

Random Forest Regressor Accuracy : 
MSE :  35.327080041666676
r2_score :  0.8548232418653748

XGBRegressor Accuracy : 
MSE :  40.93837356567383
r2_score :  0.8317636251449585

CatBoosting Regressor Accuracy : 
MSE :  36.25545230655922
r2_score :  0.851008092818235

AdaBoost Regressor Accuracy : 
MSE :  35.70595739816857
r2_score :  0.8532662440528558

