In [1]:
import pandas as pd
import numpy as np
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)


In [2]:
# Load the dataset from a CSV file; the path is relative to the working directory.
df = pd.read_csv("data/stud.csv")

In [3]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(1000, 8)

In [4]:
# Separate the regression target (math_score) from the remaining feature columns.
target_col = 'math_score'
y = df[target_col]
x = df.drop(columns=[target_col])

In [None]:
# Build the preprocessing step: one-hot encode the object-dtype columns and
# standardize every other column, then split the result into train/test sets.
num_features = x.select_dtypes(exclude='object').columns
cat_features = x.select_dtypes(include='object').columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()

# Each entry is (step name, transformer, columns it is applied to).
preprocessor = ColumnTransformer(
    [("OneHotEncoder", oh_transformer, cat_features),
     ("StandardScaler", numeric_transformer, num_features),]
)

# NOTE(review): the preprocessor is fitted on every row before the split below,
# so the held-out rows also contribute to the fitted encoder/scaler.
# NOTE(review): `x` is rebound from the raw feature frame to the transformed
# output, so re-running this cell requires re-running the cell that defines `x`.
x = preprocessor.fit_transform(x)

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
# (Fixed: the original call was missing the comma between `y` and `test_size`.)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [5]:
# function for evaluation metric

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
def evaluate_model(true, predicted):
    """Score predictions against the observed values with four regression metrics.

    Args:
        true: Observed target values.
        predicted: Predicted values to compare against ``true``.

    Returns:
        A 4-tuple ``(mae, mse, rmse, r2_square)``: mean absolute error,
        mean squared error, the square root of the mean squared error,
        and the R^2 score.
    """
    abs_error = mean_absolute_error(true, predicted)
    squared_error = mean_squared_error(true, predicted)
    # RMSE is derived from the MSE computed above.
    root_squared_error = np.sqrt(squared_error)
    r2 = r2_score(true, predicted)
    return abs_error, squared_error, root_squared_error, r2

In [None]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
# from catboost import CatBoostRegressor

# Candidate regressors keyed by the display name used in the report below.
models = {
    "Linear Regression": LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "K-Neighbors": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "Adaboost Regressor": AdaBoostRegressor(),
}

# Parallel lists: model_list[i] is the name whose test R^2 is r2_list[i].
model_list = []
r2_list = []

for model_name, model_obj in models.items():
    # Fit on the training split, then predict on both splits.
    model_obj.fit(x_train, y_train)
    train_pred = model_obj.predict(x_train)
    test_pred = model_obj.predict(x_test)

    # Each tuple is (mae, mse, rmse, r2_square), as returned by evaluate_model.
    train_scores = evaluate_model(y_train, train_pred)
    test_scores = evaluate_model(y_test, test_pred)

    print(f"Model name : {model_name}")
    print("model_train_mae, model_train_mse, model_train_rmse, model_train_r2_square")
    print(*train_scores)
    print("model_test_mae, model_test_mse, model_test_rmse, model_test_r2_square")
    print(*test_scores)

    model_list.append(model_name)
    # The last element of the metrics tuple is the R^2 score.
    r2_list.append(test_scores[-1])
