In [9]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression,Lasso
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV 
from catboost import CatBoostRegressor 
from xgboost import XGBRegressor 
import warnings 

In [11]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [12]:
df=pd.read_csv("student-mat.csv")

In [15]:
x=df.drop('G3', axis=1)
x.head(5)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,no,4,3,4,1,1,3,6,5,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,no,5,3,3,1,1,3,4,5,5
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,no,4,3,2,2,3,3,10,7,8
3,GP,F,15,U,GT3,T,4,2,health,services,...,yes,3,2,2,1,1,5,2,15,14
4,GP,F,16,U,GT3,T,3,3,other,other,...,no,4,3,2,1,2,5,4,6,10


In [17]:
y=df['G3']
y.head()

0     6
1     6
2    10
3    15
4    10
Name: G3, dtype: int64

In [19]:
num_features = x.select_dtypes(exclude="object").columns
cat_features = x.select_dtypes(include="object").columns 
print(num_features)
print(cat_features)

Index(['age', 'Medu', 'Fedu', 'traveltime', 'studytime', 'failures', 'famrel',
       'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences', 'G1', 'G2'],
      dtype='object')
Index(['school', 'sex', 'address', 'famsize', 'Pstatus', 'Mjob', 'Fjob',
       'reason', 'guardian', 'schoolsup', 'famsup', 'paid', 'activities',
       'nursery', 'higher', 'internet', 'romantic'],
      dtype='object')


In [20]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
cat_transformer=OneHotEncoder()

preprocessor=ColumnTransformer(
    [
        ("OneHotEncoder",cat_transformer,cat_features),
        ("StandardScaler",numeric_transformer,num_features)         
    ]
)

In [21]:
x=preprocessor.fit_transform(x)

In [22]:
x

array([[ 1.        ,  0.        ,  1.        , ...,  0.03642446,
        -1.78246688, -1.25479105],
       [ 1.        ,  0.        ,  1.        , ..., -0.21379577,
        -1.78246688, -1.52097927],
       [ 1.        ,  0.        ,  1.        , ...,  0.53686493,
        -1.1791469 , -0.72241461],
       ...,
       [ 0.        ,  1.        ,  0.        , ..., -0.33890588,
        -0.27416693, -0.72241461],
       [ 0.        ,  1.        ,  0.        , ..., -0.71423623,
         0.02749306,  0.34233827],
       [ 0.        ,  1.        ,  0.        , ..., -0.08868565,
        -0.87748691, -0.45622639]])

In [24]:
x.shape

(395, 58)

In [28]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
print(x_train.shape)
print(x_test.shape)    

(316, 58)
(79, 58)


In [33]:
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

In [29]:

def evaluate_model(true, predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    return mae, mse, rmse, r2_square


In [35]:
models = {
    "Linear Regression": LinearRegression(),
    "Lasso": Lasso(),
    "K-Neighbors Regressor": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "XGBRegressor": XGBRegressor(), 
    "CatBoosting Regressor": CatBoostRegressor(verbose=False),
    "AdaBoost Regressor": AdaBoostRegressor()
}
model_list = []
r2_list =[]

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(x_train, y_train) # Train model

    # Make predictions
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)
    
    # Evaluate Train and Test dataset
    model_train_mae, model_train_mse, model_train_rmse, model_train_r2 = evaluate_model(y_train, y_train_pred)

    model_test_mae, model_test_mse, model_test_rmse, model_test_r2 = evaluate_model(y_test, y_test_pred)

    
    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])
    
    print('Model performance for Training set')
    print("- Root Mean Squared Error: {:.4f}".format(model_train_rmse))
    print("- Mean Squared Error: {:.4f}".format(model_train_mse))
    print("- Mean Absolute Error: {:.4f}".format(model_train_mae))
    print("- R2 Score: {:.4f}".format(model_train_r2))

    print('----------------------------------')
    print('Model performance for Test set')
    print("- Root Mean Squared Error: {:.4f}".format(model_test_rmse))
    print("- Mean Squared Error: {:.4f}".format(model_test_rmse))
    print("- Mean Absolute Error: {:.4f}".format(model_test_mae))
    print("- R2 Score: {:.4f}".format(model_test_mae))
    r2_list.append(model_test_r2) 
    print('='*35) 
    print('\n') 

Linear Regression
Model performance for Training set
- Root Mean Squared Error: 1.6934
- Mean Squared Error: 2.8676
- Mean Absolute Error: 1.1778
- R2 Score: 0.8635
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 2.3866
- Mean Squared Error: 2.3866
- Mean Absolute Error: 1.6464
- R2 Score: 1.6464


Lasso
Model performance for Training set
- Root Mean Squared Error: 2.1665
- Mean Squared Error: 4.6936
- Mean Absolute Error: 1.3234
- R2 Score: 0.7765
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 2.2270
- Mean Squared Error: 2.2270
- Mean Absolute Error: 1.4186
- R2 Score: 1.4186


K-Neighbors Regressor
Model performance for Training set
- Root Mean Squared Error: 2.3140
- Mean Squared Error: 5.3546
- Mean Absolute Error: 1.6918
- R2 Score: 0.7451
----------------------------------
Model performance for Test set
- Root Mean Squared Error: 3.1160
- Mean Squared Error: 3.1160
- Mean Absolute Error: 2