In [None]:
# Standard library
import pickle
import warnings

# Third-party: numerics and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Third-party: modelling
from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
# XGBRegressor is provided by the `xgboost` package.
from xgboost import XGBRegressor

ModuleNotFoundError: No module named 'sklearn'

## Import the CSV data as a pandas DataFrame


In [None]:
# Read 'stud.csv' (a relative path, so it is resolved against the current
# working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='stud.csv')

## Preparing the X and y variables


In [None]:
# Feature frame: every column of df except 'math score'.
# `columns=` already identifies the axis, so no separate `axis` argument is passed.
X = df.drop(columns=['math score'])

In [None]:
# Target: all rows of the 'math score' column.
y = df.loc[:, 'math score']

In [None]:
# Build a ColumnTransformer with two branches: one-hot encoding for the
# object-dtype columns and standard scaling for the int64/float64 columns.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Partition the feature columns by dtype.
cat_features = X.select_dtypes(include=['object']).columns
num_features = X.select_dtypes(include=['int64', 'float64']).columns

oh_transformer = OneHotEncoder()
numeric_transformer = StandardScaler()

# Each step is (step name, transformer, columns the transformer is applied to).
preprocessor = ColumnTransformer([
    ('OneHotEncoder', oh_transformer, cat_features),
    ('Standardscaler', numeric_transformer, num_features),
])

In [None]:
# Fit the preprocessor on the feature frame and rebind X to the transformed
# output. Because X is overwritten, this cell is not idempotent: running it a
# second time feeds the already-transformed output back into fit_transform.
# NOTE(review): the fit runs on all rows, before train_test_split is called
# further down, so the encoder/scaler also see rows that end up in the test
# split -- confirm this is acceptable.
X = preprocessor.fit_transform(X)

In [None]:
# Display the shapes of the transformed features and of the target together.
X.shape, y.shape

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# From here on, ignore UserWarning messages raised from sklearn modules.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    module="sklearn",
)


## Create an evaluation function that returns the regression metrics after model training

In [None]:
def evaluate_model(true, pred):
    """Compute regression metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    pred : array-like
        Predicted target values, aligned element-wise with ``true``.

    Returns
    -------
    tuple
        ``(mse, r2, rmse)`` -- mean squared error, R2 score and root mean
        squared error, in that order.
    """
    mse = mean_squared_error(true, pred)
    r2 = r2_score(true, pred)
    # RMSE is the square root of the MSE already computed above.
    rmse = np.sqrt(mse)

    return mse, r2, rmse

In [None]:
# Candidate regressors, keyed by the display name used in the report below.
# The scikit-learn estimators that draw random numbers get a fixed
# random_state (the same seed as the train/test split) so reruns are repeatable.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'AdaBoost': AdaBoostRegressor(random_state=42),
    'Support Vector Regressor': SVR(),
    'K-Neighbors Regressor': KNeighborsRegressor(),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=42),
    'CatBoost Regressor': CatBoostRegressor(verbose=0),
    'XGBoost Regressor': XGBRegressor(eval_metric='rmse')
}

# Parallel result lists: entry k of each list belongs to the k-th model.
# The metric lists hold TEST-set scores, so models are compared on held-out
# data rather than on the data they were fitted to.
model_list = []
r2_list = []
mse_list = []
rmse_list = []

for model_name, model in models.items():
    model.fit(X_train, y_train)

    # Predict on both splits so train and test performance can be compared.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # evaluate_model returns (mse, r2, rmse).
    model_train_mse, model_train_r2, model_train_rmse = evaluate_model(y_train, y_train_pred)
    model_test_mse, model_test_r2, model_test_rmse = evaluate_model(y_test, y_test_pred)

    print(model_name)
    model_list.append(model_name)

    print('Model performance on train dataset')
    print('- Root Mean squared error: ', model_train_rmse)
    print('- R2 score: ', model_train_r2)
    print('- Mean squared error: ', model_train_mse)

    print('-' * 35)

    print('Model performance on test dataset')
    print('- Root Mean squared error: ', model_test_rmse)
    print('- R2 score: ', model_test_r2)
    print('- Mean squared error: ', model_test_mse)

    r2_list.append(model_test_r2)
    mse_list.append(model_test_mse)
    rmse_list.append(model_test_rmse)

    # 35-character rule separating one model's report from the next.
    print('=' * 35)
    print('\n')
