In [4]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
import warnings

In [5]:
df=pd.read_csv(r"C:\My projects\mlproject\notebooks\StudentsPerformance.csv")

In [6]:
df.columns

Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score',
       'writing score'],
      dtype='object')

In [7]:
x=df.drop(columns=["math score"])
y=df["math score"]

In [9]:
numerical_features=x.select_dtypes(include=np.number).columns
categorical_features=x.select_dtypes(exclude=np.number).columns

In [14]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

numeric_transformer=StandardScaler()
categorical_transformer = OneHotEncoder()

preprocessor=ColumnTransformer(
    transformers=[
        ("num",numeric_transformer,numerical_features),
        ("cat",categorical_transformer,categorical_features)
    ]
)

In [15]:
x=preprocessor.fit_transform(x)

In [17]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)   

In [18]:
def evaluate_model(true,predicted):
    r2_square=r2_score(true,predicted)
    mean_absolute_error_=mean_absolute_error(true,predicted)
    mean_squared_error_=mean_squared_error(true,predicted)
    rmse=np.sqrt(mean_squared_error_)
    return r2_square,mean_absolute_error_,mean_squared_error_,rmse

In [19]:
models={
    "linear regression":LinearRegression(),
    "ridge regression":Ridge(),
    "lasso regression":Lasso(),
    "KNN":KNeighborsRegressor(),
    "decision tree":DecisionTreeRegressor(),
    "random forest":RandomForestRegressor(),
    "adaboost":AdaBoostRegressor(),
    "SVR":SVR(),
    "catboost":CatBoostRegressor(verbose=False),
    "xgboost":XGBRegressor()
}

In [None]:
for name,model in models.items():
    model.fit(x_train,y_train)
    y_train_pred=model.predict(x_train)
    y_test_pred=model.predict(x_test)
    r2_square,mean_absolute_error_,mean_squared_error_,rmse=evaluate_model(y_test,y_test_pred)
    print(f"model name: {name}")
    print(f"r2_square: {r2_square}")
    print(f"mean_absolute_error_: {mean_absolute_error_}")
    print(f"mean_squared_error_: {mean_squared_error_}")
    print(f"rmse: {rmse}")
    print("=====================================================")

model name: linear regression
r2_square: 0.8804332983749565
mean_absolute_error_: 4.214763142474849
mean_squared_error_: 29.095169866715466
rmse: 5.3939938697328405
model name: ridge regression
r2_square: 0.880593148502874
mean_absolute_error_: 4.211100688014259
mean_squared_error_: 29.05627219234827
rmse: 5.3903870169356365
model name: lasso regression
r2_square: 0.825320079562973
mean_absolute_error_: 5.157879138921816
mean_squared_error_: 42.50633235127344
rmse: 6.519688056285626
model name: KNN
r2_square: 0.7837702557426202
mean_absolute_error_: 5.619
mean_squared_error_: 52.617
rmse: 7.253757646902741
model name: decision tree
r2_square: 0.7303549295959865
mean_absolute_error_: 6.365
mean_squared_error_: 65.615
rmse: 8.100308636095294
model name: random forest
r2_square: 0.8548970978769407
mean_absolute_error_: 4.584021428571429
mean_squared_error_: 35.30910803798186
rmse: 5.942146753319196
model name: adaboost
r2_square: 0.8455933076970789
mean_absolute_error_: 4.7759685222234864

: 