In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Install CatBoost first: it is not preinstalled on Colab, so it must be present before the import cell runs.
%pip install catboost

In [None]:
from catboost import CatBoostRegressor
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.metrics import root_mean_squared_error,r2_score,mean_squared_error,mean_absolute_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

## Importing the data as a pandas DataFrame

In [None]:
# Read the student-performance CSV from the Colab working directory.
data_path = "/content/StudentsPerformance.csv"
df = pd.read_csv(data_path)

In [None]:
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)


## Preparing the X and y variables

In [None]:
# The math score is the regression target; every other column is a feature.
target_column = 'math score'

y = df[target_column]
X = df.drop(columns=[target_column])

## Create a ColumnTransformer with two transformers (one-hot encoding and standard scaling)

In [None]:
# Column groups are picked by dtype: object columns are treated as categorical,
# everything else as numeric.
num_features=X.select_dtypes(exclude='object').columns
cat_features=X.select_dtypes(include='object').columns

# One-hot encode the categorical columns and standardize the numeric ones.
# handle_unknown='ignore' encodes a category that was not seen during fit as
# all zeros instead of raising, so transforming new rows cannot fail on it.
preprocessor=ColumnTransformer([
    ('OneHotEncoder',OneHotEncoder(handle_unknown='ignore'),cat_features),
    ('StandardScaler',StandardScaler(),num_features)
])

In [None]:
# Split BEFORE fitting the preprocessor so the encoder categories and the
# scaler mean/variance are learned from the training rows only; fitting on the
# full dataset would leak test-set statistics into the features.
# X itself is left untouched, so this cell can be re-run safely.
X_train_raw,X_test_raw,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

# Fit on the training features, then apply the already-fitted transform to the
# held-out features.
# NOTE: transform raises if the held-out rows contain a category absent from
# the training rows, unless the encoder is configured to ignore unknown categories.
X_train=preprocessor.fit_transform(X_train_raw)
X_test=preprocessor.transform(X_test_raw)

## Create an evaluation function that returns the metrics (MAE, RMSE, R²) after model training

In [None]:
def evaluate_model(true,predict):
  """Score predictions against the ground truth.

  Args:
    true: Observed target values.
    predict: Predicted target values, in the same order as ``true``.

  Returns:
    A ``(mae, rmse, r2_square)`` tuple: mean absolute error, root mean
    squared error, and the R2 score.
  """
  mae=mean_absolute_error(true,predict)
  # MSE is computed once and only used to derive RMSE.
  rmse=np.sqrt(mean_squared_error(true,predict))
  r2_square=r2_score(true,predict)
  return mae,rmse,r2_square

In [None]:
# One shared seed for every stochastic estimator so repeated runs report the
# same scores.
RANDOM_STATE=42

# Candidate regressors, keyed by the label printed in the report below.
models={
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'KNeighboursRegressor':KNeighborsRegressor(),
    'DecisionTreeRegressor':DecisionTreeRegressor(random_state=RANDOM_STATE),
    'RandomForestRegressor':RandomForestRegressor(random_state=RANDOM_STATE),
    'XGBRegressor':XGBRegressor(random_state=RANDOM_STATE),
    'CatBoostRegressor':CatBoostRegressor(verbose=False,random_state=RANDOM_STATE),
    'AdaBoostRegressor':AdaBoostRegressor(random_state=RANDOM_STATE)
}

# Parallel lists: model_list[k] is the label whose test-set R2 is r2_list[k].
model_list=[]
r2_list=[]

for model_name,model in models.items():
  model.fit(X_train,y_train)

  # Predict on both splits so train and test scores can be compared.
  y_train_pred=model.predict(X_train)
  y_test_pred=model.predict(X_test)

  # Score each split with the shared metric helper.
  model_train_mae,model_train_rmse,model_train_r2=evaluate_model(y_train,y_train_pred)
  model_test_mae,model_test_rmse,model_test_r2=evaluate_model(y_test,y_test_pred)

  print(model_name)
  model_list.append(model_name)
  print('Model performance for Training set')
  print("- Root Mean Squared Error: {:.4f}".format(model_train_rmse))
  print("- Mean Absolute Error: {:.4f}".format(model_train_mae))
  print("- R2 Score: {:.4f}".format(model_train_r2))

  print('----------------------------------')

  print('Model performance for Test set')
  print("- Root Mean Squared Error: {:.4f}".format(model_test_rmse))
  print("- Mean Absolute Error: {:.4f}".format(model_test_mae))
  print("- R2 Score: {:.4f}".format(model_test_r2))
  r2_list.append(model_test_r2)

  print('='*35)
  print('\n')

## Difference between the predicted and the actual values

In [None]:
# Refit a plain linear regression and line its test-set predictions up
# against the observed scores.
lin_model = LinearRegression(fit_intercept=True).fit(X_train, y_train)
y_pred = lin_model.predict(X_test)

# Signed error per test row: observed minus predicted.
residuals = y_test - y_pred
pred_df = pd.DataFrame(
    {
        'Actual Value': y_test,
        'Predicted Value': y_pred,
        'Difference': residuals,
    }
)
pred_df

In [None]:
# Scatter of observed vs. predicted scores; points near the diagonal are
# well-predicted.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual Values")
ax.set_ylabel("Predicted Values")
ax.set_title("Actual vs. Predicted Values")
plt.show()