In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from sklearn.linear_model import LinearRegression,Lasso,ridge_regression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble._forest import RandomForestRegressor
from xgboost import XGBRFRegressor
from hyperopt import hp
from hyperopt import fmin, tpe, space_eval


In [26]:
data = pd.read_csv('C:/Users/admin/Desktop/student-mlops-project/student/data/raw/Exam_Score_Prediction.csv')


In [None]:
data.drop('student_id',axis=1,inplace=True)
data = pd.get_dummies(data,drop_first=True)


In [28]:
models = [RandomForestRegressor(),LinearRegression(),DecisionTreeRegressor(),XGBRFRegressor(),Lasso(),SVR(),KNeighborsRegressor()]

In [29]:
X = data.drop('exam_score',axis=1)
y = data['exam_score']

In [30]:

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)


In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [32]:
df = pd.DataFrame(columns=['model','R2_score','f1_score','Recall_score'])
lis = pd.DataFrame([[2.5,3.0,56,2]],columns=['model','R2_score','f1_score','Recall_score'] )

pd.concat([df,lis],ignore_index=True)

  pd.concat([df,lis],ignore_index=True)


Unnamed: 0,model,R2_score,f1_score,Recall_score
0,2.5,3.0,56,2


In [33]:
print(LinearRegression)

<class 'sklearn.linear_model._base.LinearRegression'>


In [34]:
lr = LinearRegression()
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)
r2 = r2_score(y_test,y_pred)
mse = mean_squared_error(y_test,y_pred)
mae = mean_absolute_error(y_test,y_pred)

In [35]:

df = pd.DataFrame(columns=['model','R2_score','MSE','MAE'])


for model in models:
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    print(model)

    r2 = r2_score(y_test,y_pred)
    mse = mean_squared_error(y_test,y_pred)
    mae = mean_absolute_error(y_test,y_pred)
    scores = pd.DataFrame([[model,r2,mse,mae]],columns=['model','R2_score','MSE','MAE'] )

    df = pd.concat([df,scores],ignore_index=True)



RandomForestRegressor()
LinearRegression()
DecisionTreeRegressor()


  df = pd.concat([df,scores],ignore_index=True)


XGBRFRegressor(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bytree=None, device=None,
               early_stopping_rounds=None, enable_categorical=False,
               eval_metric=None, feature_types=None, feature_weights=None,
               gamma=None, grow_policy=None, importance_type=None,
               interaction_constraints=None, max_bin=None,
               max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=None, max_leaves=None,
               min_child_weight=None, missing=nan, monotone_constraints=None,
               multi_strategy=None, n_estimators=None, n_jobs=None,
               num_parallel_tree=None, objective='reg:squarederror',
               random_state=None, ...)
Lasso()
SVR()
KNeighborsRegressor()


In [36]:
df

Unnamed: 0,model,R2_score,MSE,MAE
0,"(DecisionTreeRegressor(max_features=1.0, rando...",0.68347,113.221715,8.592273
1,LinearRegression(),0.733008,95.502096,7.86307
2,DecisionTreeRegressor(),0.358758,229.370368,12.117565
3,"XGBRFRegressor(base_score=None, booster=None, ...",0.661639,121.030709,8.920569
4,Lasso(),0.688911,111.275605,8.540964
5,SVR(),0.718778,100.592304,8.110396
6,KNeighborsRegressor(),0.486895,183.536162,10.96125


In [37]:
df.sort_values(by=['R2_score'],ascending=False)

Unnamed: 0,model,R2_score,MSE,MAE
1,LinearRegression(),0.733008,95.502096,7.86307
5,SVR(),0.718778,100.592304,8.110396
4,Lasso(),0.688911,111.275605,8.540964
0,"(DecisionTreeRegressor(max_features=1.0, rando...",0.68347,113.221715,8.592273
3,"XGBRFRegressor(base_score=None, booster=None, ...",0.661639,121.030709,8.920569
6,KNeighborsRegressor(),0.486895,183.536162,10.96125
2,DecisionTreeRegressor(),0.358758,229.370368,12.117565


In [63]:
space = {
    'kernel': hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
    'gamma': hp.choice('gamma', ['scale','auto'])
}



In [64]:
def hyperparameter(params):

    svr = SVR(
        kernel=params['kernel'],
        gamma=params['gamma']
    )
    svr.fit(X_train,y_train)
    y_pred = lr.predict(X_test)
    r2 = r2_score(y_test,y_pred)
    return -r2

In [91]:
best = fmin(hyperparameter,space,algo=tpe.suggest,max_evals=20)

100%|██████████| 20/20 [02:07<00:00,  6.37s/trial, best loss: -0.7330084088180805]


In [93]:
best_params = space_eval(space, best)
best_params

{'gamma': 'auto', 'kernel': 'linear'}

In [94]:
svc = SVR(kernel=best_params['kernel'],gamma=best_params['gamma'])
svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)
r2_score(y_test,y_pred)

0.7326887694374997