In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2
from sklearn.impute import SimpleImputer
import seaborn as sns
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import make_scorer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_predict,RandomizedSearchCV,validation_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials,rand
from hyperopt.pyll.base import scope 
from sklearn.model_selection import cross_validate
import warnings
warnings.simplefilter("ignore")


def mae(y_orig,y_pred):
    """Mean absolute error expressed as a percentage of the mean observed value.

    Parameters
    ----------
    y_orig : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, element-for-element comparable with ``y_orig``.

    Returns
    -------
    float
        ``mean(|y_orig - y_pred|) / mean(y_orig) * 100``. The result is
        inf/nan when the mean of ``y_orig`` is zero, because no guard is
        applied to the division.
    """
    # Coerce to ndarrays so plain Python lists are accepted and pandas objects
    # are compared positionally instead of being aligned on their index labels.
    y_orig = np.asarray(y_orig)
    y_pred = np.asarray(y_pred)
    abs_error = np.abs(y_orig - y_pred)
    return (np.mean(abs_error) / np.mean(y_orig))*100
def rmse(y_true,y_pred):
    """Root-mean-squared error: the square root of ``mse(y_true, y_pred)``."""
    mean_squared = mse(y_true, y_pred)
    return mean_squared ** 0.5


In [None]:
# Importing, Imputing, Splitting and Scaling

# np.longfloat was an alias of np.longdouble and was removed in NumPy 2.0;
# np.longdouble is the same type and is available in every NumPy release.
df = pd.read_excel('DATA.xlsx',dtype=np.longdouble)

# Replace NaNs with the per-column mean. From here on `df` is a NumPy array.
# NOTE(review): the imputer is fitted on the whole sheet (target column
# included) before the split, so the column means also use rows that later
# land in the validation/test sets. Consider fitting it on the training rows only.
imputer = SimpleImputer(missing_values = np.nan, strategy ='mean')
df = imputer.fit_transform(df)

# Features are every column except the last one, so the target is taken as the
# last column too. The previous hard-coded index 9 only agreed with the feature
# slice for a 10-column sheet; with more columns the target would also have
# been left inside X. Presumably the target is the last column - TODO confirm.
X = df[:,:-1]
y = df[:,-1]

# Hold out 10% as the test set, then 10% of the remainder as the validation set.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.10,random_state=22)
X_train,X_val,y_train,y_val = train_test_split(X_train,y_train,test_size=0.10,random_state=22)

# The scaler is fitted on the training features only and then applied unchanged
# to the validation and test features.
scaler = StandardScaler()
X_train_sca = scaler.fit_transform(X_train)
X_val_sca = scaler.transform(X_val)
X_test_sca = scaler.transform(X_test)


In [None]:
# Running SVR and Tuning it

# Baseline: SVR with scikit-learn's default hyper-parameters.
model = SVR()
model.fit(X_train_sca,y_train)
print("Initial Model")

# Predict once per split and reuse the prediction for all four metrics; the
# report used to call model.predict again for every printed line.
for split_title, X_split, y_split in (
        ("Training Score", X_train_sca, y_train),
        ("Validation Score", X_val_sca, y_val),
        ("Test Score", X_test_sca, y_test)):
    y_split_pred = model.predict(X_split)
    print("---------------------------------------------")
    print(split_title)
    print("R2 -> ",r2(y_split,y_split_pred))
    print("MSE -> ",mse(y_split,y_split_pred))
    print("RMSE -> ",rmse(y_split,y_split_pred))
    # mae() returns a percentage of the mean target, so it is labelled MAEP,
    # matching the final-model report and the 'MAEP' scorer key below.
    print("MAEP -> ",mae(y_split,y_split_pred))

# Scorer objects for cross-validation. make_scorer defaults to
# greater_is_better=True, so each scorer returns the raw (un-negated) metric.
MSE = make_scorer(mse)
MAEP = make_scorer(mae)
RMSE = make_scorer(rmse)
scoring = {'R2': 'r2', 'MSE': MSE,'RMSE':RMSE,'MAEP':MAEP}

# Shuffled 6-fold split with a fixed seed so the folds are the same on every run.
cv = KFold(n_splits=6,random_state=19,shuffle=True)

# Hyperopt search space for the SVR hyper-parameters; each one is sampled
# uniformly between the two bounds.
# The upper bound for C is 1e3: the C used for the final model further down in
# this notebook (~584.66) lies outside the previous range [1e-3, 1], so that
# range could not have produced it. NOTE(review): confirm the intended bound.
space = {
        'C': hp.uniform('C',1e-3,1e3),
        'gamma': hp.uniform('gamma',1e-3,1e2),
        'epsilon': hp.uniform ('epsilon', 1e-3,1e3),
    }

def objective(space):
    """Hyperopt objective: mean cross-validated MSE of an SVR at one sampled point.

    Parameters
    ----------
    space : dict
        Sampled hyper-parameters, read through the keys 'C', 'gamma' and 'epsilon'.

    Returns
    -------
    dict
        ``{'loss': <mean test-fold MSE>, 'status': STATUS_OK}``.
    """
    candidate = SVR(C=space['C'],gamma=space['gamma'],epsilon=space['epsilon'])

    # Per-fold MSE on the held-out part of each fold of the module-level `cv`.
    fold_mse = cross_validate(candidate, X_train_sca, y_train, cv = cv,scoring= MSE)['test_score']

    # MSE is an error measure, so it is returned as-is (not negated) for the
    # optimiser to use as its loss.
    return {'loss': fold_mse.mean(), 'status': STATUS_OK }
    
# Run 70 TPE evaluations of `objective` over `space`, recording every trial.
# NOTE(review): no `rstate` is passed to fmin, so the search result presumably
# differs from run to run - confirm and seed it if reproducibility matters.
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            trials=trials,
            max_evals=70)

# A bare `best` expression is only rendered when it is the last line of a cell;
# in the middle of the cell it is silently discarded, so print it explicitly.
print("Best hyperparameters -> ", best)

# Hyper-parameters of the final model. These are literal values, not read from
# the `best` dict returned by fmin - presumably copied from an earlier search
# run; TODO confirm.
C, epsilon, gamma = 584.6570888436889, 0.5906437756604672, 0.27102404571990846

# Fit the final SVR on the scaled training set.
model = SVR(C=C,epsilon=epsilon,gamma=gamma)
model.fit(X_train_sca,y_train)

print("Final Model")
print("---------------------------------------------")
print("Test Score")

# One prediction of the test set, shared by all four metrics.
y_test_pred = model.predict(X_test_sca)
for metric_label, metric_fn in (("R2", r2), ("MSE", mse), ("RMSE", rmse), ("MAEP", mae)):
    print(metric_label + " -> ", metric_fn(y_test, y_test_pred))

print("---------------------------------------------")

# Fresh, unfitted SVR with the final hyper-parameters; cross_validate fits a
# clone of it on every fold of `cv`.
model = SVR(C=C,epsilon=epsilon,gamma=gamma)

# return_train_score=True adds 'train_<name>' entries next to the 'test_<name>'
# entries for every key of the `scoring` dict.
t = cross_validate(model,X_train_sca,y_train,cv=cv,return_train_score=True,scoring=scoring)

# Print the mean over folds of each metric: first on the training part of the
# folds, then on the held-out part (reported as the validation score).
for section_title, key_prefix in (("Training Score", "train"), ("Validation Score", "test")):
    print(section_title)
    for metric_name in ("R2", "MSE", "RMSE", "MAEP"):
        print(metric_name + " -> ", t[key_prefix + "_" + metric_name].mean())
    print("---------------------------------------------")