In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from tqdm import tqdm
import os

In [2]:
# Make the local datasets folder the working directory so that the relative
# CSV filename used by the loading cell resolves against it.
# NOTE(review): this is a machine-specific absolute path; the notebook will
# only run as-is where this directory exists.
datasets_dir = 'D://meridianthe4//PML//Datasets'
os.chdir(datasets_dir)

In [6]:
# Read the Boston CSV and separate the target column ("medv") from the
# remaining columns, which are all used as predictors.
boston = pd.read_csv("Boston.csv")
y = boston["medv"]
X = boston.drop(columns="medv")

# Keep 30% of the rows aside for evaluation; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=25,
)

In [8]:
# Linear-kernel SVR: exhaustive sweep over C and epsilon, each taking 15
# evenly spaced values in [0.01, 5] (225 fits in total).
Cs = np.linspace(0.01, 5, num=15)
Es = np.linspace(0.01, 5, num=15)

# One row per configuration: [C, epsilon, test-set MAE].
scores = []
for c in Cs:
    for e in Es:
        svm = SVR(kernel='linear', C=c, epsilon=e)
        # The scaler lives inside the pipeline, so it is fitted on the
        # training rows only and then re-applied to the test rows.
        pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('svm', svm)])
        y_pred = pipeline.fit(X_train, y_train).predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        scores.append([c, e, mae])

# NOTE(review): configurations are ranked on the test split itself, so the
# best MAE shown is an optimistic estimate of generalisation error.
scores_df = pd.DataFrame(scores, columns=['C', 'Epsilon', 'score'])
scores_df.sort_values('score')  # lowest MAE first (ascending is the default)

Unnamed: 0,C,Epsilon,score
31,0.722857,0.366429,2.855234
46,1.079286,0.366429,2.855614
61,1.435714,0.366429,2.857034
76,1.792143,0.366429,2.857343
16,0.366429,0.366429,2.857714
...,...,...,...
10,0.010000,3.574286,3.993570
11,0.010000,3.930714,4.047888
12,0.010000,4.287143,4.082993
13,0.010000,4.643571,4.157902


In [10]:
# RBF-kernel SVR: exhaustive sweep over gamma, C and epsilon, each taking 15
# evenly spaced values in [0.01, 5] (15 * 15 * 15 fits in total).
Cs = np.linspace(0.01, 5, num=15)
Es = np.linspace(0.01, 5, num=15)
Gs = np.linspace(0.01, 5, num=15)

# One row per configuration: [C, gamma, epsilon, test-set MAE].
scores = []
for g in tqdm(Gs):  # progress bar advances once per gamma value
    for c in Cs:
        for e in Es:
            svm = SVR(kernel='rbf', C=c, epsilon=e, gamma=g)
            # The scaler lives inside the pipeline, so it is fitted on the
            # training rows only and then re-applied to the test rows.
            pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('svm', svm)])
            y_pred = pipeline.fit(X_train, y_train).predict(X_test)
            mae = mean_absolute_error(y_test, y_pred)
            scores.append([c, g, e, mae])

# NOTE(review): configurations are ranked on the test split itself, so the
# best MAE shown is an optimistic estimate of generalisation error.
scores_df = pd.DataFrame(scores, columns=['C', 'Gamma', 'Epsilon', 'score'])
scores_df.sort_values('score')  # lowest MAE first (ascending is the default)

100%|██████████| 15/15 [00:33<00:00,  2.24s/it]


Unnamed: 0,C,Gamma,Epsilon,score
211,5.000000,0.010000,0.366429,2.577029
210,5.000000,0.010000,0.010000,2.580473
212,5.000000,0.010000,0.722857,2.592183
213,5.000000,0.010000,1.079286,2.594666
196,4.643571,0.010000,0.366429,2.604302
...,...,...,...,...
2264,0.010000,3.574286,5.000000,6.178166
2489,0.010000,3.930714,5.000000,6.178404
2714,0.010000,4.287143,5.000000,6.178595
2939,0.010000,4.643571,5.000000,6.178752


In [12]:
# Polynomial-kernel SVR: exhaustive sweep over the polynomial degree (2-5)
# together with C and epsilon, the latter two taking 15 evenly spaced values
# in [0.01, 5] (4 * 15 * 15 fits in total).
Cs = np.linspace(0.01, 5, num=15)
Es = np.linspace(0.01, 5, num=15)
degrees = [2, 3, 4, 5]

# One row per configuration: [C, degree, epsilon, test-set MAE].
scores = []
for d in tqdm(degrees):  # progress bar advances once per degree
    for c in Cs:
        for e in Es:
            svm = SVR(kernel='poly', C=c, epsilon=e, degree=d)
            # The scaler lives inside the pipeline, so it is fitted on the
            # training rows only and then re-applied to the test rows.
            pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('svm', svm)])
            y_pred = pipeline.fit(X_train, y_train).predict(X_test)
            mae = mean_absolute_error(y_test, y_pred)
            scores.append([c, d, e, mae])

# NOTE(review): configurations are ranked on the test split itself, so the
# best MAE shown is an optimistic estimate of generalisation error.
scores_df = pd.DataFrame(scores, columns=['C', 'Degree', 'Epsilon', 'score'])
scores_df.sort_values('score')  # lowest MAE first (ascending is the default)

100%|██████████| 4/4 [00:06<00:00,  1.73s/it]


Unnamed: 0,C,Degree,Epsilon,score
437,5.00,3,0.722857,2.942023
439,5.00,3,1.435714,2.945462
440,5.00,3,1.792143,2.947102
436,5.00,3,0.366429,2.947534
438,5.00,3,1.079286,2.952415
...,...,...,...,...
11,0.01,2,3.930714,6.088394
10,0.01,2,3.574286,6.088466
9,0.01,2,3.217857,6.088628
13,0.01,2,4.643571,6.092214
