In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [2]:
# Load the training table from the comma-separated file next to the notebook.
train = pd.read_csv(
    'trainFSOut.csv',
    sep=',',
)

In [4]:
# The 'V1' column is the regression target; every other column is a feature.
Y = train['V1']
X = train.drop(columns=['V1'])


(77609,)

In [5]:
# Hold out 15% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.15,
    random_state=42,
)

In [6]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# Initial support-vector regressor with every setting spelled out explicitly.
svr_model = SVR(
    kernel='rbf',    # kernel type ('linear', 'poly', 'rbf', ...)
    gamma='scale',   # kernel coefficient for 'rbf', 'poly' and 'sigmoid'
    C=1.0,           # regularization parameter
    degree=3,        # polynomial degree; only the 'poly' kernel uses it
)

In [8]:
# Train the regressor on the scaled training split.
svr_model.fit(X=X_train, y=y_train)

In [9]:
# Predict the target for the held-out rows.
y_pred = svr_model.predict(X=X_test)


In [10]:
# Display the predictions for the held-out split.
y_pred

array([2008.97565118, 2007.29671484, 1992.20819793, ..., 2002.25702709,
       2001.78501277, 2009.23750717])

In [11]:
# Mean squared error of the predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)


In [12]:
# Display the mean squared error stored in `mse`.
mse

90.11413502338682

In [32]:
# Candidate values for the grid search: Gs is passed as the SVR `gamma`
# argument and Cs as the SVR `C` argument.
Gs = [
    0.1, 0.5,
    1, 2,
]
Cs = [
    0.01, 0.05,
    0.1, 0.5,
    1, 2, 5,
]

In [40]:
# Zero-filled results grid: one independent row per entry of Gs, one column
# per entry of Cs.
matrizSVR = [[0] * len(Cs) for _ in Gs]

In [47]:
# Fit one RBF-kernel SVR per (gamma, C) pair and store its MSE on
# X_test / y_test in matrizSVR, with rows indexed by Gs and columns by Cs.
# NOTE(review): the scores come from the same held-out split that is used to
# report the test MSE, so picking hyperparameters from this grid makes that
# figure optimistic -- consider a separate validation split.
for i, gamma_act in enumerate(Gs):
    for j, cost_act in enumerate(Cs):
        svr_model = SVR(kernel='rbf', gamma=gamma_act, C=cost_act, degree=3)

        print('Creando C',cost_act, 'G',gamma_act)
        svr_model.fit(X_train, y_train)

        y_pred = svr_model.predict(X_test)
        matrizSVR[i][j] = mean_squared_error(y_test, y_pred)

Creando C 0.01 G 0.1
Creando C 0.05 G 0.1
Creando C 0.1 G 0.1
Creando C 0.5 G 0.1
Creando C 1 G 0.1
Creando C 2 G 0.1
Creando C 5 G 0.1
Creando C 0.01 G 0.5
Creando C 0.05 G 0.5
Creando C 0.1 G 0.5
Creando C 0.5 G 0.5
Creando C 1 G 0.5
Creando C 2 G 0.5
Creando C 5 G 0.5
Creando C 0.01 G 1
Creando C 0.05 G 1
Creando C 0.1 G 1
Creando C 0.5 G 1
Creando C 1 G 1
Creando C 2 G 1
Creando C 5 G 1
Creando C 0.01 G 2
Creando C 0.05 G 2
Creando C 0.1 G 2
Creando C 0.5 G 2
Creando C 1 G 2
Creando C 2 G 2
Creando C 5 G 2


In [48]:
# Display the MSE grid: one row per gamma in Gs, one column per C in Cs.
matrizSVR

[[116.23135571079038,
  106.41568526355559,
  103.269079069079,
  97.1586259637519,
  94.53746826791442,
  91.86502602651697,
  87.91360361501707],
 [128.147398085782,
  126.59515809082694,
  125.10992017856447,
  119.86749941185604,
  118.18003274698955,
  115.34293528098395,
  109.7093982003069],
 [128.50951266892815,
  128.22972047957182,
  127.88381264005167,
  125.44053781557311,
  125.15913013965196,
  123.6115264308816,
  119.32241654383542],
 [128.53662445193436,
  128.36490481755226,
  128.1517042383184,
  126.52044193358327,
  127.01094224198857,
  126.26871540053344,
  123.15518726637389]]

In [49]:
import numpy as np

In [60]:
# Display the MSE grid again (rows follow Gs, columns follow Cs).
matrizSVR

[[116.23135571079038,
  106.41568526355559,
  103.269079069079,
  97.1586259637519,
  94.53746826791442,
  91.86502602651697,
  87.91360361501707],
 [128.147398085782,
  126.59515809082694,
  125.10992017856447,
  119.86749941185604,
  118.18003274698955,
  115.34293528098395,
  109.7093982003069],
 [128.50951266892815,
  128.22972047957182,
  127.88381264005167,
  125.44053781557311,
  125.15913013965196,
  123.6115264308816,
  119.32241654383542],
 [128.53662445193436,
  128.36490481755226,
  128.1517042383184,
  126.52044193358327,
  127.01094224198857,
  126.26871540053344,
  123.15518726637389]]

In [55]:
# Locate the lowest MSE in the grid together with the (gamma, C) pair that
# produced it. The built-in min() on a list of lists compares the rows
# lexicographically, so it would return the row with the smallest first
# entry rather than the smallest cell.
mse_grid = np.asarray(matrizSVR)
best_g_idx, best_c_idx = np.unravel_index(mse_grid.argmin(), mse_grid.shape)
# Rows of the grid follow Gs and columns follow Cs.
(Gs[best_g_idx], Cs[best_c_idx], float(mse_grid[best_g_idx, best_c_idx]))

(0.1, 5, 87.91360361501707)

In [61]:
# RBF-kernel SVR with C=5 and gamma=0.1, the pair with the lowest MSE in
# matrizSVR.
svr_model = SVR(
    kernel='rbf',   # kernel type ('linear', 'poly', 'rbf', ...)
    gamma=0.1,      # kernel coefficient for 'rbf', 'poly' and 'sigmoid'
    C=5,            # regularization parameter
    degree=3,       # polynomial degree; only the 'poly' kernel uses it
)

In [63]:
# Train the reconfigured regressor on the scaled training split.
svr_model.fit(X=X_train, y=y_train)

In [78]:
# Shape of the fitted model's `support_` array (indices of the support
# vectors), i.e. how many training rows ended up as support vectors.
svr_model.support_.shape

(65079,)

In [79]:
# Load the second comma-separated table, which is scored for the submission.
test = pd.read_csv(
    'testFS.csv',
    sep=',',
)

In [87]:
# Reuse the already-fitted scaler; it is not refitted on this table.
XTestKaggle = scaler.transform(X=test)


In [89]:
# Predictions for the scaled submission rows.
# NOTE(review): this rebinds `Y`, which earlier held the training target.
Y = svr_model.predict(X=XTestKaggle)

In [91]:
# Pair each prediction in Y with a 1-based row id and wrap both in a frame.
data = {
    'Id': np.arange(len(Y)) + 1,
    'y': Y,
}
df = pd.DataFrame(data=data)

In [92]:
# Write the frame to disk without the pandas index column.
# NOTE(review): the destination is an absolute path -- confirm it exists in
# the environment where this notebook runs.
csv_file_path = '/content/resultadosTunedSVM.csv'
df.to_csv(
    path_or_buf=csv_file_path,
    index=False,
)