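Train an RBF-kernel SVM (hyperparameters selected by grid search) and evaluate its performance on a held-out test set using accuracy, precision, recall, and F1 score.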

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [None]:
# Read the input CSV into a DataFrame; later cells expect it to contain
# an 'is_canceled' column that is used as the prediction target.
DATA_FILE = '3clean_hotel_data.csv'
data = pd.read_csv(DATA_FILE)

In [None]:
# Separate the label column from the predictor columns
TARGET_COLUMN = 'is_canceled'
y = data[TARGET_COLUMN]
X = data.drop(columns=[TARGET_COLUMN])

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions so no test information reaches the scaler
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# Hyperparameter grid for the SVM: two values of the regularisation
# parameter C crossed with two values of the kernel coefficient gamma,
# with the kernel fixed to RBF (2 x 2 x 1 = 4 candidates).
param_grid = {
    'C': [0.1, 1],
    'gamma': [0.1, 0.01],
    'kernel': ['rbf']
}

# Cross-validated grid search over the candidates above.
# - refit=True retrains the best candidate on the whole training set so that
#   grid.best_estimator_ is ready for prediction.
# - n_jobs=-1 runs the independent (candidate, fold) fits on all CPU cores;
#   individual fits were observed to take several minutes each, and running
#   them in parallel does not change which candidate is selected.
# NOTE(review): with parallel workers the per-fold verbose lines may be
# emitted by worker processes and can appear in the Jupyter server console
# instead of the cell output — confirm in your environment.
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3, n_jobs=-1)
grid.fit(x_train_scaled, y_train)

# Report which candidate won and its mean cross-validation score
print(f'Best parameters: {grid.best_params_}')
print(f'Best CV score: {grid.best_score_}')

# Use the best parameters to predict on the test set
best_model = grid.best_estimator_
y_pred = best_model.predict(x_test_scaled)

# Evaluate on the held-out test set.
# precision/recall/F1 are called with their defaults — assumes the target is
# binary with the positive class labelled 1; TODO confirm against the data.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

# Last expression of the cell: display the fitted best estimator
best_model

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.842 total time= 7.4min
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.838 total time= 7.4min
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.845 total time= 6.9min
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.843 total time= 6.2min
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.842 total time= 6.8min
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.784 total time= 4.6min
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.782 total time= 5.6min
[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.783 total time= 4.7min
[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.784 total time= 4.4min
[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.784 total time= 4.6min
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.907 total time= 9.2min
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;,