# SVM

In [2]:
import pandas as pd
import numpy as np

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('./drive/My Drive/bank-additional-prepared.csv',
                   delimiter=';', index_col=0)

In [None]:
# df = pd.read_csv('../data/processed/bank-additional-prepared.csv',
#                    delimiter=';', index_col=0)

In [4]:
y = df['y_yes']
X = df.drop(['y_yes'], axis=1)

In [25]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, 
                                                    random_state=5, stratify=y)

## Linear kernel

In [26]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV

svm_model = SVC(kernel='linear', class_weight='balanced')

params = {
            'C': np.arange(0.01, 10, 0.5),
            'probability': [True, False]
         }

skf = StratifiedKFold(n_splits=3, shuffle = True, random_state=42)


grid = GridSearchCV(svm_model, 
                    param_grid = params,  
                    n_jobs = -1, 
                    cv = skf.split(X_train, y_train),
                    scoring='f1')

In [27]:
grid.fit(X_train, y_train)
best_params = grid.best_params_
best_params

{'C': 0.51, 'probability': True}

In [28]:
svm = SVC(kernel='linear', 
          class_weight='balanced',
          C=0.51,
          probability=True)

svm.fit(X_train, y_train)

SVC(C=0.51, break_ties=False, cache_size=200, class_weight='balanced',
    coef0=0.0, decision_function_shape='ovr', degree=3, gamma='scale',
    kernel='linear', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [29]:
y_pred = svm.predict(X_test)

In [30]:
from sklearn.metrics import f1_score, classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.95      0.81      0.87      1128
           1       0.30      0.65      0.41       141

    accuracy                           0.79      1269
   macro avg       0.62      0.73      0.64      1269
weighted avg       0.88      0.79      0.82      1269



## Polynomial kernel


In [None]:
svm_model = SVC(kernel='poly', class_weight='balanced')

params = {
            'C': np.arange(0.01, 5, 0.5),
            'probability': [True, False]
        }

skf = StratifiedKFold(n_splits=3, shuffle = True, random_state=42)


grid = GridSearchCV(svm_model, 
                    param_grid = params,  
                    n_jobs = -1, 
                    cv = skf.split(X_train, y_train),
                    scoring='f1_macro')

In [None]:
grid.fit(X_train, y_train)
best_params = grid.best_params_
best_params

In [34]:
svm = SVC(kernel='poly', 
          class_weight='balanced',
          C=0.5,
          degree=3)

svm.fit(X_train, y_train)

SVC(C=0.5, break_ties=False, cache_size=200, class_weight='balanced', coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [35]:
y_pred = svm.predict(X_test)

In [36]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.98      0.94      1128
           1       0.53      0.20      0.29       141

    accuracy                           0.89      1269
   macro avg       0.72      0.59      0.61      1269
weighted avg       0.86      0.89      0.87      1269



### Sigmoid kernel

In [37]:
svm_model = SVC(kernel='sigmoid', class_weight='balanced')

params = {
            'C': np.arange(0.1, 10, 0.5),
            'probability': [True, False]
         }

skf = StratifiedKFold(n_splits=3, shuffle = True, random_state=42)


grid = GridSearchCV(svm_model, 
                    param_grid = params,  
                    n_jobs = -1, 
                    cv = skf.split(X_train, y_train),
                    scoring='f1')

In [38]:
grid.fit(X_train, y_train)
best_params = grid.best_params_
best_params

{'C': 0.1, 'probability': True}

In [39]:
svm = SVC(kernel='sigmoid', 
          class_weight='balanced',
          C=0.1,
          probability=True)

svm.fit(X_train, y_train)

SVC(C=0.1, break_ties=False, cache_size=200, class_weight='balanced', coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [40]:
y_pred = svm.predict(X_test)

In [41]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.98      0.94      1128
           1       0.53      0.20      0.29       141

    accuracy                           0.89      1269
   macro avg       0.72      0.59      0.61      1269
weighted avg       0.86      0.89      0.87      1269



## Radial basis function (RBF) kernel

In [42]:
svm_model = SVC(kernel='rbf', class_weight='balanced')

params = {
            'C': np.arange(0.01, 10, 0.5),
            'probability': [True, False]
         }

skf = StratifiedKFold(n_splits=3, shuffle = True, random_state=42)


grid = GridSearchCV(svm_model, 
                    param_grid = params,  
                    n_jobs = -1, 
                    cv = skf.split(X_train, y_train),
                    scoring='f1')

In [43]:
grid.fit(X_train, y_train)
best_params = grid.best_params_
best_params

{'C': 0.01, 'probability': True}

In [44]:
svm = SVC(kernel='rbf', 
          class_weight='balanced',
          C=0.01,
          probability=True)

svm.fit(X_train, y_train)

SVC(C=0.01, break_ties=False, cache_size=200, class_weight='balanced',
    coef0=0.0, decision_function_shape='ovr', degree=3, gamma='scale',
    kernel='rbf', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [46]:
y_pred = svm.predict(X_test)

In [47]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.98      0.94      1128
           1       0.53      0.20      0.29       141

    accuracy                           0.89      1269
   macro avg       0.72      0.59      0.61      1269
weighted avg       0.86      0.89      0.87      1269

