In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Load the feature table and the label table of the oversampled dataset.
# index_col=0 makes the first CSV column the row index of each frame.
X_df = pd.read_csv(
    'data/dataset/oversampling/dfu_features_dataset.csv',
    index_col=0,
)
y_df = pd.read_csv(
    'data/dataset/oversampling/dfu_labels_dataset.csv',
    index_col=0,
)

In [None]:
def find_parameters(features_df: pd.DataFrame, n_features:int) -> pd.DataFrame:
    """Run a randomized SVC hyper-parameter search on a subset of features.

    The first ``n_features`` entries of ``features_df['Features']`` are used
    as column names to select from the module-level ``X_df``; the targets are
    taken from the module-level ``y_df``.

    Parameters
    ----------
    features_df : pd.DataFrame
        Frame with a ``'Features'`` column holding column names of ``X_df``.
        Presumably ordered from most to least important -- verify against
        the file that produced it.
    n_features : int
        How many leading rows of ``features_df`` to keep.

    Returns
    -------
    pd.DataFrame
        A single-row frame (index ``0``) whose columns are the best
        parameters found by the search.
    """
    selected_columns = features_df['Features'].iloc[:n_features].values
    design_matrix = X_df[selected_columns].to_numpy().astype(np.float32)
    targets = y_df.to_numpy().ravel()  # flatten the label frame to 1-D

    # Candidate values: C spans 1..100 and gamma spans 1e-3..1, both on a
    # log scale. 'probability' and 'random_state' have a single value each,
    # so they are constant across candidates and end up in the saved result.
    search_space = {
        'C': np.logspace(0, 2, 10),
        'gamma': np.logspace(-3, 0, 12),
        'degree': [3, 4, 5, 6],
        'kernel': ['rbf', 'linear', 'poly'],
        'probability': [True],
        'random_state': [42],
    }

    # 100 sampled candidates, 5-fold cross-validation, all available cores.
    search = RandomizedSearchCV(
        estimator=SVC(),
        param_distributions=search_space,
        n_iter=100,
        cv=5,
        refit=True,
        n_jobs=-1,
        verbose=2,
        random_state=42,
    )
    search.fit(design_matrix, targets)

    return pd.DataFrame(search.best_params_, index=[0])

# Lasso

In [None]:
# Feature list for the Lasso run; find_parameters reads its 'Features' column.
features_df = pd.read_csv(
    'data/features_importance/oversampling/lasso.csv'
)
# Feature-subset sizes to tune for.
n_parameters = [10, 25, 50]

# One search per subset size; each result goes to its own CSV file,
# named after the subset size.
for i in n_parameters:
    df = find_parameters(features_df, i)
    df.to_csv(
        'data/best_params/svm_lasso_{}.csv'.format(i),
        index=False,
    )

# Concrete Dropout

In [None]:
# Feature list for the Concrete Dropout run; find_parameters reads its
# 'Features' column.
features_df = pd.read_csv('data/features_importance/oversampling/concrete_dropout.csv')

# Feature-subset sizes to tune for.
n_parameters = [10, 25, 50]

# One search per subset size; each result goes to its own CSV file,
# named after the subset size.
for i in n_parameters:
    df = find_parameters(features_df, i)
    # save best params to csv file
    df.to_csv('data/best_params/svm_concrete_{}.csv'.format(i), index=False)

# All features

In [None]:
# Baseline run: tune the SVM on every column of X_df (no feature selection).
X = X_df.to_numpy().astype(np.float32)
y = y_df.to_numpy().ravel()  # flatten the label frame to 1-D

# Same search space and search settings as find_parameters.
param_grid = {'C': np.logspace(0, 2, 10), 'gamma': np.logspace(-3, 0, 12), 'degree':[3,4,5,6], 'kernel': ['rbf', 'linear', 'poly'],
        'probability': [True], 'random_state': [42]}

grid = RandomizedSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5, n_jobs=-1, n_iter=100, random_state=42)
grid.fit(X, y)

# Name the output after the number of features actually used in this run.
# The suffix must not come from a loop variable left over from another cell:
# that makes the file name depend on which cells ran before this one.
# NOTE(review): any consumer that expects a different suffix for the
# all-features result must be pointed at this file name -- confirm.
n_features_all = X.shape[1]

df = pd.DataFrame(grid.best_params_, index=[0])
df.to_csv('data/best_params/svm_all_{}.csv'.format(n_features_all), index=False)