In [1]:
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.impute import SimpleImputer, KNNImputer

from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier
from sklearn.linear_model import SGDClassifier, RidgeClassifier

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from utils import get_data, plot_cm, filter_columns

%matplotlib inline
pd.set_option('display.max_columns', 50)
matplotlib.rcParams['figure.figsize'] = [15, 15]

#https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC

In [2]:
XY_train, X_test_ori, inverse_target_map = get_data(min_size=None, min_size_test=None, nan_thresh=10, fill_nan=None)
train_columns = list(XY_train.columns)
train_columns.remove("TARGET_NUM")

y_train_ori = XY_train["TARGET_NUM"].values
x_train = XY_train[train_columns].values

min_max_scaler = MinMaxScaler(feature_range=(0, 10)).fit(x_train)
x_train = min_max_scaler.transform(x_train)

imp = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=-1).fit(x_train)
x_train = imp.transform(x_train)

rs = RobustScaler().fit(x_train)
x_train = rs.transform(x_train)

stand_scaler = StandardScaler().fit(x_train)
x_train = stand_scaler.transform(x_train)

x_train = filter_columns(x_train)

X_train, X_test, y_train, y_test = train_test_split(x_train, y_train_ori, test_size=0.3, stratify=y_train_ori,
                                                    random_state=42)

In [None]:
lsvm = LinearSVC(random_state=42,
          #class_weight="balanced",
          verbose=1)
lsvm = lsvm.fit(X_train, y_train)
print(lsvm.score(X_train, y_train))
print(lsvm.score(X_test, y_test))
plot_cm(lsvm, X_test, y_test)

[LibLinear]....................................................................................................
optimization finished, #iter = 1000

Using -s 2 may be faster (also see FAQ)

Objective value = -339824.725405
nSV = 2586627
..................................

In [None]:
svm_p = SVC(random_state=42,
        kernel='poly',
          #class_weight="balanced",
          verbose=1)
svm_p = svm_p.fit(X_train, y_train)
print(svm_p.score(X_train, y_train))
print(svm_p.score(X_test, y_test))
plot_cm(svm_p, X_test, y_test)

[LibSVM]..........

In [None]:
svm_r = SVC(random_state=42,
           kernel='rbf',
          #class_weight="balanced",
          verbose=1)
svm_r = svm_r.fit(X_train, y_train)
print(svm_r.score(X_train, y_train))
print(svm_r.score(X_test, y_test))
plot_cm(svm_r, X_test, y_test)

In [None]:
svm_s = SVC(random_state=42,
           kernel='sigmoid',
          #class_weight="balanced",
          verbose=1)
svm_s = svm_s.fit(X_train, y_train)
print(svm_s.score(X_train, y_train))
print(svm_s.score(X_test, y_test))
plot_cm(svm_s, X_test, y_test)

In [None]:
svm_p = SVC(random_state=42,
           kernel='precomputed',
          #class_weight="balanced",
          verbose=1)
svm_p = svm_p.fit(X_train, y_train)
print(svm_p.score(X_train, y_train))
print(svm_p.score(X_test, y_test))
plot_cm(svm_p, X_test, y_test)