In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV, train_test_split

from sklearn.svm import LinearSVC

from sklearn.metrics import accuracy_score

In [3]:
data = pd.read_csv('task_d.csv')
data.head()

Unnamed: 0,x,y,z,x*x,2*y,2*z+3*x*x,w,target
0,-0.581066,0.841837,-1.012978,-0.604025,0.841837,-0.665927,-0.536277,0
1,-0.894309,-0.207835,-1.012978,-0.883052,-0.207835,-0.917054,-0.522364,0
2,-1.207552,0.212034,-1.082312,-1.150918,0.212034,-1.166507,0.205738,0
3,-1.364174,0.002099,-0.943643,-1.280666,0.002099,-1.26654,-0.66572,0
4,-0.737687,1.051772,-1.012978,-0.744934,1.051772,-0.792746,-0.735054,0


In [4]:
X = data.drop(labels = ['target'], axis = 1).values
X[:3]

array([[-0.5810659 ,  0.84183714, -1.01297765, -0.60402468,  0.84183714,
        -0.66592679, -0.53627703],
       [-0.89430898, -0.2078351 , -1.01297765, -0.88305213, -0.2078351 ,
        -0.91705408, -0.52236404],
       [-1.20755205,  0.21203379, -1.08231219, -1.15091848,  0.21203379,
        -1.16650718,  0.20573767]])

In [5]:
scaler = StandardScaler()

In [6]:
scaler.fit(X)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [7]:
X_std = scaler.transform(X)

In [8]:
Y = data.target.values

In [9]:
D_Train, d_test, Y_Train, y_test = train_test_split(X_std, Y, test_size = 0.2, random_state = 42, stratify = Y)

In [10]:
d_train, d_cv, y_train, y_cv = train_test_split(D_Train, Y_Train, test_size = 0.2, random_state = 52, stratify = Y_Train)

In [11]:
model = LinearSVC()

In [12]:
penalties = ['l1', 'l2']
penalties

['l1', 'l2']

In [13]:
losses = ['hinge', 'squared_hinge']
losses

['hinge', 'squared_hinge']

In [14]:
tolerances = [1e-3, 1e-4, 1e-5]
tolerances

[0.001, 0.0001, 1e-05]

In [15]:
cs = np.logspace(10, 1, 400)
print(type(cs))
print(cs.shape)
cs[:5]

<class 'numpy.ndarray'>
(400,)


array([1.00000000e+10, 9.49387718e+09, 9.01337039e+09, 8.55718314e+09,
       8.12408458e+09])

In [16]:
class_weights = [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}, {1:0.3, 0:0.7}]
class_weights

[{1: 0.5, 0: 0.5},
 {1: 0.4, 0: 0.6},
 {1: 0.6, 0: 0.4},
 {1: 0.7, 0: 0.3},
 {1: 0.3, 0: 0.7}]

In [17]:
params_grid = dict(
    penalty = penalties,
    loss = losses,
    tol = tolerances,
    C = cs,
    class_weight = class_weights)

In [18]:
clf = GridSearchCV(model, params_grid, n_jobs = -1, verbose = 1)

In [19]:
clf.fit(d_train, y_train)

Fitting 5 folds for each of 24000 candidates, totalling 120000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 2584 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 10584 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done 21784 tasks      | elapsed:   16.6s
[Parallel(n_jobs=-1)]: Done 36184 tasks      | elapsed:   26.3s
[Parallel(n_jobs=-1)]: Done 53784 tasks      | elapsed:   37.7s
[Parallel(n_jobs=-1)]: Done 74584 tasks      | elapsed:   51.7s
[Parallel(n_jobs=-1)]: Done 98584 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 120000 out of 120000 | elapsed:  1.4min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=LinearSVC(C=1.0, class_weight=None, dual=True,
                                 fit_intercept=True, intercept_scaling=1,
                                 loss='squared_hinge', max_iter=1000,
                                 multi_class='ovr', penalty='l2',
                                 random_state=None, tol=0.0001, verbose=0),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': array([1.00000000e+10, 9.49387718e+09, 9.01337039e+09, 8.55718314e+09,
       8.12...
       1.43844989e+01, 1.36564666e+01, 1.29652816e+01, 1.23090791e+01,
       1.16860886e+01, 1.10946289e+01, 1.05331045e+01, 1.00000000e+01]),
                         'class_weight': [{0: 0.5, 1: 0.5}, {0: 0.6, 1: 0.4},
                                          {0: 0.4, 1: 0.6}, {0: 0.3, 1: 0.7},
                                          {0: 0.7, 1: 0.3}],
                         'loss': ['hinge', 'squared_hinge'],
                         

In [20]:
best_model = clf.best_estimator_
best_model.fit(d_train, y_train)

LinearSVC(C=10000000000.0, class_weight={0: 0.5, 1: 0.5}, dual=True,
          fit_intercept=True, intercept_scaling=1, loss='hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.001,
          verbose=0)

In [21]:
best_model.coef_

array([[ 0.22685258, -0.2055681 ,  0.83240615,  0.18028254, -0.2055681 ,
         0.26275416, -0.13076901]])

In [23]:
best_model.get_params

<bound method BaseEstimator.get_params of LinearSVC(C=10000000000.0, class_weight={0: 0.5, 1: 0.5}, dual=True,
          fit_intercept=True, intercept_scaling=1, loss='hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.001,
          verbose=0)>

In [22]:
y_test_hat = best_model.predict(d_test)
score = accuracy_score(y_test, y_test_hat)
score

1.0