In [2]:
import re
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import cross_val_score, GridSearchCV, ShuffleSplit

from sklearn.preprocessing import normalize
from sklearn.feature_selection import RFE
%matplotlib inline

# Import Data

In [3]:
folder = "~/workspace/datasets/ml-mipt-spring2018-hw5-v1/"
data = pd.read_csv(folder + 'train.csv')
data.drop('id', axis = 1, inplace = True)
X_train = np.asarray(data[data.columns[range(1, data.shape[1])]], dtype = np.double)
Y_train = np.asarray(data[['label']], dtype = np.double).ravel()
data.head()

Unnamed: 0,label,feat1,feat2,feat3,feat4,feat5,feat6,feat7,feat8,feat9,...,feat207,feat208,feat209,feat210,feat211,feat212,feat213,feat214,feat215,feat216
0,1,7,0,3,0,2,3,0,6,0,...,3,4,2,2,0,13,0,11,1,3
1,1,0,11,0,0,10,1,0,0,4,...,0,2,0,0,2,8,1,13,0,4
2,0,9,0,3,0,1,3,0,4,0,...,48,11,2,0,0,4,0,2,0,0
3,0,0,9,3,2,25,0,4,0,0,...,1,14,1,0,0,0,3,0,17,1
4,0,0,0,0,0,2,5,0,0,0,...,3,12,0,3,0,4,0,24,4,0


In [4]:
data.groupby(['label']).count()

Unnamed: 0_level_0,feat1,feat2,feat3,feat4,feat5,feat6,feat7,feat8,feat9,feat10,...,feat207,feat208,feat209,feat210,feat211,feat212,feat213,feat214,feat215,feat216
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,327,327,327,327,327,327,327,327,327,327,...,327,327,327,327,327,327,327,327,327,327
1,573,573,573,573,573,573,573,573,573,573,...,573,573,573,573,573,573,573,573,573,573


# Feature transformation

In [5]:
def transform_id(X, X_train):
    return X

In [6]:
def transform_normed(X, X_train):
    return (X - X_train.mean(axis=0)) / X_train.std(axis=0)

In [7]:
def transform_scaled(X, X_train):
    return X / X_train.max(axis=0)

In [8]:
from sklearn.decomposition import PCA

In [9]:
pca_100 = PCA(n_components=100, random_state=17)
pca_150 = PCA(n_components=150, random_state=17)
pca_200 = PCA(n_components=200, random_state=17)
            
pca_100.fit(X_train)
pca_150.fit(X_train)
pca_200.fit(X_train)

def transform_pca_100(X, X_train):
    return pca_100.transform(X)

def transform_pca_150(X, X_train):
    return pca_150.transform(X)

def transform_pca_200(X, X_train):
    return pca_200.transform(X)

# Fit Methods

In [10]:
transformations = [
    transform_pca_100,
    transform_pca_150,
    transform_pca_200,
]

In [11]:
estimators = [
    SGDClassifier(max_iter=50000, tol=0.1, penalty='l1', alpha=0.1),
    SGDClassifier(max_iter=50000, tol=0.1, penalty='l2', alpha=0.1),
    # SVC(kernel='rbf', C = 0.1, gamma=2),
    SVC(kernel='linear', C = 0.1, gamma=2),
    SVC(kernel='sigmoid', C = 0.1, gamma=2),
    SVC(kernel='poly', C = 0.1, gamma=2),
    
    # SVC(kernel='rbf', C = 1, gamma=2),
    SVC(kernel='linear', C = 1, gamma=2),
    SVC(kernel='sigmoid', C = 1, gamma=2),
    SVC(kernel='poly', C = 1, gamma=2),
    
    # SVC(kernel='rbf', C = 0.1, gamma="auto"),
    SVC(kernel='linear', C = 0.1, gamma="auto"),
    SVC(kernel='sigmoid', C = 0.1, gamma="auto"),
    SVC(kernel='poly', C = 0.1, gamma="auto"),
    
    #SVC(kernel='rbf', C = 1, gamma="auto"),
    SVC(kernel='linear', C = 1, gamma="auto"),
    SVC(kernel='sigmoid', C = 1, gamma="auto"),
    SVC(kernel='poly', C = 11, gamma="auto"),
    
    #SVC(kernel='precomputed', C = 0.1, gamma=2),
    #LinearSVC(penalty = 'l1', loss='hinge'), 
    LinearSVC(penalty = 'l1', loss='squared_hinge', dual=False),
    LinearSVC(penalty = 'l2', loss='hinge'),
    LinearSVC(penalty = 'l2', loss='squared_hinge', dual=False),# 'hinge' or 'squared_hinge', 
    LogisticRegression(penalty='l1', C=0.2),
    LogisticRegression(penalty='l1', C=1),
    LogisticRegression(penalty='l2', C=0.2),
    LogisticRegression(penalty='l2', C=1),
]

estimators = [
    LogisticRegression(class_weight = "balanced", penalty='l1', C=0.2),
    LogisticRegression(class_weight = "balanced", penalty='l1', C=1),
    LogisticRegression(class_weight = "balanced", penalty='l2', C=0.2),
    LogisticRegression(class_weight = "balanced", penalty='l2', C=1),
]

In [12]:
grid_result = []
cv_strategy = ShuffleSplit(n_splits=10, test_size=0.2, random_state=33)
for transform_i, transform in enumerate(transformations):
    X_train_transformed = transform(X_train, X_train)
    transform_name = str(transform).split(" ")[1]
    for estimator_i, estimator in enumerate(estimators):
        %time scoring = cross_val_score(estimator, X_train_transformed, Y_train, scoring='roc_auc', cv=cv_strategy)
        estimator_name_short = str(estimator)
        score_mean = scoring.mean()
        score_std = scoring.std()
        grid_result.append((transform_i, estimator_i, score_mean, score_std))
        print ('{}\n #data = {}, mean = {}, std = {}\n'.format(
            estimator_name_short, transform_name, round(score_mean, 4), round(score_std, 4)))

CPU times: user 637 ms, sys: 956 ms, total: 1.59 s
Wall time: 204 ms
LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
 #data = transform_pca_100, mean = 0.9205, std = 0.0194

CPU times: user 1.15 s, sys: 1.98 s, total: 3.13 s
Wall time: 396 ms
LogisticRegression(C=1, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
 #data = transform_pca_100, mean = 0.9179, std = 0.0207

CPU times: user 1.03 s, sys: 1.95 s, total: 2.98 s
Wall time: 377 ms
LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          m

In [13]:
grid_result = np.array(grid_result)
best_5 = grid_result[:, 2].argsort()[-5:][::-1]
for i in best_5:
    print(grid_result[i])
    transformation = transformations[int(grid_result[i, 0])]
    transform_name = str(transform).split(" ")[1]
    estimator_name = str(estimators[int(grid_result[i, 1])])
    score_mean =  grid_result[i, 2]
    score_std =  grid_result[i, 3]
    print ('{}\n #data = {}, mean = {}, std = {}\n'.format(
        estimator_name, transform_name, round(score_mean, 4), round(score_std, 4)))

[1.         0.         0.92215553 0.02003082]
LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
 #data = transform_pca_200, mean = 0.9222, std = 0.02

[2.         0.         0.92113349 0.01975395]
LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
 #data = transform_pca_200, mean = 0.9211, std = 0.0198

[2.         1.         0.92069704 0.01634099]
LogisticRegression(C=1, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          

# Get final classifier

In [14]:
best_i = int(grid_result[:, 2].argmax())
print(grid_result[best_i])
transformation = transformations[int(grid_result[best_i, 0])]
estimator = estimators[int(grid_result[best_i, 1])]
print("Best:", transformation, estimator)
X_train_transformed = transformation(X_train, X_train)
estimator.fit(X_train_transformed, Y_train)

[1.         0.         0.92215553 0.02003082]
Best: <function transform_pca_150 at 0x7f1412cbf598> LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)


LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

# Some parameters fitting

In [16]:
estimator = LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

params = {
    "C":np.linspace(0.01, 5, 100),
    "penalty":["l1", "l2"],
}
gscv = GridSearchCV(estimator, params, scoring='roc_auc', n_jobs=-1, verbose=100)
gscv.fit(X_train_transformed, Y_train)

Fitting 3 folds for each of 200 candidates, totalling 600 fits
[CV] C=0.01, penalty=l1 ..............................................
[CV] C=0.01, penalty=l1 ..............................................
[CV] C=0.01, penalty=l1 ..............................................
[CV] C=0.01, penalty=l2 ..............................................
[CV] C=0.01, penalty=l2 ..............................................
[CV] C=0.01, penalty=l2 ..............................................
[CV] C=0.06040404040404041, penalty=l1 ...............................
[CV] C=0.06040404040404041, penalty=l1 ...............................
Memmaping (shape=(900, 150), dtype=float64) to new file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/jobl

[CV] C=0.11080808080808081, penalty=l2 ...............................
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.2s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[CV]  C=0.11080808080808081, penalty=l1, score=0.8821269033094769, total=   0.1s
[CV]  C=0.11080808080808081, penalty=l1, score=0.9328017676161199, total=   0.1s
[CV]  C=0.11080808080808081, penalty=l2, score=0.8913492482828186, total=   0.1s
[CV] C=0.16121212121212125, penalty=l1 ...............................
[CV]  C=0.06040404040404041, penalty=l2, score=0.9182957875018012, total=   0

[CV]  C=0.4132323232323233, penalty=l1, score=0.9240117200634037, total=   0.2s
[CV] C=0.21161616161616165, penalty=l2 ...............................
[CV] C=0.16121212121212125, penalty=l2 ...............................
[CV]  C=0.21161616161616165, penalty=l2, score=0.8690619145972428, total=   0.2s
[CV] C=0.4132323232323233, penalty=l1 ................................
[CV] C=0.3628282828282829, penalty=l1 ................................
[CV] C=0.3124242424242425, penalty=l1 ................................
[CV] C=0.26202020202020204, penalty=l1 ...............................
[CV]  C=0.3628282828282829, penalty=l2, score=0.9153177386041597, total=   0.3s
[CV]  C=0.26202020202020204, penalty=l2, score=0.8876987367308709, total=   0.1s
[CV] C=0.26202020202020204, penalty=l2 ...............................
[CV] C=0.3628282828282829, penalty=l2 ................................
[CV]  C=0.16121212121212125, penalty=l2, score=0.8893318603199001, total=   0.2s
[CV]  C=0.3124242424242425, p

[CV]  C=0.4132323232323233, penalty=l2, score=0.9145492098563812, total=   0.2s
[CV]  C=0.614848484848485, penalty=l2, score=0.8869302079830924, total=   0.3s
[CV]  C=0.5140404040404041, penalty=l1, score=0.8838560929919785, total=   0.2s
[CV] C=0.5140404040404041, penalty=l2 ................................
[CV]  C=0.4636363636363637, penalty=l2, score=0.9146452759498536, total=   0.3s
[CV]  C=0.5644444444444445, penalty=l2, score=0.9153657716508958, total=   0.2s
[CV]  C=0.4636363636363637, penalty=l1, score=0.9240117200634036, total=   0.3s
[CV] C=0.5644444444444445, penalty=l2 ................................
[CV] C=0.4636363636363637, penalty=l2 ................................
[CV] C=0.4636363636363637, penalty=l1 ................................
[CV] C=0.6652525252525253, penalty=l1 ................................
[CV] C=0.614848484848485, penalty=l2 .................................
[CV]  C=0.614848484848485, penalty=l1, score=0.902060617704981, total=   0.3s
[CV] C=0.61484848

[CV] C=0.6652525252525253, penalty=l2 ................................
[CV] C=0.866868686868687, penalty=l2 .................................
[CV]  C=0.8164646464646466, penalty=l2, score=0.8864498775157309, total=   0.4s
[CV] C=0.8164646464646466, penalty=l2 ................................
[CV]  C=0.7156565656565658, penalty=l1, score=0.8824151015898938, total=   0.3s
[CV] C=0.7156565656565658, penalty=l2 ................................
[CV]  C=0.7660606060606061, penalty=l2, score=0.9156059368845767, total=   0.4s
[CV]  C=0.8164646464646466, penalty=l1, score=0.900187328882271, total=   0.2s
[CV] C=0.7660606060606061, penalty=l2 ................................
[CV]  C=0.7660606060606061, penalty=l1, score=0.9253086123252797, total=   0.2s
[CV]  C=0.6652525252525253, penalty=l1, score=0.9012440559104664, total=   0.4s
[CV] C=0.8164646464646466, penalty=l1 ................................
[CV] C=0.9172727272727274, penalty=l1 ................................
[CV] C=0.766060606060606

[CV]  C=1.0684848484848486, penalty=l2, score=0.9141649454824919, total=   0.1s
[CV] C=1.0684848484848486, penalty=l2 ................................
[CV]  C=1.118888888888889, penalty=l1, score=0.9237235217829868, total=   0.2s
[CV]  C=1.0180808080808081, penalty=l1, score=0.8799654162063499, total=   0.2s
[CV]  C=0.9676767676767678, penalty=l2, score=0.8860656131418415, total=   0.3s
[CV]  C=0.9172727272727274, penalty=l2, score=0.9149815072770066, total=   0.3s
[CV] C=0.9676767676767678, penalty=l2 ................................
[CV]  C=1.118888888888889, penalty=l1, score=0.8799173831596139, total=   0.4s
[CV] C=1.0180808080808081, penalty=l2 ................................
[CV] C=1.118888888888889, penalty=l2 .................................
[CV] C=1.118888888888889, penalty=l2 .................................
[CV]  C=0.9676767676767678, penalty=l1, score=0.9249723809981266, total=   0.4s
[CV] C=0.9676767676767678, penalty=l1 ................................
[CV]  C=1.068484

[CV] C=1.21969696969697, penalty=l2 ..................................
[CV]  C=1.1692929292929295, penalty=l1, score=0.899226667947548, total=   0.5s
[CV] C=1.3205050505050506, penalty=l1 ................................
[CV]  C=1.1692929292929295, penalty=l2, score=0.913876747202075, total=   0.4s
[CV] C=1.3205050505050506, penalty=l1 ................................
[CV]  C=1.21969696969697, penalty=l2, score=0.8685335510831451, total=   0.2s
[CV] C=1.3205050505050506, penalty=l2 ................................
[Parallel(n_jobs=-1)]: Done 142 tasks      | elapsed:    6.3s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-13972

[CV]  C=1.370909090909091, penalty=l1, score=0.8988904366203948, total=   0.4s
[CV] C=1.4213131313131315, penalty=l2 ................................
[CV]  C=1.370909090909091, penalty=l2, score=0.8862577453287862, total=   0.3s
[CV] C=1.4213131313131315, penalty=l2 ................................
[CV] C=1.471717171717172, penalty=l1 .................................
[CV]  C=1.4213131313131315, penalty=l2, score=0.9125798549401988, total=   0.2s
[CV]  C=1.370909090909091, penalty=l2, score=0.8684374849896729, total=   0.4s
[CV] C=1.471717171717172, penalty=l1 .................................
[CV] C=1.471717171717172, penalty=l1 .................................
[Parallel(n_jobs=-1)]: Done 166 tasks      | elapsed:    7.3s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling a

[CV]  C=1.5221212121212122, penalty=l1, score=0.8989865027138673, total=   0.6s
[CV]  C=1.5221212121212122, penalty=l2, score=0.8681973197559921, total=   0.3s
[CV] C=1.5725252525252527, penalty=l2 ................................
[CV] C=1.622929292929293, penalty=l1 .................................
[Parallel(n_jobs=-1)]: Done 181 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 182 tasks      | elapsed:    8.2s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (sh

[CV]  C=1.622929292929293, penalty=l1, score=0.8992266679475479, total=   0.7s
[CV]  C=1.6733333333333336, penalty=l2, score=0.912003458379365, total=   0.3s
[CV]  C=1.6733333333333336, penalty=l1, score=0.8802536144867669, total=   0.4s
[CV] C=1.723737373737374, penalty=l1 .................................
[CV]  C=1.723737373737374, penalty=l1, score=0.9214659685863875, total=   0.3s
[CV] C=1.723737373737374, penalty=l2 .................................
[CV]  C=1.6733333333333336, penalty=l2, score=0.8678130553821028, total=   0.3s
[CV] C=1.723737373737374, penalty=l2 .................................
[CV] C=1.723737373737374, penalty=l2 .................................
[CV]  C=1.6733333333333336, penalty=l1, score=0.9217061338200683, total=   0.5s
[CV]  C=1.6733333333333336, penalty=l1, score=0.8991306018540756, total=   0.5s
[CV] C=1.7741414141414142, penalty=l1 ................................
[CV] C=1.7741414141414142, penalty=l1 ................................
[Parallel(n_jobs=

[CV]  C=1.8245454545454547, penalty=l1, score=0.8994668331812287, total=   0.5s
[CV] C=1.8749494949494951, penalty=l2 ................................
[CV]  C=1.8245454545454547, penalty=l2, score=0.886161679235314, total=   0.4s
[CV]  C=1.8245454545454547, penalty=l1, score=0.8802055814400307, total=   0.4s
[CV]  C=1.8245454545454547, penalty=l2, score=0.867861088428839, total=   0.4s
[CV] C=1.8749494949494951, penalty=l2 ................................
[CV] C=1.8749494949494951, penalty=l2 ................................
[CV]  C=1.8245454545454547, penalty=l2, score=0.9118593592391565, total=   0.3s
[CV] C=1.9253535353535356, penalty=l1 ................................
[CV] C=1.9253535353535356, penalty=l1 ................................
[Parallel(n_jobs=-1)]: Done 218 tasks      | elapsed:   10.4s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (sha

[CV]  C=1.975757575757576, penalty=l2, score=0.9117632931456843, total=   0.2s
[CV] C=2.026161616161616, penalty=l1 .................................
[CV]  C=1.9253535353535356, penalty=l2, score=0.9118113261924203, total=   0.5s
[CV]  C=1.975757575757576, penalty=l1, score=0.9202171093712475, total=   0.4s
[CV] C=2.026161616161616, penalty=l1 .................................
[CV] C=2.026161616161616, penalty=l2 .................................
[CV]  C=1.975757575757576, penalty=l2, score=0.8861136461885777, total=   0.3s
[CV] C=2.026161616161616, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 234 tasks      | elapsed:   11.2s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 235 tasks

[CV]  C=2.0765656565656565, penalty=l1, score=0.8997069984149094, total=   0.7s
[CV]  C=2.126969696969697, penalty=l2, score=0.8863057783755224, total=   0.2s
[CV] C=2.1773737373737374, penalty=l1 ................................
[CV] C=2.1773737373737374, penalty=l1 ................................
[CV]  C=2.1773737373737374, penalty=l1, score=0.9203131754647198, total=   0.2s
[CV]  C=2.126969696969697, penalty=l1, score=0.9201690763245113, total=   0.6s
[CV] C=2.1773737373737374, penalty=l2 ................................
[CV] C=2.1773737373737374, penalty=l2 ................................
[CV]  C=2.126969696969697, penalty=l2, score=0.911379028771795, total=   0.4s
[CV] C=2.1773737373737374, penalty=l2 ................................
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:   12.3s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape

[CV] C=2.3285858585858588, penalty=l1 ................................
[CV]  C=2.227777777777778, penalty=l1, score=0.8788606561314184, total=   0.5s
[CV]  C=2.2781818181818183, penalty=l1, score=0.9198808780440942, total=   0.4s
[CV] C=2.3285858585858588, penalty=l1 ................................
[CV] C=2.3285858585858588, penalty=l1 ................................
[CV]  C=2.227777777777778, penalty=l1, score=0.8997069984149095, total=   0.7s
[CV] C=2.3285858585858588, penalty=l2 ................................
[Parallel(n_jobs=-1)]: Done 270 tasks      | elapsed:   13.3s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 271 tasks      | elapsed:   13.3s
Memmaping (shape=(900, 150), dtype=float64) to old fil

[CV]  C=2.4293939393939397, penalty=l1, score=0.8994188001344925, total=   0.5s
[CV] C=2.4797979797979797, penalty=l1 ................................
[CV]  C=2.4293939393939397, penalty=l1, score=0.8781881934771122, total=   0.5s
[CV] C=2.4797979797979797, penalty=l2 ................................
[CV]  C=2.4293939393939397, penalty=l1, score=0.9195446467169412, total=   0.6s
[CV] C=2.4797979797979797, penalty=l2 ................................
[CV]  C=2.4293939393939397, penalty=l2, score=0.9108026322109611, total=   0.4s
[CV] C=2.4797979797979797, penalty=l2 ................................
[CV]  C=2.4293939393939397, penalty=l2, score=0.8864018444689947, total=   0.3s
[Parallel(n_jobs=-1)]: Done 289 tasks      | elapsed:   14.5s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64)

[CV]  C=2.53020202020202, penalty=l2, score=0.8682933858494644, total=   0.4s
[CV] C=2.631010101010101, penalty=l1 .................................
[CV]  C=2.5806060606060606, penalty=l2, score=0.9103223017435996, total=   0.2s
[CV] C=2.631010101010101, penalty=l1 .................................
[CV]  C=2.5806060606060606, penalty=l1, score=0.8992747009942841, total=   0.4s
[CV] C=2.631010101010101, penalty=l1 .................................
[CV] C=2.631010101010101, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 305 tasks      | elapsed:   15.4s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   15.4s
Memmaping (shape=(900, 150), dtype=float64) to old fil

[CV] C=2.7822222222222224, penalty=l1 ................................
[CV]  C=2.731818181818182, penalty=l1, score=0.9186800518756904, total=   0.5s
[CV] C=2.7822222222222224, penalty=l1 ................................
[CV]  C=2.6814141414141415, penalty=l2, score=0.8684374849896729, total=   0.5s
[CV]  C=2.731818181818182, penalty=l1, score=0.8776117969162784, total=   0.4s
[CV]  C=2.731818181818182, penalty=l2, score=0.9102262356501273, total=   0.4s
[CV] C=2.7822222222222224, penalty=l1 ................................
[CV]  C=2.731818181818182, penalty=l2, score=0.8682933858494644, total=   0.4s
[CV] C=2.7822222222222224, penalty=l2 ................................
[CV] C=2.7822222222222224, penalty=l2 ................................
[CV]  C=2.731818181818182, penalty=l1, score=0.8993707670877563, total=   0.5s
[CV]  C=2.7822222222222224, penalty=l1, score=0.9184879196887459, total=   0.5s
[CV]  C=2.731818181818182, penalty=l2, score=0.8866420097026755, total=   0.5s
[CV] C=2.78

[CV]  C=2.832626262626263, penalty=l2, score=0.9100341034631827, total=   0.6s
[CV] C=2.8830303030303033, penalty=l2 ................................
[CV]  C=2.832626262626263, penalty=l2, score=0.8866420097026755, total=   0.5s
[CV]  C=2.832626262626263, penalty=l2, score=0.8684374849896729, total=   0.4s
[CV]  C=2.8830303030303033, penalty=l2, score=0.9098900043229743, total=   0.4s
[CV]  C=2.8830303030303033, penalty=l1, score=0.8773716316825976, total=   0.4s
[CV]  C=2.8830303030303033, penalty=l1, score=0.9185839857822182, total=   0.5s
[CV] C=2.9334343434343437, penalty=l1 ................................
[CV] C=2.9334343434343437, penalty=l1 ................................
[CV]  C=2.8830303030303033, penalty=l2, score=0.8867861088428839, total=   0.4s
[CV] C=2.9334343434343437, penalty=l2 ................................
[CV] C=2.9334343434343437, penalty=l1 ................................
[CV] C=2.9334343434343437, penalty=l2 ................................
[CV] C=2.93343434

[CV] C=3.034242424242424, penalty=l2 .................................
[CV]  C=2.9838383838383837, penalty=l2, score=0.868485518036409, total=   0.5s
[CV] C=3.0846464646464646, penalty=l1 ................................
[CV]  C=2.9838383838383837, penalty=l1, score=0.8991786349008118, total=   0.7s
[CV]  C=3.034242424242424, penalty=l1, score=0.9183438205485374, total=   0.5s
[Parallel(n_jobs=-1)]: Done 355 tasks      | elapsed:   18.8s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 356 tasks      | elapsed:   18.8s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), 

[CV] C=3.1854545454545455, penalty=l1 ................................
[CV]  C=3.135050505050505, penalty=l2, score=0.8866900427494115, total=   0.4s
[CV] C=3.1854545454545455, penalty=l1 ................................
[CV]  C=3.135050505050505, penalty=l2, score=0.8683894519429367, total=   0.3s
[CV] C=3.1854545454545455, penalty=l2 ................................
[Parallel(n_jobs=-1)]: Done 372 tasks      | elapsed:   19.8s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 373 tasks      | elapsed:   19.9s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=flo

[CV]  C=3.2862626262626264, penalty=l1, score=0.8989865027138672, total=   0.6s
[CV]  C=3.2862626262626264, penalty=l2, score=0.8683894519429367, total=   0.3s
[CV] C=3.336666666666667, penalty=l1 .................................
[CV]  C=3.2862626262626264, penalty=l2, score=0.9097459051827657, total=   0.4s
[CV]  C=3.2862626262626264, penalty=l1, score=0.8771314664489168, total=   0.5s
[CV] C=3.336666666666667, penalty=l2 .................................
[CV]  C=3.2862626262626264, penalty=l2, score=0.8867861088428839, total=   0.3s
[CV] C=3.336666666666667, penalty=l2 .................................
[CV] C=3.336666666666667, penalty=l2 .................................
[CV]  C=3.2862626262626264, penalty=l1, score=0.918247754455065, total=   0.6s
[CV] C=3.3870707070707073, penalty=l1 ................................
[CV] C=3.3870707070707073, penalty=l1 ................................
[Parallel(n_jobs=-1)]: Done 391 tasks      | elapsed:   21.1s
Memmaping (shape=(900, 150), dtyp

[CV]  C=3.437474747474748, penalty=l1, score=0.918199721408329, total=   0.7s
[CV] C=3.4878787878787882, penalty=l1 ................................
[CV]  C=3.437474747474748, penalty=l1, score=0.8987943705269226, total=   0.6s
[CV] C=3.4878787878787882, penalty=l2 ................................
[CV]  C=3.437474747474748, penalty=l2, score=0.9097939382295019, total=   0.4s
[CV]  C=3.437474747474748, penalty=l2, score=0.8683894519429367, total=   0.4s
[CV] C=3.4878787878787882, penalty=l2 ................................
[CV] C=3.4878787878787882, penalty=l2 ................................
[CV]  C=3.437474747474748, penalty=l2, score=0.8869302079830924, total=   0.4s
[CV] C=3.5382828282828283, penalty=l1 ................................
[Parallel(n_jobs=-1)]: Done 409 tasks      | elapsed:   22.3s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(

[CV]  C=3.5886868686868687, penalty=l2, score=0.9099380373697105, total=   0.2s
[CV] C=3.639090909090909, penalty=l1 .................................
[CV]  C=3.5886868686868687, penalty=l2, score=0.8870743071233008, total=   0.2s
[CV]  C=3.5382828282828283, penalty=l2, score=0.8685335510831452, total=   0.4s
[CV] C=3.639090909090909, penalty=l2 .................................
[CV] C=3.639090909090909, penalty=l1 .................................
[CV]  C=3.5886868686868687, penalty=l1, score=0.8767952351217638, total=   0.3s
[CV] C=3.639090909090909, penalty=l2 .................................
[CV]  C=3.5886868686868687, penalty=l1, score=0.9181036553148567, total=   0.5s
[CV] C=3.639090909090909, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 426 tasks      | elapsed:   23.4s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (s

[CV] C=3.7903030303030305, penalty=l1 ................................
[CV] C=3.73989898989899, penalty=l2 ..................................
[CV]  C=3.73989898989899, penalty=l1, score=0.8987943705269226, total=   0.5s
[CV] C=3.7903030303030305, penalty=l2 ................................
[CV] C=3.7903030303030305, penalty=l1 ................................
[Parallel(n_jobs=-1)]: Done 441 tasks      | elapsed:   24.5s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   24.6s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pi

[CV] C=3.941515151515152, penalty=l1 .................................
[CV]  C=3.8911111111111114, penalty=l2, score=0.9096018060425572, total=   0.2s
[CV] C=3.941515151515152, penalty=l1 .................................
[CV]  C=3.840707070707071, penalty=l1, score=0.8988904366203948, total=   0.7s
[CV] C=3.941515151515152, penalty=l1 .................................
[CV]  C=3.8911111111111114, penalty=l2, score=0.8870262740765646, total=   0.3s
[Parallel(n_jobs=-1)]: Done 461 tasks      | elapsed:   25.8s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 462 tasks      | elapsed:   25.9s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-13972419112

[CV]  C=3.9919191919191923, penalty=l2, score=0.8680532206157836, total=   0.4s
[CV] C=4.042323232323232, penalty=l2 .................................
[CV]  C=4.042323232323232, penalty=l2, score=0.8870262740765646, total=   0.2s
[CV] C=4.092727272727273, penalty=l1 .................................
[CV]  C=3.9919191919191923, penalty=l1, score=0.8768913012152361, total=   0.6s
[CV] C=4.092727272727273, penalty=l1 .................................
[CV]  C=3.9919191919191923, penalty=l1, score=0.9182957875018012, total=   0.8s
[Parallel(n_jobs=-1)]: Done 478 tasks      | elapsed:   26.9s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 479 tasks      | elapsed:   27.0s
Memmaping (shape=(900, 150), dtype=float64) 

[CV] C=4.193535353535354, penalty=l2 .................................
[CV]  C=4.143131313131313, penalty=l2, score=0.9094096738556126, total=   0.4s
[CV] C=4.193535353535354, penalty=l2 .................................
[CV]  C=4.143131313131313, penalty=l2, score=0.8871703732167732, total=   0.4s
[CV] C=4.193535353535354, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 494 tasks      | elapsed:   28.1s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 495 tasks      | elapsed:   28.2s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=flo

[CV]  C=4.294343434343435, penalty=l2, score=0.9096018060425572, total=   0.2s
[CV] C=4.344747474747475, penalty=l1 .................................
[CV]  C=4.294343434343435, penalty=l2, score=0.8872664393102455, total=   0.4s
[CV] C=4.344747474747475, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 511 tasks      | elapsed:   29.2s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 512 tasks      | elapsed:   29.3s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shap

[CV]  C=4.3951515151515155, penalty=l2, score=0.8679571545223114, total=   0.5s
[CV] C=4.495959595959596, penalty=l1 .................................
[CV]  C=4.445555555555556, penalty=l2, score=0.9096498390892934, total=   0.3s
[CV]  C=4.445555555555556, penalty=l1, score=0.9174792257072866, total=   0.6s
[CV] C=4.495959595959596, penalty=l2 .................................
[CV] C=4.495959595959596, penalty=l1 .................................
[CV]  C=4.445555555555556, penalty=l2, score=0.8678610884288389, total=   0.3s
[CV] C=4.495959595959596, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 528 tasks      | elapsed:   30.4s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 529 tasks

[CV] C=4.596767676767677, penalty=l2 .................................
[CV]  C=4.546363636363637, penalty=l1, score=0.8769873673087084, total=   0.6s
[CV] C=4.647171717171718, penalty=l1 .................................
[CV]  C=4.596767676767677, penalty=l2, score=0.9094096738556126, total=   0.2s
[CV]  C=4.546363636363637, penalty=l1, score=0.9171910274268696, total=   0.8s
[CV] C=4.647171717171718, penalty=l1 .................................
[CV] C=4.647171717171718, penalty=l1 .................................
[CV]  C=4.596767676767677, penalty=l2, score=0.8873144723569816, total=   0.3s
[CV] C=4.647171717171718, penalty=l2 .................................
[Parallel(n_jobs=-1)]: Done 544 tasks      | elapsed:   31.5s
[Parallel(n_jobs=-1)]: Done 545 tasks      | elapsed:   31.5s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=flo

[CV] C=4.747979797979799, penalty=l2 .................................
[CV] C=4.798383838383839, penalty=l1 .................................
[Parallel(n_jobs=-1)]: Done 559 tasks      | elapsed:   32.6s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
[Parallel(n_jobs=-1)]: Done 560 tasks      | elapsed:   32.6s
[Parallel(n_jobs=-1)]: Done 561 tasks      | elapsed:   32.6s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (shape=(900,), dtype=float64).
Pickling array (shape=(600,), dtype=int64).
Pickling array (shape=(300,), dtype=int64).
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_

[CV]  C=4.8487878787878795, penalty=l2, score=0.9091695086219319, total=   0.4s
[CV] C=4.899191919191919, penalty=l1 .................................
[CV]  C=4.8487878787878795, penalty=l2, score=0.8679571545223113, total=   0.4s
[CV]  C=4.8487878787878795, penalty=l2, score=0.8875066045439263, total=   0.4s
[CV]  C=4.8487878787878795, penalty=l1, score=0.8768432681684999, total=   0.5s
[CV] C=4.899191919191919, penalty=l2 .................................
[CV] C=4.899191919191919, penalty=l2 .................................
[CV] C=4.899191919191919, penalty=l2 .................................
[CV]  C=4.8487878787878795, penalty=l1, score=0.8984101061530333, total=   0.7s
[CV] C=4.9495959595959595, penalty=l1 ................................
[Parallel(n_jobs=-1)]: Done 577 tasks      | elapsed:   33.8s
Memmaping (shape=(900, 150), dtype=float64) to old file /dev/shm/joblib_memmaping_pool_4098_139725504731120/4098-139724191125064-0e161166c43942a61d21f152c23f801b.pkl
Pickling array (s

GridSearchCV(cv=None, error_score='raise',
       estimator=LogisticRegression(C=0.2, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'C': array([0.01  , 0.0604, ..., 4.9496, 5.    ]), 'penalty': ['l1', 'l2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='roc_auc', verbose=100)

In [17]:
gscv.best_score_

0.9160062122740447

In [18]:
gscv.best_params_

{'C': 0.06040404040404041, 'penalty': 'l1'}

In [19]:
gscv.best_estimator_

LogisticRegression(C=0.06040404040404041, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

# Finally get the best estimator

In [20]:
estimator = LogisticRegression(C=0.06040404040404041, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
estimator

LogisticRegression(C=0.06040404040404041, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [21]:
Y_train

array([1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.,
       0., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 0.,
       1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0.,
       0., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0.,
       0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
       1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0.,
       0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0.,
       1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1.,
       0., 1., 1., 0., 0.

In [22]:
estimator.fit(X_train_transformed, Y_train)
estimator.predict(X_train_transformed)

array([1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0.,
       0., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
       1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0.,
       1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0.,
       0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0.,
       0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1.,
       1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0.,
       1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1., 1.,
       0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.,
       1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1.,
       0., 1., 1., 0., 0.

# Test data estimation

In [23]:
test = pd.read_csv(folder + 'test.csv')
X_test = np.asarray(test[test.columns[range(1, test.shape[1])]])
X_test = transformation(X_test, X_train)
X_test

array([[ 4.90865372e+00,  8.07916915e+00, -3.03180384e+00, ...,
         2.20750873e+00, -2.77269606e+00,  1.27190034e-02],
       [ 3.05102364e+01, -1.22820387e+01,  9.36072067e+00, ...,
        -1.00810519e+00, -9.25879407e-01,  5.31296650e-01],
       [-1.03085217e+00,  7.40297910e+01,  1.62172706e+01, ...,
        -1.60877955e-01, -1.72717864e+00, -6.57219763e-01],
       ...,
       [ 4.18816962e+01, -2.12295004e+01,  2.51458608e+01, ...,
         1.41318928e+00, -3.56552819e-01, -1.05527151e+00],
       [ 5.09818782e+01, -2.33857750e+01,  1.95681952e+01, ...,
         5.87915174e-01,  5.77421878e-01, -6.46377953e-01],
       [ 2.55484472e+01, -4.53069025e+01, -2.65429521e+01, ...,
         5.28181574e-01, -1.78592638e-01, -6.69066602e-01]])

In [24]:
test_predict=estimator.predict(X_test)
test['label'] = test_predict
test[['id', 'label']].to_csv('sol_1.csv', sep = ',', index = False)
test

Unnamed: 0,id,feat1,feat2,feat3,feat4,feat5,feat6,feat7,feat8,feat9,...,feat208,feat209,feat210,feat211,feat212,feat213,feat214,feat215,feat216,label
0,1,4,2,2,0,4,2,1,2,1,...,5,3,1,0,7,1,6,2,3,1.0
1,2,3,3,0,0,1,2,0,9,3,...,1,0,0,1,21,0,27,2,0,1.0
2,3,0,0,0,2,2,8,0,1,0,...,23,0,0,0,1,0,5,1,1,0.0
3,4,0,0,0,0,0,0,0,7,0,...,4,0,0,0,14,0,18,1,3,1.0
4,5,1,2,3,0,2,0,0,13,0,...,4,0,2,0,20,0,17,1,7,1.0
5,6,0,13,0,0,2,0,0,4,5,...,2,0,0,0,6,1,7,1,3,1.0
6,7,0,3,1,4,0,0,0,3,2,...,2,1,0,2,0,1,0,2,35,1.0
7,8,0,7,1,0,1,2,0,19,5,...,2,1,0,1,10,2,8,2,12,1.0
8,9,10,0,1,0,0,0,2,6,1,...,3,2,0,0,7,0,10,2,1,1.0
9,10,2,0,2,0,2,4,1,2,0,...,44,2,2,0,2,1,5,11,0,0.0


In [25]:
estimator, transformation

(LogisticRegression(C=0.06040404040404041, class_weight='balanced', dual=False,
           fit_intercept=True, intercept_scaling=1, max_iter=100,
           multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
           solver='liblinear', tol=0.0001, verbose=0, warm_start=False),
 <function __main__.transform_pca_150>)

In [26]:
np.unique(test_predict, return_counts=True)

(array([0., 1.]), array([3814, 5236]))