In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA 

In [2]:
df = pd.DataFrame(np.load('Data/1000/sample_1000.npy', allow_pickle = True))
df = df.rename(columns = {784:'label'})
img = df[df.columns[0:784]].values.astype(float)
label = df['label'].values
img = img / 255


In [None]:
#partial split
x_train1, x_test, y_train1, y_test = train_test_split(img, label, test_size = 0.1, train_size = 0.5, random_state = 123, stratify = label)
x_train, x_valid, y_train, y_valid = train_test_split(x_train1, y_train1, test_size = 0.2, random_state = 123, stratify = y_train1)

In [3]:
#full split
x_train1, x_test, y_train1, y_test = train_test_split(img, label, test_size = 0.1, random_state = 123, stratify = label)
x_train, x_valid, y_train, y_valid = train_test_split(x_train1, y_train1, test_size = 0.2, random_state = 123, stratify = y_train1)

In [4]:
pca = PCA(n_components=100)
img_new = pca.fit_transform(img)
X_train = pca.fit_transform(x_train) 
pca.fit(img)
X_valid = pca.transform(x_valid) 
X_test = pca.transform(x_test)

In [None]:
mlg = LogisticRegression(penalty = 'l1', 
                         random_state = 123, 
                         solver = 'saga', 
                         multi_class = 'multinomial',
                         njobs = -1,
                         verbose = 1)
mlg.fit(X_train, y_train)

In [None]:
score_tr = mlg.score(X_train, y_train)
score_va = mlg.score(X_valid, y_valid)

In [5]:
mlgcv = LogisticRegressionCV(Cs=5,
                           cv=5,
                           penalty='l1',
                           solver='saga',
                           n_jobs=-1,
                           verbose=1,
                           multi_class='multinomial',
                           random_state=123,
                           tol = 0.05 )

mlgcv.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 12 concurrent workers.


convergence after 7 epochs took 290 seconds
convergence after 7 epochs took 291 seconds
convergence after 7 epochs took 291 seconds
convergence after 7 epochs took 291 seconds
convergence after 7 epochs took 291 seconds
convergence after 5 epochs took 379 seconds
convergence after 5 epochs took 379 seconds
convergence after 5 epochs took 380 seconds
convergence after 5 epochs took 380 seconds
convergence after 5 epochs took 380 seconds
convergence after 3 epochs took 313 seconds
convergence after 3 epochs took 312 seconds
convergence after 3 epochs took 312 seconds
convergence after 3 epochs took 313 seconds
convergence after 3 epochs took 313 seconds
convergence after 2 epochs took 206 seconds
convergence after 2 epochs took 206 seconds
convergence after 2 epochs took 205 seconds
convergence after 2 epochs took 207 seconds
convergence after 2 epochs took 207 seconds
convergence after 2 epochs took 209 seconds
convergence after 2 epochs took 209 seconds
convergence after 2 epochs took 

[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed: 23.4min finished


LogisticRegressionCV(Cs=5, class_weight=None, cv=5, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='multinomial', n_jobs=-1,
                     penalty='l1', random_state=123, refit=True, scoring=None,
                     solver='saga', tol=0.05, verbose=1)

In [6]:
score_tr = mlgcv.score(X_train, y_train)
score_va = mlgcv.score(X_valid, y_valid)
score_te = mlgcv.score(X_test, y_test)

In [7]:
print('train accuracy:', score_tr)
print('validation accuracy:', score_va)
print('test accuracy:', score_te)

train accuracy: 0.2635346215780998
validation accuracy: 0.18727858293075683
test accuracy: 0.18886956521739132
