## AML 01
***

### Data Preparation

In [1]:
from sklearn.datasets import load_digits
from data_preparation import data_preparation

from sklearn.model_selection import cross_val_score, train_test_split, KFold
from sklearn.linear_model import LogisticRegression

import numpy as np

%load_ext autoreload
%autoreload 2

In [2]:
digits = load_digits()
print(digits.keys())

dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'images', 'DESCR'])


In [3]:
data = digits['data']
images = digits['images']
target = ['target']
target_names = ['target_names']

In [4]:
X, y = data_preparation(digits, filter=[3, 8])

In [5]:
lambdas = [10 ** i for i in range(-5, 5)]

In [6]:
accuracies = []
print("Lambda\t\t Accuracy")
print("=========================")
for lmbda in lambdas:
    LR = LogisticRegression(C=lmbda, solver='liblinear')
    accuracy = cross_val_score(LR, X, y, cv=5)
    accuracies.append(np.mean(accuracy))
    print("{} \t\t {:.5f}".format(lmbda, accuracies[-1]))

Lambda		 Accuracy
1e-05 		 0.94937
0.0001 		 0.95227
0.001 		 0.94953
0.01 		 0.95802
0.1 		 0.96365
1 		 0.96643
10 		 0.96362
100 		 0.96084
1000 		 0.95802
10000 		 0.95802


In [7]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=101)

### Optimization Methods Comparison

In [None]:
from utils import zero_one_loss, predict
from optimizers import (gradient_descent, stochastic_gradient_descent, stochastic_gradient_minibatch, stochastic_gradient_momentum)

for method in [gradient_descent, stochastic_gradient_descent, stochastic_gradient_minibatch, stochastic_gradient_momentum]:
    best = 1e5
    best_alpha0, best_momentum, best_gamma = 0, 0, 0
    for alpha0 in [0.001, 0.01, 0.1]:
        for momentum in [0.1, 0.2, 0.5]:
            for gamma in [0.0001, 0.001, 0.01]:
                w = np.zeros((X.shape[1], 1))
                current = 0
                kf = KFold(n_splits=10)
                for train_idx, val_idx in kf.split(X_train):
                    cv_X_train, cv_X_val = X_train[train_idx], X_train[val_idx]
                    cv_y_train, cv_y_val = y_train[train_idx], y_train[val_idx]
                    w = method(w, cv_X_train, cv_y_train, alpha0=alpha0, momentum=momentum, gamma=gamma)
                    current += zero_one_loss(predict(w, cv_X_val), cv_y_val)

                current /= 10.
                if current < best:
                    best = current
                    best_alpha0 = alpha0
                    best_momentum = momentum
                    best_gamma = gamma

    print('=' * 20)
    print('Method: \t', method)
    print('Parameters:')
    print('\tAlpha: \t', best_alpha0)
    print('\tMomentum: \t', best_momentum)
    print('\tGamma: \t', best_gamma)