In [1]:
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

# # Interactive plots
# %matplotlib notebook

# Load all packages used
import matplotlib.pyplot as plt
import numpy as np
import mypackage
import sklearn

timer = mypackage.utils.Timer()

TensorFlow version is 2.1.0


Using TensorFlow backend.


In [2]:
# Load the labeled data.
# The selection is entries 0, 5, 9 of TOMRA_FILLET_LAYERS followed by
# entries 0, 4, 5 of TOMRA_OBVIOUS_PLASTICS, all read from `dataset_folder`.
dataset_folder = 'data/tomra_all'
fillet_files = np.array(mypackage.Dataset.TOMRA_FILLET_LAYERS)[[0, 5, 9]]
plastic_files = np.array(mypackage.Dataset.TOMRA_OBVIOUS_PLASTICS)[[0, 4, 5]]
file_list = list(fillet_files) + list(plastic_files)

# X holds the images, Y the matching labels (requested via with_labels=True)
X, Y = mypackage.Dataset.load_files(file_list, dataset_folder, with_labels=True)

# Only the second value returned by `scale` is kept; it replaces X with its scaled version
_, X, _ = mypackage.Dataset.scale(None, X, scaler='GlobalStandardization')

# Sanity check: array shape, value range after scaling, and the label distribution
print(X.shape, (X.min(), X.max()))
print(np.unique(Y, return_counts=True))

(6, 100, 100, 208) (-6.4139543, 6.5095677)
(array([0, 1, 2], dtype=int32), array([49821,  9311,   868]))


In [14]:
# Majority-class baseline: the accuracy obtained by always predicting the most frequent label.
# Computed from Y rather than from hand-copied counts, so it stays correct when the
# selection of loaded files changes.
label_counts = np.unique(Y, return_counts=True)[1]
float(label_counts.max() / label_counts.sum())  # = 0.83035 for counts 49821 / 9311 / 868

0.83035

In [3]:
# Parameters for the SVM grid search: one RBF grid over (gamma, C) and one linear grid over C
SVM_GRID_PARAMS = [
    {
        'kernel': ['rbf'],
        'gamma': [10, 1, 1e-1, 1e-2, 1e-3],
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
    },
    {
        'kernel': ['linear'],
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
    },
]

data = mypackage.StackTransform(X, Y)

print("Running a grid search SVM")
print(SVM_GRID_PARAMS)

# Stacked inputs for scikit-learn; the labels are flattened to a 1-D vector.
# (presumably X_stack() yields one row per pixel - confirm in mypackage.StackTransform)
X_train = data.X_stack()
y_train = data.Y_stack().ravel()

# Exhaustive search over the grid with a default SVC, using all available cores.
# NOTE: the recorded run of this cell took about 579 minutes for 210 fits.
clf_svm = sklearn.model_selection.GridSearchCV(
    sklearn.svm.SVC(),
    SVM_GRID_PARAMS,
    verbose=5,
    n_jobs=-1,
)
clf_svm.fit(X_train, y_train)
print("SVM best parameters : {}".format(clf_svm.best_params_))

Running a grid search SVM
[{'kernel': ['rbf'], 'gamma': [10, 1, 0.1, 0.01, 0.001], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}, {'kernel': ['linear'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}]
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:  8.3min

A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed: 81.8min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 438.1min
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed: 578.6min finished


SVM best parameters : {'C': 0.1, 'kernel': 'linear'}


In [4]:
# clf_svm.cv_results_

In [15]:
# Parameters for the Logistic Regression grid search
LR_GRID_PARAMS = {"C": [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000], "penalty":["l1","l2"]} # l1 lasso l2 ridge

# The 'saga' solver shuffles the data, so without a fixed seed repeated runs can
# produce slightly different fits and scores. Pin it for reproducibility.
LR_RANDOM_STATE = 0

data = mypackage.StackTransform(X, Y)

print("Running a grid search Logistic Regression")
print(LR_GRID_PARAMS)
# Stacked inputs for scikit-learn; the labels are flattened to a 1-D vector
X_train, y_train = data.X_stack(), data.Y_stack().ravel()

# 'saga' supports both the l1 and the l2 penalty present in the grid.
# NOTE(review): both the estimator and the search request all cores (n_jobs=-1);
# consider n_jobs=1 on the estimator if the workers turn out to be oversubscribed.
clf_lr = sklearn.linear_model.LogisticRegression(max_iter=50000, solver='saga', multi_class='ovr',
                                                 random_state=LR_RANDOM_STATE, verbose=1, n_jobs=-1)
clf_lr = sklearn.model_selection.GridSearchCV(clf_lr, LR_GRID_PARAMS, verbose=5, n_jobs=-1)
clf_lr.fit(X_train, y_train)
print("Logistic Regression best parameters : {}".format(clf_lr.best_params_))

Running a grid search Logistic Regression
{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']}
Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   25.5s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed: 101.0min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed: 101.0min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.


convergence after 655 epochs took 201 seconds
convergence after 997 epochs took 303 seconds
convergence after 1024 epochs took 308 seconds
Logistic Regression best parameters : {'C': 1, 'penalty': 'l2'}


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  5.1min finished


### TODO: show a heatmap of the accuracy obtained with the RBF kernel over the (C, gamma) grid

In [6]:
# Display the full cross-validation record of the SVM grid search: a dict of arrays
# (e.g. 'mean_fit_time', 'std_fit_time') holding one value per grid candidate.
clf_svm.cv_results_

{'mean_fit_time': array([ 405.976267  ,  421.79769845,  406.97805276,  173.65794334,
         183.94055305, 2311.59996829,  518.23389874,  126.63822365,
          70.75027251,   85.47845864, 4810.25047498,  742.03092475,
          65.42881446,   37.29366102,   50.91177459, 6770.76226673,
        1002.30814695,   50.89447789,   27.31695752,   31.61913695,
        9747.52352471, 1071.54126749,   50.77628088,   24.89664264,
          26.39575152, 9734.29923887, 1069.54106908,   79.56201329,
          26.23626299,   24.45584526, 8800.84149122, 1077.30069122,
          80.29480391,   31.89527183,   28.48183141,   33.95357237,
          27.11165547,   26.48153267,   32.08842134,   54.55674286,
         184.29543595, 1340.7419517 ]),
 'std_fit_time': array([  1.79910416,   7.66871511,   7.98948303,   3.38268335,
          5.69691828,  10.66576274,   3.33475762,   2.3722767 ,
          2.3005302 ,   3.24900467,  45.68809445,  52.88886747,
          2.95817815,   2.77178255,   2.89757234,  34.3

In [17]:
# Display the full cross-validation record of the Logistic Regression grid search: a dict
# of arrays (e.g. 'mean_fit_time', 'mean_score_time') holding one value per grid candidate.
clf_lr.cv_results_

{'mean_fit_time': array([1330.40557075,   23.62707868,  349.48123388,   83.59796166,
         364.5597856 , 1791.75587268,  628.87643914,  599.92353125,
         808.47532415,  637.50490623,  821.23423424,  671.2059402 ,
         821.83805494,  463.10942788]),
 'std_fit_time': array([583.15320053,   1.16876138,  87.66407939,  17.37367105,
         42.93871553, 587.84908403,  91.59339602,  65.82731742,
         75.50456764,  68.24942381,  88.05393905,  70.53247137,
         61.09007597, 135.04497168]),
 'mean_score_time': array([0.00820637, 0.00953417, 0.00832429, 0.00827312, 0.00820403,
        0.00827971, 0.0083694 , 0.00812697, 0.00801415, 0.00808368,
        0.00830836, 0.00830598, 0.00881257, 0.00782328]),
 'std_score_time': array([1.84848248e-04, 2.66532820e-03, 8.94410513e-05, 4.95527651e-05,
        1.08614922e-04, 3.43256242e-04, 5.21973741e-04, 1.54125764e-04,
        2.98474634e-04, 1.87088297e-04, 3.33122425e-04, 5.53101095e-05,
        1.87415557e-03, 2.80198595e-04]),
 'pa

## See the SVM_LR_SummaryOfParameterSearch.ipynb notebook for the results

In [11]:
# Mean cross-validated test score of every SVM grid candidate (42 values).
# Several candidates score exactly 0.83035, the same value as the share of the most
# frequent label in Y.
# NOTE(review): presumably the first 35 entries are the RBF candidates and the last 7
# the linear ones - confirm against clf_svm.cv_results_['params'].
clf_svm.cv_results_['mean_test_score']

array([0.83035   , 0.83035   , 0.97186667, 0.97546667, 0.97173333,
       0.83035   , 0.83035   , 0.97506667, 0.97525   , 0.97498333,
       0.83035   , 0.9589    , 0.981     , 0.98283333, 0.97966667,
       0.83035   , 0.9713    , 0.98546667, 0.98503333, 0.98433333,
       0.83035   , 0.97093333, 0.98455   , 0.98688333, 0.986     ,
       0.83035   , 0.97093333, 0.9827    , 0.98583333, 0.98693333,
       0.83035   , 0.97093333, 0.98266667, 0.98273333, 0.98591667,
       0.98416667, 0.98583333, 0.98711667, 0.98695   , 0.98591667,
       0.98565   , 0.98406667])