# Train the Model with the Augmented Best Dataset

In [1]:
import pickle
import numpy as np
import timeit
import os
from sklearn import svm
from sklearn.preprocessing import StandardScaler

## Classifier

In [2]:
# RBF-kernel SVM
# support-vector classifier configured with the best parameters
clf = svm.SVC(
    kernel='rbf',
    gamma='auto',
    C=10,
)

## Train and save

In [3]:
# Train `clf` on the pickled dataset and persist both the fitted classifier
# and the feature scaler, so inference can apply the exact same scaling.

# Input dataset and output artifact locations.
dataset_path =\
    './datasets/rgb-augmented-false-positives/dataset-pix-16-16-cell-2-2-histeq-1.pkl'
save_path =\
    './clfs/clf-augmented-c-10-kernel-rbf-gamma-auto-pix-16-16-cell-2-2-histeq-1.pkl'
# NOTE(review): the scaler is written under ./datasets/rgb/ while the dataset
# is read from ./datasets/rgb-augmented-false-positives/ -- confirm intended.
scaler_save_path =\
    './datasets/rgb/X-scaler-augmented-dataset-pix-16-16-cell-2-2-histeq-1.pkl'

# Ensure the output folders exist before the (slow) fit, so saving cannot
# fail on a missing directory once training has already been paid for.
for out_path in (save_path, scaler_save_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Keep the input file open only for the duration of the read.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open(dataset_path, mode='rb') as f:
    dataset = pickle.load(f)

# load features and labels (X is indexed as examples x features below)
X = dataset['features']
y = dataset['labels']
print('Number of features: ' + str(X.shape[1]))
print('Number of examples: ' + str(X.shape[0]))

# normalize features: fit the scaler on the full training set, then apply it
X_scaler = StandardScaler().fit(X)
scaled_X = X_scaler.transform(X)

# Train the classifier, timing only the fit itself
start_time = timeit.default_timer()
clf.fit(scaled_X, y)
end_time = timeit.default_timer()
print('Run time: %.4f minutes' %((end_time-start_time)/60))

# save classifier
with open(save_path, mode='wb') as ff:
    pickle.dump(clf, ff)
# save the fitted scaler alongside it; new inputs must be transformed with
# this same scaler before being passed to the classifier
with open(scaler_save_path, mode='wb') as ff:
    pickle.dump(X_scaler, ff)
print('model and scaler saved.')

Number of features: 972
Number of examples: 17863
Run time: 0.5910 minutes
model and scaler saved.


---