# Introduction to Machine Learning - The Final Model

#### By Jeroen Smienk & Marnick van der Arend

Here we import the best dataset, use the best normalizer, train the best classifier with the best options and export the classifier and normalizer.

In [1]:
# IMPORTS AND NOTEBOOK SETUP
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Load the feature table (version 8 of the dataset) into a DataFrame.
DATASET_CSV = '../dataset-numpy/dataset-v8.csv'
data = pd.read_csv(DATASET_CSV)

# Show the column names as the cell output.
data.columns

Index([u'area', u'width', u'contours', u'radius', u'circle_dist', u'rect_dist',
       u'hull_radius', u'aspect_ratio', u'centroid_x', u'centroid_y',
       u'corners', u'circles', u'angle', u'weight_0_0', u'weight_0_1',
       u'weight_0_2', u'weight_0_3', u'weight_0_4', u'weight_0_5',
       u'weight_0_6', u'weight_0_7', u'weight_1_0', u'weight_1_1',
       u'weight_1_2', u'weight_1_3', u'weight_1_4', u'weight_1_5',
       u'weight_1_6', u'weight_1_7', u'weight_2_0', u'weight_2_1',
       u'weight_2_2', u'weight_2_3', u'weight_2_4', u'weight_2_5',
       u'weight_2_6', u'weight_2_7', u'weight_3_0', u'weight_3_1',
       u'weight_3_2', u'weight_3_3', u'weight_3_4', u'weight_3_5',
       u'weight_3_6', u'weight_3_7', u'weight_4_0', u'weight_4_1',
       u'weight_4_2', u'weight_4_3', u'weight_4_4', u'weight_4_5',
       u'weight_4_6', u'weight_4_7', u'weight_5_0', u'weight_5_1',
       u'weight_5_2', u'weight_5_3', u'weight_5_4', u'weight_5_5',
       u'weight_5_6', u'weight_5_7', u'wei

## Normalization

We scale every column except the label to the [0, 1] range with a `MinMaxScaler`.

In [3]:
# Normalization
from sklearn.preprocessing import MinMaxScaler

columns = data.columns.values

# Every column is scaled except the ones listed here.
columns_to_not_normalize = ['label']
columns_to_normalize = [c for c in columns if c not in columns_to_not_normalize]

# MinMaxScaler
# The fitted scaler is kept in `scaler` because it is exported later together
# with the classifier.
# NOTE: this cell overwrites the scaled columns of `data` in place. Re-running
# it without reloading the dataset re-fits the scaler on already-scaled values.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data[columns_to_normalize])

# Build the replacement frame on data's own index: pandas aligns on the index
# when assigning a DataFrame, so a mismatching index would silently yield NaNs.
data[columns_to_normalize] = pd.DataFrame(
    scaled_values,
    columns=columns_to_normalize,
    index=data.index,
)

## Training the classifier

The support vector classifier (`svm.SVC`) with an RBF kernel and `C=3.75, gamma=0.1` proved to be the most successful.

In [4]:
# Train the support vector classifier on the full (normalized) dataset.
from sklearn import svm
from sklearn.decomposition import PCA

# Features are all columns but the last; the target is the last column.
# NOTE(review): this split is positional, while the normalization cell refers
# to the target by the name 'label' -- confirm 'label' is the last column.
X_train = data.iloc[:, :-1]
Y_train = data.iloc[:, -1]

# RBF-kernel SVC with the selected hyper-parameters.
svc_options = dict(kernel='rbf', C=3.75, gamma=0.1)
classifier = svm.SVC(**svc_options)

# Last expression of the cell: fit() returns the estimator, so its repr is shown.
classifier.fit(X_train, Y_train)

SVC(C=3.75, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Exporting


In [5]:
# Exporting the scaler and classifier
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone `joblib` package and only fall back to the
# vendored copy on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the fitted scaler and the trained classifier as separate files.
# joblib.dump returns the list of written file names; the last call's return
# value is displayed as the cell output.
joblib.dump(scaler, '../classifiers/scaler_svm_v8.joblib')
joblib.dump(classifier, '../classifiers/classifier_svm_v8.joblib')

['../classifiers/classifier_svm_v8.joblib']