In [16]:
# Challenge 3: Introduction to Custom Machine Learning

from PIL import Image
import numpy as np
from pathlib import Path
from PIL import ImageOps
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# Running class label; the loading loop increments it once per class directory.
category = 0
# Flattened pixel vectors (x) and their matching class labels (y).
x, y = [], []

# Each display name sits next to the estimator it describes, so the two
# lists derived below always have the same length and order.
model_specs = [
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("RBF SVM", SVC(kernel="rbf")),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("Naive Bayes", GaussianNB()),
]

# Names of each classifier, in evaluation order.
names = [label for label, _ in model_specs]

# One unfitted instance of each classifier.
classifiers = [estimator for _, estimator in model_specs]

# Keeps the trained models for further use in later cells.
trained_models = []

def prepare_dataset(file):
    """Load one image file and append it to the module-level dataset.

    The image is opened with PIL, converted to a NumPy array and flattened
    to a 1-D vector. The vector is appended to the global list ``x`` and the
    current value of the global ``category`` counter is appended to ``y``.

    Loading is best-effort: if the file cannot be read, a message (including
    the reason) is printed and nothing is appended to either list.

    Args:
        file: Path or file object accepted by ``PIL.Image.open``.
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # the pixel data has been copied into the array.
        with Image.open(file) as im:
            pixels = np.array(im).flatten()
    except Exception as exc:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still stop a long-running load.
        print("cannot add to dataset:", file, "-", exc)
        return
    x.append(pixels)
    y.append(category)
        
# Loop all data and create the dataset.
# NOTE: Path.iterdir() yields entries in arbitrary (filesystem) order, so the
# numeric label assigned to each class can differ between machines.
for class_dir in Path('../data/gear_images_128').iterdir():
    # Only sub-directories represent classes; skip stray files in the
    # dataset root so they neither crash iterdir() nor consume a label.
    if not class_dir.is_dir():
        continue
    category += 1
    print(class_dir, ':', category)
    for file in class_dir.iterdir():
        prepare_dataset(file)

# Split data into 70% train and 30% test.
(x_train, x_test, y_train, y_test) = train_test_split(x, y, test_size=0.3, random_state=0)

# Loop over names and classifiers at the same time by using for - zip.
for name, clf in zip(names, classifiers):
    # Train the model on the training data.
    clf.fit(x_train, y_train)
    trained_models.append(clf)
    # Check the prediction score on the held-out test data.
    score = clf.score(x_test, y_test)
    print(name, ":", score)

../data/gear_images_128/pulleys : 1
../data/gear_images_128/helmets : 2
../data/gear_images_128/crampons : 3
../data/gear_images_128/harnesses : 4
../data/gear_images_128/insulated_jackets : 5
../data/gear_images_128/axes : 6
../data/gear_images_128/rope : 7
../data/gear_images_128/boots : 8
../data/gear_images_128/hardshell_jackets : 9
../data/gear_images_128/carabiners : 10
../data/gear_images_128/tents : 11
../data/gear_images_128/gloves : 12
Linear SVM : 0.8885400313971743
RBF SVM : 0.21036106750392464
LowGamma SVM : 0.21036106750392464
Decision Tree : 0.6562009419152276
Random Forest : 0.8414442700156985
Naive Bayes : 0.5557299843014128


In [17]:
# Run predictions on a few local files whose class is known.
sample_paths = [
    '../data/gear_images_128/insulated_jackets/10167913x1063714_zm.jpeg',  # Should be 5
    '../data/gear_images_128/boots/10018755x1036824_zm.jpeg',  # Should be 8
    '../data/gear_images_128/hardshell_jackets/10116634x1038116_zm.jpeg',  # Should be 9
]

# Open every sample, then convert each one to a pixel array.
test_img, test_img2, test_img3 = [Image.open(path) for path in sample_paths]
test_arr, test_arr2, test_arr3 = [
    np.array(picture) for picture in (test_img, test_img2, test_img3)
]

# One flattened feature vector per sample, in the same order as sample_paths.
predict_data = [pixels.flatten() for pixels in (test_arr, test_arr2, test_arr3)]

# Print each trained model's predicted labels for the three samples.
for name, trained_model in zip(names, trained_models):
    print(name, ':', trained_model.predict(predict_data))

Linear SVM : [5 8 9]
RBF SVM : [9 8 9]
LowGamma SVM : [9 8 9]
Decision Tree : [9 8 9]
Random Forest : [5 8 9]
Naive Bayes : [9 8 9]


In [18]:
import requests
from io import BytesIO

from PIL import Image
from PIL import ImageOps
import numpy as np
import os, sys
from pathlib import Path

# Borrowed from Challenge 2
def resize(img, size=128):
    """Fit ``img`` into a ``size`` x ``size`` square, padding with white.

    Args:
        img: PIL image. ``Image.thumbnail`` modifies it IN PLACE (keeping
            the aspect ratio), so the caller's object is changed too.
        size: Edge length in pixels of the square output. Defaults to 128,
            the value this function was originally hard-coded to.

    Returns:
        The padded image produced by ``ImageOps.expand``, or ``None`` if the
        image could not be processed (a message is printed in that case).
    """
    try:
        # Create the thumbnail; this replaces the contents of ``img`` itself.
        img.thumbnail((size, size), Image.LANCZOS)
        w, h = img.size
        # Border is (left, top, right, bottom): only the right and bottom
        # edges are padded, so the picture stays anchored at the top-left.
        border = (0, 0, size - w, size - h)
        return ImageOps.expand(img, border, '#FFFFFF')
    except Exception as exc:
        # Best-effort on purpose, but no bare ``except:`` so Ctrl-C still works.
        print("failed to resize:", exc)
        return None
        
def normalize(img, cutoff=0.2):
    """Apply ``ImageOps.autocontrast`` to ``img``.

    Args:
        img: PIL image to normalize.
        cutoff: Value forwarded to ``ImageOps.autocontrast`` as ``cutoff``.
            Defaults to 0.2, the value this function was originally
            hard-coded to.

    Returns:
        The contrast-normalized image, or ``None`` if normalization failed
        (a message is printed in that case).
    """
    try:
        return ImageOps.autocontrast(img, cutoff=cutoff)
    except Exception as exc:
        # Best-effort on purpose, but no bare ``except:`` so Ctrl-C still works.
        print("failed normalize:", exc)
        return None

# Get image from file URL.
url = "https://shop.epictv.com/sites/default/files/ae42ad29e70ba8ce6b67d3bdb6ab5c6e.jpeg" # This is 2 (helmet)
# The timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() turns an HTTP error page into a clear exception instead
# of letting PIL fail later on non-image bytes.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Open image and pre-process.
im = Image.open(BytesIO(response.content)).convert('RGB')
resized_img = resize(im)
normalized_img = normalize(resized_img)
# resize()/normalize() report failure by returning None; stop here rather
# than handing a meaningless array to the classifiers.
if normalized_img is None:
    raise ValueError("image pre-processing failed for " + url)

test_arr = np.array(normalized_img)
predict_data = [test_arr.flatten()]
for name, trained_model in zip(names, trained_models):
    print(name, ':', trained_model.predict(predict_data))

Linear SVM : [2]
RBF SVM : [9]
LowGamma SVM : [9]
Decision Tree : [2]
Random Forest : [2]
Naive Bayes : [1]


In [19]:
# Challenge 3: Introduction to Custom Machine Learning - 64x64

from PIL import Image
import numpy as np
from pathlib import Path
from PIL import ImageOps
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# Running class label; the loading loop increments it once per class directory.
category = 0
# Flattened pixel vectors (x) and their matching class labels (y).
x, y = [], []

# Each display name sits next to the estimator it describes, so the two
# lists derived below always have the same length and order.
model_specs = [
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("RBF SVM", SVC(kernel="rbf")),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("Naive Bayes", GaussianNB()),
]

# Names of each classifier, in evaluation order.
names = [label for label, _ in model_specs]

# One unfitted instance of each classifier.
classifiers = [estimator for _, estimator in model_specs]

# Keeps the trained models for further use in later cells.
trained_models_64 = []

def prepare_dataset(file):
    """Load one image file and append it to the module-level dataset.

    The image is opened with PIL, converted to a NumPy array and flattened
    to a 1-D vector. The vector is appended to the global list ``x`` and the
    current value of the global ``category`` counter is appended to ``y``.

    Loading is best-effort: if the file cannot be read, a message (including
    the reason) is printed and nothing is appended to either list.

    Args:
        file: Path or file object accepted by ``PIL.Image.open``.
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # the pixel data has been copied into the array.
        with Image.open(file) as im:
            pixels = np.array(im).flatten()
    except Exception as exc:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still stop a long-running load.
        print("cannot add to dataset:", file, "-", exc)
        return
    x.append(pixels)
    y.append(category)
        
# Loop all data and create the dataset.
# NOTE: Path.iterdir() yields entries in arbitrary (filesystem) order, so the
# numeric label assigned to each class can differ between machines.
for class_dir in Path('../data/gear_images_64').iterdir():
    # Only sub-directories represent classes; skip stray files in the
    # dataset root so they neither crash iterdir() nor consume a label.
    if not class_dir.is_dir():
        continue
    category += 1
    print(class_dir, ':', category)
    for file in class_dir.iterdir():
        prepare_dataset(file)

# Split data into 70% train and 30% test.
(x_train, x_test, y_train, y_test) = train_test_split(x, y, test_size=0.3, random_state=0)

# Loop over names and classifiers at the same time by using for - zip.
for name, clf in zip(names, classifiers):
    # Train the model on the training data.
    clf.fit(x_train, y_train)
    # Store in the 64x64 list declared for this cell. Appending to
    # ``trained_models`` would mix these models into the 128x128 ones.
    trained_models_64.append(clf)
    # Check the prediction score on the held-out test data.
    score = clf.score(x_test, y_test)
    print(name, ":", score)

../data/gear_images_64/pulleys : 1
../data/gear_images_64/helmets : 2
../data/gear_images_64/crampons : 3
../data/gear_images_64/harnesses : 4
../data/gear_images_64/insulated_jackets : 5
../data/gear_images_64/axes : 6
../data/gear_images_64/rope : 7
../data/gear_images_64/boots : 8
../data/gear_images_64/hardshell_jackets : 9
../data/gear_images_64/carabiners : 10
../data/gear_images_64/tents : 11
../data/gear_images_64/gloves : 12
Linear SVM : 0.869701726844584
RBF SVM : 0.21036106750392464
Decision Tree : 0.6875981161695447
Random Forest : 0.8194662480376766
Naive Bayes : 0.5572998430141287


In [31]:
# Randomize the train data

from PIL import Image
import numpy as np
from pathlib import Path
from PIL import ImageOps
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# Running class label; the loading loop increments it once per class directory.
category = 0
# Flattened pixel vectors (x) and their matching class labels (y).
x, y = [], []

# Each display name sits next to the estimator it describes, so the two
# lists derived below always have the same length and order.
model_specs = [
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("RBF SVM", SVC(kernel="rbf")),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("Naive Bayes", GaussianNB()),
]

# Names of each classifier, in evaluation order.
names = [label for label, _ in model_specs]

# One unfitted instance of each classifier.
classifiers = [estimator for _, estimator in model_specs]

# Keeps the trained models for further use in later cells.
trained_models = []

def prepare_dataset(file):
    """Load one image file and append it to the module-level dataset.

    The image is opened with PIL, converted to a NumPy array and flattened
    to a 1-D vector. The vector is appended to the global list ``x`` and the
    current value of the global ``category`` counter is appended to ``y``.

    Loading is best-effort: if the file cannot be read, a message (including
    the reason) is printed and nothing is appended to either list.

    Args:
        file: Path or file object accepted by ``PIL.Image.open``.
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # the pixel data has been copied into the array.
        with Image.open(file) as im:
            pixels = np.array(im).flatten()
    except Exception as exc:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still stop a long-running load.
        print("cannot add to dataset:", file, "-", exc)
        return
    x.append(pixels)
    y.append(category)
        
# Loop all data and create the dataset.
# NOTE: Path.iterdir() yields entries in arbitrary (filesystem) order, so the
# numeric label assigned to each class can differ between machines.
for class_dir in Path('../data/gear_images_64').iterdir():
    # Only sub-directories represent classes; skip stray files in the
    # dataset root so they neither crash iterdir() nor consume a label.
    if not class_dir.is_dir():
        continue
    category += 1
    print(class_dir, ':', category)
    for file in class_dir.iterdir():
        prepare_dataset(file)

# Split data into 70% train and 30% test (different seed => different shuffle).
(x_train, x_test, y_train, y_test) = train_test_split(x, y, test_size=0.3, random_state=10)

# Loop over names and classifiers at the same time by using for - zip.
for name, clf in zip(names, classifiers):
    # Train the model on the training data.
    clf.fit(x_train, y_train)
    trained_models.append(clf)
    # Check the prediction score on the held-out test data.
    score = clf.score(x_test, y_test)
    print(name, ":", score)

../data/gear_images_64/pulleys : 1
../data/gear_images_64/helmets : 2
../data/gear_images_64/crampons : 3
../data/gear_images_64/harnesses : 4
../data/gear_images_64/insulated_jackets : 5
../data/gear_images_64/axes : 6
../data/gear_images_64/rope : 7
../data/gear_images_64/boots : 8
../data/gear_images_64/hardshell_jackets : 9
../data/gear_images_64/carabiners : 10
../data/gear_images_64/tents : 11
../data/gear_images_64/gloves : 12
Linear SVM : 0.8602825745682888
RBF SVM : 0.18995290423861852
Decision Tree : 0.6499215070643642
Random Forest : 0.8053375196232339
Naive Bayes : 0.565149136577708


In [34]:
# Confusion Matrix (display the result in matrix) and Classification Report

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Class ids 1..12, shared by both metrics so that matrix rows/columns and
# report lines always refer to the same classes in the same order.
class_labels = list(range(1, 13))
class_names = [str(label) for label in class_labels]

for name, trained_model in zip(names, trained_models):
    y_pred = trained_model.predict(x_test)
    result = confusion_matrix(y_test, y_pred, labels=class_labels)
    print(name)
    print(result)
    # Passing ``labels`` explicitly keeps the 12 target_names aligned with
    # the class ids even when a class is missing from both y_test and y_pred;
    # without it the report raises on a length mismatch.
    print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))


Linear SVM
[[ 10   3   0   0   0   0   1   0   1   0   0   1]
 [  0  27   0   0   1   0   0   1   0   1   1   1]
 [  0   0  31   0   0   0   0   0   0   1   1   0]
 [  1   0   0  49   0   0   1   0   0   0   3   2]
 [  1   0   0   0  52   0   0   1   8   0   0   0]
 [  0   0   2   0   0  29   0   0   0   0   0   0]
 [  0   0   2   0   2   0  60   0   1   0   0   6]
 [  0   0   0   0   1   0   0  28   2   1   0   0]
 [  3   0   0   0   9   0   1   0 107   0   0   0]
 [  0   1   0   0   0   1   4   0   0  72   0   0]
 [  0   0   2   1   1   0   0   0   0   0  39   0]
 [  4   1   0   4   2   0   1   3   3   1   0  44]]
             precision    recall  f1-score   support

          1       0.53      0.62      0.57        16
          2       0.84      0.84      0.84        32
          3       0.84      0.94      0.89        33
          4       0.91      0.88      0.89        56
          5       0.76      0.84      0.80        62
          6       0.97      0.94      0.95        31
    

  'precision', 'predicted', average, warn_for)


Naive Bayes
[[  4   3   2   0   1   0   2   0   4   0   0   0]
 [  0  28   0   0   0   0   1   0   0   0   0   3]
 [  0   0  26   0   0   0   0   0   0   0   7   0]
 [  1   0   3  40   0   0   4   3   0   0   3   2]
 [  0   1   0   0  14   0   0   0  43   0   0   4]
 [  0   7   8   0   0  11   0   0   0   3   2   0]
 [ 10  26   6   1   3   0  15   0   2   2   0   6]
 [  0   0   0   0   0   0   0  26   6   0   0   0]
 [  2   0   0   0   5   0   2   0 110   1   0   0]
 [  8  26   1   2   0   0   5   1   0  33   0   2]
 [  2   1   6   0   0   0   0   0   0   0  33   1]
 [  3   5   1   2   3   0   6  14   8   0   1  20]]
             precision    recall  f1-score   support

          1       0.13      0.25      0.17        16
          2       0.29      0.88      0.43        32
          3       0.49      0.79      0.60        33
          4       0.89      0.71      0.79        56
          5       0.54      0.23      0.32        62
          6       1.00      0.35      0.52        31
   