# Shoe Detection

1. Machine Learning
  1. import data
  2. build/train model (SVM)
  3. normalize/scale data
  4. dimensionality reduction (PCA)
  5. train/build model (SVM) on dimensionally reduced data
  6. find best C with GridSearchCV
2. Computer Vision
  1. preprocessing
  2. etc..
3. Object Detection (ML + CV)
  1. somehting
  2. and
  3. this

*imports*

In [80]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler, scale
from sklearn.decomposition import PCA

%matplotlib inline

# 1. Machine Learning

*useful method*

In [81]:
''' reads a directory of images, flattens the features to a single row then returns as a dataframe '''
def extract_features(file_path, features):
    # could define HOG here
    new_df = []
    for img_path in features:
        img = mpimg.imread(file_path + img_path)
        image_flat = img.flatten()
        new_df.append(image_flat)
    return pd.DataFrame(new_df)

### A. Import data

*import csv features/labels*

In [82]:
data_path = "../images"

labels = pd.read_csv(data_path + "/labels.csv")

*get features paths only*

In [83]:
images_path = labels["Path"]

images_path.head()

0         /positive_images/new_vans-both-23.jpg
1     /positive_images/new_vans-outside-014.jpg
2      /positive_images/new_vans-inside-217.jpg
3     /positive_images/new_vans-outside-028.jpg
4    /positive_images/new_adidas-inside-238.jpg
Name: Path, dtype: object

*import features*

In [84]:
images_path = labels["Path"]

X = extract_features(data_path, images_path)

In [85]:
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,6390,6391,6392,6393,6394,6395,6396,6397,6398,6399
0,255,255,255,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
1,255,255,255,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
2,255,255,255,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
3,255,255,255,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255
4,255,255,255,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,255


*import labels*

In [86]:
y = labels["Label"]

In [87]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Label, dtype: int64

*info about data*

In [88]:
print("Features", X.shape)
print("Labels", y.shape)

Features (7433, 6400)
Labels (7433,)


*train test split*

In [89]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=1)

In [90]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (5203, 6400)
X_test shape: (2230, 6400)
y_train shape: (5203,)
y_test shape: (2230,)


### B. Build/Train model (SVM)

In [91]:
svm_shoe = SVC(C=1, kernel="linear", gamma=0.0005, random_state=2)

svm_shoe.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0005, kernel='linear',
  max_iter=-1, probability=False, random_state=2, shrinking=True,
  tol=0.001, verbose=False)

*test/predict*

In [92]:
y_predict = svm_shoe.predict(X_test)

print("Misclassified samples: %d" % (y_test != y_predict).sum() )
print("\nSVM Faces Prediction:\n", y_predict)

Misclassified samples: 0

SVM Faces Prediction:
 [0 0 0 ... 0 1 0]


*accuracy*

In [93]:
accuracy = accuracy_score(y_test, y_predict)

print("Accuracy: %.2f" % accuracy)

Accuracy: 1.00


*confusion matrix*

In [94]:
conf_mat = confusion_matrix(y_test, y_predict)

print("\nConfusion Matrix:\n\n", conf_mat)


Confusion Matrix:

 [[1484    0]
 [   0  746]]


### C. Normalize/Scale data

In [95]:
X_norm = scale(X)

In [96]:
print("Normalized/Scale preview:\n")
print(X_norm)

Normalized/Scale preview:

[[ 0.98724336  0.98704414  0.98723477 ...  1.08583034  1.08562976
   1.08566549]
 [ 0.98724336  0.98704414  0.98723477 ...  1.08583034  1.08562976
   1.08566549]
 [ 0.98724336  0.98704414  0.98723477 ...  1.08583034  1.08562976
   1.08566549]
 ...
 [-0.79452394 -0.53474073 -0.62454154 ... -0.17418928 -0.11609245
  -0.06770806]
 [ 0.76883318  0.77794393  0.77851554 ...  0.75957526  0.7037741
   0.73853364]
 [-0.15078866 -0.95294115 -1.49420501 ... -0.45544366 -0.48671705
  -0.53801572]]


*train/test split normalized data*

In [97]:
X_train_norm, X_test_norm, y_train_norm, y_test_norm = train_test_split(X_norm, y, test_size=.3, random_state=1)

In [99]:
print("Normalized X_train shape:", X_train_norm.shape)
print("Normalized X_test shape:", X_test_norm.shape)
print("Normalized y_train shape:", y_train_norm.shape)
print("Normalized y_test shape:", y_test_norm.shape)

Normalized X_train shape: (5203, 6400)
Normalized X_test shape: (2230, 6400)
Normalized y_train shape: (5203,)
Normalized y_test shape: (2230,)


### D. Dimensionality Reduction (PCA)

In [100]:
k = 60
my_pca = PCA(n_components=k)

*fit/transform*

In [101]:
X_train_pca = my_pca.fit_transform(X_train_norm)
X_test_pca = my_pca.transform(X_test_norm)

In [102]:
print("X_train PCA shape:", X_train_pca.shape)
print(" X_test PCA shape:", X_test_pca.shape)

X_train PCA shape: (5203, 60)
 X_test PCA shape: (2230, 60)


### E. Build/Train model (SVM) On Dimensionally Reduced data

In [107]:
svm_shoe_norm = SVC(C=1, kernel="linear", gamma=0.0005, random_state=2)

svm_shoe_norm.fit(X_train_pca, y_train_norm)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0005, kernel='linear',
  max_iter=-1, probability=False, random_state=2, shrinking=True,
  tol=0.001, verbose=False)

*test/predict*

In [108]:
y_predict_norm = svm_shoe.predict(X_test_norm)

print("Misclassified samples: %d" % (y_test_norm != y_predict_norm).sum() )
print("\nSVM Faces Prediction:\n", y_predict_norm)

Misclassified samples: 746

SVM Faces Prediction:
 [0 0 0 ... 0 0 0]


*accuracy*

In [109]:
accuracy_norm = accuracy_score(y_test_norm, y_predict_norm)

print("Accuracy: %.2f" % accuracy_norm)

Accuracy: 0.67


*confusion matrix*

In [110]:
conf_mat_norm = confusion_matrix(y_test_norm, y_predict_norm)

print("\nConfusion Matrix:\n\n", conf_mat_norm)


Confusion Matrix:

 [[1484    0]
 [ 746    0]]


### F. Find best C with GridSearchCV

*merge data*

In [117]:
X_pca_combined = np.vstack((X_train_pca, X_test_pca))
y_combined = y_train_norm.append(y_test_norm, ignore_index=True)

In [118]:
print(X_pca_combined.shape)
print(y_combined.shape)

(7433, 60)
(7433,)


*new SVM to test*

In [119]:
svm_shoe_test = SVC(C=1, kernel="linear", gamma=0.0005, random_state=2)

*fit new SVM*

In [124]:
svm_shoe_test.fit(X_pca_combined, y_combined)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0005, kernel='linear',
  max_iter=-1, probability=False, random_state=2, shrinking=True,
  tol=0.001, verbose=False)

*init GridSearchCV*

In [126]:
search_list = [0.1, 1, 10, 100, 1e3, 5e3, 1e4, 5e4, 1e5]

param_grid = dict(C=search_list)

grid = GridSearchCV(
    svm_shoe_test,
    param_grid,
    cv=10,
    scoring="accuracy"
)

*find best accuracy*

In [127]:
grid.fit(X_pca_combined, y_combined)

GridSearchCV(cv=10, error_score='raise',
       estimator=SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0005, kernel='linear',
  max_iter=-1, probability=False, random_state=2, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.1, 1, 10, 100, 1000.0, 5000.0, 10000.0, 50000.0, 100000.0]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='accuracy', verbose=0)

*report best accuracy*

In [128]:
print("Score:", grid.best_score_)

Score: 0.9997309296381004


*report best C*

In [129]:
print("\nBest C:", grid.best_params_)


Best C: {'C': 0.1}


# 2. Computer Vision