A program that reads and processes images for a Support Vector Machine (SVM) to classify the images as good or bad.

In [1]:
import numpy as np
import pandas as pd
import os
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline
import cv2

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import linear_model
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

  from numpy.core.umath_tests import inner1d


In [2]:
def read_images(paths):
    """
    Reads every .jpg image matching a path prefix and returns the images as
    flattened grayscale rows.

    Parameters
    ----------
    paths : string
        Prefix to which '*.jpg' is appended before globbing. It may itself
        contain glob wildcards and should end with a path separator.

    Returns
    ----------
    data : numpy array with one row per image
        Each row is one grayscale image flattened to 1-D. Rows follow the
        sorted order of the matched file paths.
        (assumes every image has the same dimensions -- TODO confirm)

    Raises
    ----------
    ValueError
        If no .jpg file matches, or if a matched file cannot be decoded.
    """
    # glob returns matches in arbitrary order; sorting makes the row order
    # deterministic so rows can be paired by position with labels kept elsewhere.
    image_files = sorted(glob(paths + '*.jpg'))
    if not image_files:
        raise ValueError('No .jpg images found matching ' + paths + '*.jpg')

    data = []
    for file in image_files:
        image = cv2.imread(file)
        # cv2.imread reports an unreadable file by returning None, not by raising
        if image is None:
            raise ValueError('Could not read image: ' + file)
        data.append(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

    data = np.array(data)
    # Flatten each image to a single feature row
    data = data.reshape((len(data), -1))

    print(data.shape)
    return data

In [3]:
def svm_layers(X_train, y_train, X_test, y_test, gamma=0.001):
    """
    Fits a Support Vector Machine classifier to the training data and
    evaluates it on the test data

    Parameters
    ------------
    X_train = array
    y_train = data frame or array
    X_test = array
    y_test = data frame or array
    gamma = kernel coefficient passed to svm.SVC
            (defaults to 0.001, the value that used to be hard-coded)

    Returns
    ------------
    fitted model and its accuracy score on the test data
    """

    model = svm.SVC(gamma=gamma)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # The per-class report is printed as a side effect; only accuracy is returned
    print(classification_report(y_test, y_pred))
    accuracy = accuracy_score(y_test, y_pred)

    return model, accuracy

In [4]:
def supervised_models(model, X_train, y_train, X_test=None, y_test=None):
    """
    Fits a supervised model to the training data, predicts on the test data
    and prints a classification report

    Parameters
    ------------
    model = supervised learning model with fit, predict and predict_proba
    X_train = array
    y_train = data frame or array
    X_test = array (optional)
    y_test = data frame or array (optional)
        When X_test or y_test is omitted, the notebook-level variable of the
        same name is used, which is what this function always did before the
        parameters existed.

    Returns
    ------------
    fitted model, probabilities, predicted labels
        probabilities is column 1 of model.predict_proba(X_test) only.
        NOTE(review): with more than two classes this is presumably the
        probability of just the second class -- confirm that is intended.

    Raises
    ------------
    NameError
        If test data is neither passed in nor defined at notebook level.
    """

    # Resolve the test data before fitting so a missing variable fails fast
    if X_test is None or y_test is None:
        notebook_scope = globals()
        candidates = (('X_test', X_test), ('y_test', y_test))
        missing = [name for name, value in candidates
                   if value is None and name not in notebook_scope]
        if missing:
            raise NameError('test data not provided and not defined globally: '
                            + ', '.join(missing))
        if X_test is None:
            X_test = notebook_scope['X_test']
        if y_test is None:
            y_test = notebook_scope['y_test']

    model.fit(X_train, y_train)
    probabilities = model.predict_proba(X_test)[:,1]
    y_pred = model.predict(X_test)

    print(classification_report(y_test,y_pred))

    return model, probabilities, y_pred

In [5]:
# Root folder holding the label CSV and the image folders.
# NOTE(review): absolute, machine-specific path -- change DATA_DIR to run elsewhere.
DATA_DIR = '/Users/micha/ea-applications/data/'

# Label table: one row per image, with the class in the 'Label' column
df_train = pd.read_csv(DATA_DIR + 'test-images.csv')

# Glob prefix covering every folder whose name ends in 'MEDIA'
paths = DATA_DIR + 'training-test-images/Thermal/mytest/*MEDIA/'

train_images = read_images(paths)

y = df_train['Label'].values

# Images and labels are paired purely by position, so the counts must agree.
# NOTE(review): this does not verify that row i of the CSV describes image i.
if len(train_images) != len(y):
    raise ValueError('Number of images (%d) does not match number of labels (%d)'
                     % (len(train_images), len(y)))

X_train, X_test, y_train, y_test = train_test_split(train_images, y, random_state=42, test_size=0.2)

print(df_train.head())
print(X_train.shape)
print(y_train)

(424, 327680)
  Image Date     MEDIA Image_Name  Label Unnamed: 4
0  2/16/2019  100MEDIA   DJI_0001      2        NaN
1  2/16/2019  100MEDIA   DJI_0002      2        NaN
2  2/16/2019  100MEDIA   DJI_0003      2        NaN
3  2/16/2019  100MEDIA   DJI_0004      2        NaN
4  2/16/2019  100MEDIA   DJI_0005      2        NaN
(339, 327680)
[0 0 0 0 0 0 3 3 0 0 2 0 0 3 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 3 0 2 3 0 0 0
 0 0 0 2 0 0 0 0 3 0 2 0 0 2 0 0 0 0 0 0 0 0 3 3 0 0 0 3 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 2 2 0 0 0 0 0 2 0 0 0
 0 0 0 0 0 0 0 2 0 0 0 0 0 3 0 2 0 1 0 0 1 3 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 3 1 0 0 0 1 3 0 0 2 3 0 0 0 0 0 0 0 0 0 1 0 0 0 0 2 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 2 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 3 0 0 0 0 0 0 0 0 0
 0 2 0 3 0 0 0 0 3 0 0 0 0 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 3
 3 0 2 0 0 0 0 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 2 0

In [6]:
# Fit the SVM on the training split and evaluate it on the held-out split.
# svm_layers prints the classification report itself; the second returned
# value (stored in `metrics`) is the accuracy score.
svm_model, metrics = svm_layers(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print(metrics)

             precision    recall  f1-score   support

          0       0.78      1.00      0.87        66
          1       0.00      0.00      0.00         1
          2       0.00      0.00      0.00         9
          3       0.00      0.00      0.00         9

avg / total       0.60      0.78      0.68        85

0.7764705882352941


  'precision', 'predicted', average, warn_for)


In [7]:
# Fit a logistic regression model with default settings.
# supervised_models prints the classification report and returns the fitted
# model, column 1 of predict_proba, and the predicted labels.
model = linear_model.LogisticRegression()
model_logistic, probabilities, y_pred = supervised_models(
    model=model,
    X_train=X_train,
    y_train=y_train,
)
print(probabilities)

             precision    recall  f1-score   support

          0       0.85      0.85      0.85        66
          1       0.00      0.00      0.00         1
          2       0.36      0.56      0.43         9
          3       0.20      0.11      0.14         9

avg / total       0.72      0.73      0.72        85

[7.11081697e-06 1.19368459e-14 4.60216062e-04 2.66326016e-03
 5.22833161e-11 3.75572768e-07 7.38848482e-05 5.13514129e-07
 6.95597250e-15 3.91478972e-14 4.40296495e-10 1.19485212e-04
 1.46182724e-11 4.11913008e-08 1.08650880e-15 6.87529809e-12
 3.18593885e-12 4.55304852e-18 4.73956060e-09 4.52919993e-14
 7.65369743e-08 5.81860631e-09 3.89081806e-07 5.30926412e-18
 4.49080374e-10 1.18462469e-01 1.01696531e-07 2.32750430e-03
 1.76647938e-07 5.79262054e-06 5.78933978e-06 1.05117032e-01
 1.88665106e-13 3.39000065e-13 1.19181851e-14 1.46332148e-08
 3.33981684e-13 1.38720040e-12 1.65590647e-09 2.03383239e-14
 5.36137477e-09 2.21860695e-07 3.10955411e-15 7.15152220e-10
 9.74656

  'precision', 'predicted', average, warn_for)


In [8]:
# Run random forest classifier
# A fixed seed makes the forest reproducible across re-runs
model = RandomForestClassifier(random_state=42)
# Stored under its own name so the fitted logistic regression model
# (model_logistic) is no longer overwritten by the forest.
model_forest, probabilities, y_pred = supervised_models(model, X_train, y_train)
print(probabilities)

             precision    recall  f1-score   support

          0       0.84      0.97      0.90        66
          1       1.00      1.00      1.00         1
          2       0.86      0.67      0.75         9
          3       0.00      0.00      0.00         9

avg / total       0.76      0.84      0.79        85

[0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.1 0.  0.9 0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.1 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.1 0.1 0.  0.  0.  0.  0.1 0.  0.  0.  0.  0.  0.1 0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]


In [9]:
# Run gradient boosting classifier
# A fixed seed makes the boosted trees reproducible across re-runs
model = GradientBoostingClassifier(random_state=42)
model_boosting, probabilities, y_pred = supervised_models(model, X_train, y_train)
print(probabilities)

             precision    recall  f1-score   support

          0       0.86      1.00      0.92        66
          1       1.00      1.00      1.00         1
          2       1.00      0.78      0.88         9
          3       0.00      0.00      0.00         9

avg / total       0.78      0.87      0.82        85

[2.78564873e-05 2.57362422e-05 7.58881419e-05 1.80237534e-04
 1.79025223e-04 2.57362422e-05 2.57362422e-05 2.57362422e-05
 2.15356210e-05 2.40026257e-05 2.57362422e-05 2.57221071e-05
 2.57232920e-05 2.34828602e-05 1.18350505e-04 1.75839684e-06
 2.79460244e-05 2.57344518e-05 2.57362422e-05 2.57298142e-05
 2.57362422e-05 2.57362422e-05 1.29895795e-04 2.57334074e-05
 2.57349572e-05 9.26537622e-04 2.57362422e-05 9.44828337e-01
 2.56638914e-05 2.57362422e-05 2.57362422e-05 2.57362422e-05
 2.57362422e-05 2.57362422e-05 2.49467238e-05 2.82998303e-05
 2.57362422e-05 2.57087146e-05 3.22956717e-05 3.27531140e-05
 2.57362422e-05 3.88167368e-05 4.86779714e-05 2.57352167e-05
 2.57362

  'precision', 'predicted', average, warn_for)
