A program that reads and processes images for a Support Vector Machine (SVM) to classify the images as good or bad.

In [1]:
import numpy as np
import pandas as pd
import os
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline
import cv2

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import linear_model
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

  from numpy.core.umath_tests import inner1d


In [2]:
def read_images(paths):
    """
    Reads every .jpg image matching ``paths`` as grayscale and flattens each
    image into one row of pixel values.

    Files are loaded in sorted filename order so the row order is the same on
    every run and every machine (glob on its own returns files in arbitrary
    order), which lets rows be matched to labels listed in that same order.

    Parameters
    ----------
    paths : string
        Either a directory that contains the images, or a path prefix that
        is completed with '*.jpg' (e.g. '/data/images/').

    Returns
    ----------
    data : numpy array with one row per image and one column per pixel

    Raises
    ----------
    ValueError
        If no .jpg file matches, if a file cannot be decoded as an image,
        or if the images do not all have the same dimensions.
    """
    #Get list of images; a directory given without a trailing separator works too
    if os.path.isdir(paths):
        pattern = os.path.join(paths, '*.jpg')
    else:
        pattern = paths + '*.jpg'
    images = sorted(glob(pattern))
    if not images:
        raise ValueError("No .jpg images found matching '{}'".format(pattern))

    #Read images from list
    data = []
    for file in images:
        picture = cv2.imread(file)
        #cv2.imread reports an unreadable file by returning None instead of raising
        if picture is None:
            raise ValueError("Could not read image '{}'".format(file))
        data.append(cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY))

    #Images of different sizes cannot be stacked into one rectangular matrix
    if len({picture.shape for picture in data}) > 1:
        raise ValueError("Images matching '{}' do not all have the same dimensions".format(pattern))

    #One row per image, one column per pixel
    data = np.array(data)
    data = data.reshape((len(data), -1))

    print(data.shape)
    return data

In [3]:
def svm_layers(X_train, y_train, X_test, y_test, gamma=0.001):
    """
    Fits a Support Vector Machine classifier to the training data and
    evaluates it on the test data.

    Parameters
    ------------
    X_train = array
    X_test = array
    y_train = data frame or array
    y_test = data frame or array
    gamma = kernel coefficient handed to svm.SVC (defaults to 0.001)

    Returns
    ------------
    model = the fitted svm.SVC classifier
    accuracy = accuracy score of the predictions made for X_test
    """

    model = svm.SVC(gamma=gamma)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    #Show the per-class report before returning the single accuracy figure
    print(classification_report(y_test, y_pred))
    accuracy = accuracy_score(y_test, y_pred)

    return model, accuracy

In [4]:
def supervised_models(model, X_train, y_train, X_test=None, y_test=None):
    """
    Fits a supervised model to the training data, prints a classification
    report for the test data and returns the fitted model with its predictions

    Parameters
    ------------
    model = supervised learning model (must provide fit, predict and predict_proba)
    X_train = array
    y_train = data frame or array
    X_test = array (optional; when omitted the notebook-level X_test is used)
    y_test = data frame or array (optional; when omitted the notebook-level y_test is used)

    Returns
    ------------
    model = the fitted model
    probabilities = second column of model.predict_proba(X_test)
    y_pred = predicted labels for X_test

    Raises
    ------------
    NameError if a test set is neither passed in nor defined at notebook level
    """

    #Older calls pass only the training data, so fall back to the notebook-level
    #test split when the test set is not given explicitly
    notebook_scope = globals()
    if X_test is None:
        if 'X_test' not in notebook_scope:
            raise NameError("name 'X_test' is not defined; pass X_test explicitly")
        X_test = notebook_scope['X_test']
    if y_test is None:
        if 'y_test' not in notebook_scope:
            raise NameError("name 'y_test' is not defined; pass y_test explicitly")
        y_test = notebook_scope['y_test']

    model.fit(X_train, y_train)
    #Column 1 of predict_proba is the probability of the second entry in model.classes_
    probabilities = model.predict_proba(X_test)[:, 1]
    y_pred = model.predict(X_test)

    print(classification_report(y_test, y_pred))

    return model, probabilities, y_pred

In [5]:
#Table with the image names and their labels
df_train = pd.read_csv('/Users/micha/ea-applications/data/test-images.csv')

#Location of the .jpg files that are loaded as features
paths = '/Users/micha/ea-applications/data/training-test-images/'

train_images = read_images(paths)

y = df_train['Label'].values

#NOTE(review): the rows of train_images are assumed to be in the same order as
#the rows of the CSV - confirm, otherwise labels are paired with the wrong images
if len(train_images) != len(y):
    raise ValueError(
        "Number of images ({}) does not match number of labels ({})".format(len(train_images), len(y))
    )

X_train, X_test, y_train, y_test = train_test_split(train_images, y, random_state=42, test_size=0.2)

print(df_train.head())
print(X_train.shape)
print(y_train)

(43, 327680)
                          Image_Name  Label
0             BIRD_806937_Therm_1104      1
1          CROSSBOW_707661_Therm_657      1
2   DEERFIELD_BEACH_703541_Therm_377      1
3  DEERFIELD_BEACH_703542_Therm_2718      1
4           HARRIS_203635_Therm_1175      1
(34, 327680)
[1 1 0 1 0 0 0 1 0 0 0 1 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 0 0 0 1 0 0 1]


In [6]:
#Fit the SVM on the training split and show the accuracy it reaches on the test split
svm_model, metrics = svm_layers(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print(metrics)

             precision    recall  f1-score   support

          0       0.00      0.00      0.00         4
          1       0.56      1.00      0.71         5

avg / total       0.31      0.56      0.40         9

0.5555555555555556


  'precision', 'predicted', average, warn_for)


In [7]:
#Logistic regression with default settings, evaluated through the shared helper
model = linear_model.LogisticRegression()
model_logistic, probabilities, y_pred = supervised_models(
    model=model,
    X_train=X_train,
    y_train=y_train,
)
print(probabilities)

             precision    recall  f1-score   support

          0       0.40      0.50      0.44         4
          1       0.50      0.40      0.44         5

avg / total       0.46      0.44      0.44         9

[8.06901310e-01 1.55005221e-02 1.98119473e-03 2.31767891e-07
 2.47852622e-04 9.99999475e-01 6.73386986e-02 8.41799383e-01
 9.91225365e-01]


In [8]:
#Seed the forest so the reported metrics can be reproduced on a re-run
model = RandomForestClassifier(random_state=42)
#Keep the fitted forest under its own name so the logistic model is not overwritten
model_forest, probabilities, y_pred = supervised_models(model, X_train, y_train)
print(probabilities)

             precision    recall  f1-score   support

          0       0.50      0.25      0.33         4
          1       0.57      0.80      0.67         5

avg / total       0.54      0.56      0.52         9

[0.7 0.5 0.7 0.6 0.8 0.9 0.4 0.6 0.7]


In [9]:
#Seed the boosting model so the reported metrics can be reproduced on a re-run
model = GradientBoostingClassifier(random_state=42)
model_boosting, probabilities, y_pred = supervised_models(model, X_train, y_train)
print(probabilities)

             precision    recall  f1-score   support

          0       0.67      0.50      0.57         4
          1       0.67      0.80      0.73         5

avg / total       0.67      0.67      0.66         9

[9.99722706e-01 2.20359494e-04 6.45937056e-02 2.67220682e-01
 9.99582226e-01 9.99722706e-01 9.35385530e-01 9.87544707e-01
 9.99722706e-01]
