Imports

In [10]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
pd.set_option('display.max_columns', None)
from glob import glob
import time
from IPython.display import display

import cv2
from cv2 import imread
from cv2 import resize as Resize

from sklearn.cluster import MiniBatchKMeans

from sklearn.model_selection import train_test_split
from sklearn import metrics as metrics
from sklearn.model_selection import cross_validate
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures

from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier

np.random.seed(42)

In [11]:
def myTimer(func):
    '''
    decorator that reports the wall-clock run time of func

    Every call of the decorated function prints
    "computation time = <seconds> s" once func has returned and then hands
    back func's return value unchanged. Nothing is printed when func raises,
    because the exception propagates before the report line is reached.

    func : callable to be timed
    returns a wrapper accepting the same positional/keyword arguments as func
    '''
    # function-scope import so this cell does not depend on a top-level import
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ for help() and tracebacks
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can
        start = time.perf_counter()
        ret = func(*args, **kwargs)
        print('computation time = %s s' % (time.perf_counter() - start))
        return ret
    return wrapper

@myTimer
def doML(alg, settings, XTrain, YTrain, XTest, YTest, title=None):
    '''
    fit one classifier and print its classification report on a test split

    alg            : estimator class (not an instance), built as alg(**settings)
    settings       : dict of keyword arguments for the estimator constructor
    XTrain, YTrain : data handed to fit()
    XTest, YTest   : data used for predict() and for the printed report
    title          : optional heading printed instead of the separator line

    returns the fitted estimator; the run time is printed by the myTimer decorator
    '''
    rule = '==========================================================================='
    heading = ('====   %s   ====' % title) if title else rule
    print(heading)
    print(alg)
    print(settings)

    model = alg(**settings)
    model.fit(XTrain, YTrain)

    predictions = model.predict(XTest)
    print(metrics.classification_report(YTest, predictions))
    print(rule)
    return model

@myTimer
def doCV(alg, grid, X, Y, title=None):
    '''
    run a grid search with cross validation and show the result table

    alg   : estimator class (not an instance); a default-constructed alg() is
            handed to GridSearchCV
    grid  : parameter grid passed to GridSearchCV
    X, Y  : data passed to the search's fit()
    title : optional heading printed instead of the separator line

    returns the fitted GridSearchCV object; the run time is printed by the
    myTimer decorator
    '''
    rule = '==========================================================================='
    print(('%s' % title) if title else rule)

    search = GridSearchCV(alg(), grid)
    search.fit(X, Y)

    # cv_results_ rendered as a table, one row per parameter combination
    display(pd.DataFrame(search.cv_results_))
    return search

In [12]:
def increase_brightness(img, value=60):
    '''
    brighten an RGB image by adding a constant to the V channel in HSV space

    img   : input image in RGB channel order
    value : offset added to the V channel; entries that would exceed 255 are
            set to 255 instead
    returns the brightened image in RGB
    '''
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)

    # work on the V channel through a view, so edits land directly in hsv
    brightness = hsv[:, :, 2]
    ceiling = 255 - value
    brightness[brightness > ceiling] = 255       # would overflow -> saturate
    brightness[brightness <= ceiling] += value   # everything else gets the offset

    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)

def transformIMG(pandasRow, newShape=(50,50), color=False):
    '''
    load and pre-process the image described by one row of the annotation table

    steps: read the file at pandasRow['Path'], convert BGR -> RGB, brighten
    with increase_brightness(value=40), crop to the region given by the
    Roi.X1 / Roi.Y1 / Roi.X2 / Roi.Y2 entries and resize the crop to newShape

    pandasRow : row-like object supporting ['Path'], ['Roi.X1'], ... lookups
    newShape  : target size handed to cv2.resize
    color     : if True return the resized RGB image, otherwise its gray
                scale conversion

    raises FileNotFoundError if the image file cannot be read
    '''
    path = pandasRow['Path']
    img = imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of a cryptic error further down
        raise FileNotFoundError('could not read image: %s' % path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    imgBright = increase_brightness(img, value=40)
    imgROI = imgBright[ pandasRow['Roi.Y1']:pandasRow['Roi.Y2'], pandasRow['Roi.X1']:pandasRow['Roi.X2'] ]

    # resize to the newShape argument, not to a notebook-level global, so the
    # parameter is honoured and the function also works on a fresh kernel
    imgTrafo = Resize(imgROI, newShape)
    if color:
        return imgTrafo
    return cv2.cvtColor(imgTrafo, cv2.COLOR_RGB2GRAY)

def prepData(pdDataFrame, nuShape, col=False):
    '''
    load all images, call pre-processing
    return table of flattened images and labels

    pdDataFrame : table with one image per row; each row is handed to
                  transformIMG and must also provide a 'ClassId' entry
    nuShape     : target image size passed on to transformIMG; the circular
                  mask is built from nuShape[0] only, so a square target
                  shape is assumed
    col         : if True keep the colour channels, otherwise use gray scale

    returns (X, Y) as numpy arrays: X holds, per image, only the pixels
    inside the centred circular mask; Y holds the 'ClassId' labels
    '''
    side = nuShape[0]
    half = int(.5*side)
    x, y = np.ogrid[ 0:side, 0:side ]
    x, y = x - half, y - half
    # the mask is identical for every image, so build it once up front
    mask = x**2 + y**2 <= .25*side**2

    X, Y = [], []
    for k in range(pdDataFrame.shape[0]):
        d = pdDataFrame.iloc[k]
        img = transformIMG(d, newShape=nuShape, color=col)
        # a 2-D boolean mask selects pixels in gray (H, W) and colour
        # (H, W, C) images alike; colour images keep their channel axis
        X.append(img[mask])
        Y.append( d['ClassId'] )
    return(np.asarray(X), np.asarray(Y))

In [13]:
# DATA LOADING
# read the training annotations and turn every listed image into a masked,
# flattened gray-scale feature vector of a 32x32 resize
nuShape = (32, 32)
data = pd.read_csv('Train.csv')
Xt, Yt = prepData(data, nuShape)

In [14]:
# split off 20% of the training samples with a fixed seed
XFTrain, XFTest, YFTrain, YFTest = train_test_split(
    Xt,
    Yt,
    test_size=.2,
    random_state=42,
)

Train MLP using all training data

In [16]:
# MLP with one hidden layer of 500 units, fitted on the complete training set
clf = MLPClassifier(
    hidden_layer_sizes=500,
    max_iter=2000,
    batch_size=100,
)
bestMLP = clf.fit(Xt, Yt)

Train RFC using all training data

In [18]:
# random forest (200 trees, no bootstrapping, balanced class weights),
# fitted on the complete training set
clf = RandomForestClassifier(
    n_estimators=200,
    min_samples_split=10,
    max_leaf_nodes=None,
    bootstrap=False,
    class_weight='balanced',
)
bestRFC = clf.fit(Xt, Yt)

TEST DATA

In [19]:
# build the evaluation set from Test.csv with the same 32x32 pre-processing
# that was used for the training images
nuShape = (32, 32)
Testdata = pd.read_csv('Test.csv')
XRealTest, YRealTest = prepData(Testdata, nuShape)

In [20]:
# classify the images listed in Test.csv with the MLP and print the
# per-class precision / recall / f1-score report
YRealPred = bestMLP.predict(XRealTest)
print(metrics.classification_report(YRealTest, YRealPred))

              precision    recall  f1-score   support

           0       0.64      0.92      0.75        60
           1       0.89      0.83      0.86       720
           2       0.82      0.93      0.87       750
           3       0.94      0.90      0.92       450
           4       0.88      0.81      0.84       660
           5       0.89      0.76      0.82       630
           6       0.62      0.62      0.62       150
           7       0.89      0.74      0.81       450
           8       0.77      0.81      0.79       450
           9       0.87      0.94      0.90       480
          10       0.95      0.86      0.90       660
          11       0.84      0.78      0.81       420
          12       0.75      0.85      0.80       690
          13       0.91      0.98      0.94       720
          14       0.85      0.91      0.88       270
          15       0.87      0.94      0.91       210
          16       0.89      0.88      0.89       150
          17       0.83    

In [21]:
# classify the images listed in Test.csv with the random forest and print the
# per-class precision / recall / f1-score report
YRealPred2 = bestRFC.predict(XRealTest)
print(metrics.classification_report(YRealTest, YRealPred2))

              precision    recall  f1-score   support

           0       1.00      0.25      0.40        60
           1       0.80      0.92      0.86       720
           2       0.77      0.87      0.82       750
           3       0.80      0.76      0.78       450
           4       0.89      0.79      0.83       660
           5       0.65      0.71      0.68       630
           6       0.80      0.57      0.67       150
           7       0.79      0.83      0.81       450
           8       0.78      0.69      0.73       450
           9       0.96      0.95      0.96       480
          10       0.97      0.94      0.95       660
          11       0.89      0.95      0.92       420
          12       0.86      0.94      0.90       690
          13       0.95      0.99      0.97       720
          14       0.89      0.98      0.93       270
          15       0.93      0.96      0.95       210
          16       1.00      0.97      0.98       150
          17       0.96    