### mount Google Drive

In [0]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime; the call prompts for an
# authorization code before the drive becomes available.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


### copy leaf data to local runtime for faster processing

In [0]:
# Recursively copy the competition data folder from the mounted Drive
# ("My Drive", space escaped for the shell) into the current working directory.
!cp -r /content/drive/My\ Drive/ECE542_sp20_CompetitionData ./

### import libraries

In [0]:
import numpy as np
import pandas as pd
import os, random, math, glob, cv2, pickle
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.utils import shuffle
from sklearn.model_selection import  train_test_split
from keras.utils import to_categorical
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix, classification_report

from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.feature import hog
from skimage import exposure

### define data paths

In [0]:
# All paths are relative to the local copy of the competition data folder.
data_root = 'ECE542_sp20_CompetitionData/'

# Training images (and their annotation CSV) live in the same sub-folder.
train_data_path = data_root + 'TrainData-C1/'
test_data_path = data_root + 'TestData/'
train_annotations_path = train_data_path + 'TrainAnnotations.csv'

### load annotations

In [0]:
# Load the training annotations (one row per image: file name + class label).
df = pd.read_csv(train_annotations_path)

# Count of images per class, printed in ascending label order.
# The classes are taken from the data instead of assuming labels 0..4, so the
# cell keeps working if the label set changes.
class_counts = df['annotation'].value_counts().sort_index()
c = len(class_counts)  # number of distinct classes found in the annotations
for n_images in class_counts:
    print(n_images)

# Preview the annotations. This must be the last expression of the cell:
# a bare expression in the middle of a cell is evaluated but never displayed.
df.head()

488
185
130
131
91


### compute HoG features for each training image and build the data and label vectors

In [0]:
data_dir = train_data_path

# Build the file-name -> label lookup once instead of scanning the whole
# DataFrame for every image. If a file name is listed more than once the first
# row wins, which matches the previous `.iloc[0]` behaviour.
label_by_file = (
    df.drop_duplicates('file_name')
      .set_index('file_name')['annotation']
      .to_dict()
)

x = []  # one HoG feature vector per training image
y = []  # class label of the image at the same position in `x`

# glob returns files in a file-system dependent order; sorting makes the sample
# order (and therefore everything derived from it) reproducible across runs.
for filenm in sorted(glob.glob(data_dir + '*.jpg')):

    fnm = os.path.basename(filenm)
    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` -- confirm against the installed version.
    fd = hog(imread(filenm), orientations=9, pixels_per_cell=(16, 16), cells_per_block=(2, 2), visualize=False, multichannel=True)
    x.append(fd)
    # Raises KeyError naming the file if an image has no annotation row.
    y.append(label_by_file[fnm])

### split into train and val data - stratified; equal percentage from all classes

In [0]:
x = np.array(x)
y = np.array(y)

test_per = 0.2   # fraction of the samples held out for validation
split_seed = 42  # fixed seed so the shuffled split is identical on every run

# Stratified split: each class keeps the same proportion in train and val.
# Without `random_state` the split (and every metric reported below) changes
# each time the notebook is re-run.
x_train, x_val, y_train, y_val = train_test_split(
    x, y,
    test_size=test_per,
    shuffle=True,
    stratify=y,
    random_state=split_seed,
)

print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)

(820, 40716)
(820,)
(205, 40716)
(205,)


### pca decomposition

In [0]:
# PCA cannot extract more components than min(n_samples, n_features), so the
# requested 500 is capped by the training matrix dimensions.
n_comp = min(500, min(x_train.shape))
pca_seed = 42  # fixed seed: the default solver may use a randomized SVD

# Fit the projection on the training split only, then apply the same
# projection to the validation split.
pca = PCA(n_components=n_comp, random_state=pca_seed)
x_train_pca = pca.fit_transform(x_train)
x_val_pca = pca.transform(x_val)

print(x_train_pca.shape, x_val_pca.shape)

(820, 500) (205, 500)


### train a linear SVM on the PCA-reduced training data

In [0]:
svm_seed = 42  # fixed seed so the fitted model is reproducible across runs

# Linear SVM on the PCA-reduced features; the large iteration budget is kept
# from the original configuration.
clf = LinearSVC(max_iter=100000, random_state=svm_seed)
clf.fit(x_train_pca, y_train)

# Mean accuracy on the held-out validation split.
print('\nAccuracy: %f'%clf.score(x_val_pca, y_val))


Accuracy: 0.858537


### confusion matrix and classification report on validation data

In [0]:
# Predicted labels for the validation split.
ypred = clf.predict(x_val_pca)

# Build both summaries first, then print them in the same order as before:
# confusion matrix followed by the classification report.
val_confusion = confusion_matrix(y_val, ypred)
val_report = classification_report(y_val, ypred)

print(val_confusion)
print(val_report)

[[95  3  0  0  0]
 [12 23  2  0  0]
 [ 2  3 20  1  0]
 [ 0  0  3 22  1]
 [ 0  0  0  2 16]]
              precision    recall  f1-score   support

           0       0.87      0.97      0.92        98
           1       0.79      0.62      0.70        37
           2       0.80      0.77      0.78        26
           3       0.88      0.85      0.86        26
           4       0.94      0.89      0.91        18

    accuracy                           0.86       205
   macro avg       0.86      0.82      0.84       205
weighted avg       0.86      0.86      0.85       205



### read test filenames

In [0]:
test_list = []
test_dir = test_data_path

# Every entry of the test directory, in sorted order so predictions line up
# with a stable file ordering.
# NOTE(review): all directory entries are included, not only image files.
test_file_list = sorted(os.listdir(test_dir))
print(len(test_file_list))

200


### compute HoG features for the test images and apply the fitted PCA

In [0]:
# HoG settings used for the test images.
hog_params = dict(
    orientations=9,
    pixels_per_cell=(16, 16),
    cells_per_block=(2, 2),
    visualize=False,
    multichannel=True,
)

# One HoG feature vector per test file, in the order of `test_file_list`.
x_test = np.array([
    hog(imread(test_dir + name), **hog_params)
    for name in test_file_list
])

# Project with the PCA that was already fitted; it is not re-fitted here.
x_test_pca = pca.transform(x_test)

### generate final predictions and check class distribution

In [0]:
nclass = 5  # number of classes used for the one-hot encoding

# Predicted class label for each test image, plus its one-hot encoding.
final_pred = clf.predict(x_test_pca)
final_pred_cat = to_categorical(final_pred, num_classes=nclass)

print(final_pred_cat.shape)
# `minlength` keeps one count per class even when the highest-numbered classes
# are never predicted; without it the printed distribution would be shorter.
print(np.bincount(final_pred, minlength=nclass))

(200, 5)


### save final predictions

In [0]:
# Timestamp (day_month_year__hour_minute_second) shared by the output file names.
curr_dt_string = datetime.now().strftime('%d_%m_%y__%H_%M_%S')

onehot_csv_name = 'predictions_' + curr_dt_string + '.csv'
labels_npy_name = 'final_pred_' + curr_dt_string + '.npy'

# One-hot predictions as comma-separated text, raw label vector as .npy.
np.savetxt(onehot_csv_name, final_pred_cat, delimiter=',')
np.save(labels_npy_name, final_pred)

### save classifier as pickle model

In [0]:
# Serialize the fitted classifier to a pickle file named with the run timestamp.
model_file = 'clf_' + curr_dt_string + '.pkl'
with open(model_file, 'wb') as model_out:
    pickle.dump(clf, model_out)

### load classifier from pickle file

In [0]:
# Read the classifier back from the pickle file named with the run timestamp.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
model_file = 'clf_' + curr_dt_string + '.pkl'
with open(model_file, 'rb') as model_in:
    clf1 = pickle.load(model_in)