# BactClassifier Notebook
## This Jupyter notebook summarizes the workflow used to classify bacteria

### Step 1:
### Import Libraries

In [1]:
import cv2, glob, os, sys, pickle
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

### Step 2:
### Extract features

##### Using InceptionV3 model to extract features

In [4]:
# InceptionV3 without its classification head, used here as a fixed feature
# extractor: ImageNet weights, a fixed (1532, 2048, 3) input size, and average
# pooling so each image yields a single feature vector.
model = tf.keras.applications.InceptionV3(
    weights="imagenet",
    include_top=False,
    pooling="avg",
    input_shape=(1532, 2048, 3),
)

##### Utility function to read an image file
##### Normalizes pixel values to the 0-1 range
##### Reshapes the image into a batch-of-one tensor to match the InceptionV3 model input

In [5]:
def readImageFile(path):
    """Read an image file and return it as a batch-of-one array scaled by 1/255.

    Parameters
    ----------
    path : str
        Path of the image file to load with ``cv2.imread``.

    Returns
    -------
    numpy.ndarray
        The image divided by 255, with a leading axis of length 1 added,
        i.e. shape ``(1, height, width, channels)``.

    Raises
    ------
    SystemExit
        With exit code -1 if the file cannot be read or processed; the
        reason is printed first.
    """
    try:
        I = cv2.imread(path)
        if I is None:
            # cv2.imread does not raise on a missing/undecodable file, it
            # returns None; fail here so the message names the offending file
            # instead of surfacing an opaque TypeError from the division below.
            raise ValueError("cv2.imread could not read or decode: " + str(path))
        # NOTE(review): OpenCV loads channels in BGR order and this scales to
        # [0, 1]; confirm that matches what the ImageNet weights expect.
        I = I / 255
        # Add the batch axis expected by model.predict.
        I = np.expand_dims(I, axis=0)
    except Exception as e:
        print("\nError reading image file")
        print("\nError Message: " + str(e))
        sys.exit(-1)
    return I

##### Get image data as a list of tensors
##### Get labels as a list
##### Get filenames as a list (to track which file each sample came from)

In [None]:
# Resolve the data folder relative to the notebook's working directory.
nbPath = os.path.abspath("run.ipynb")
dataFolder = os.path.join(os.path.dirname(nbPath), "data")

# Each sub-folder of data/ is one class. glob returns entries in arbitrary
# order, so sort them to make the sample order reproducible between runs;
# plain files sitting directly in data/ are not classes and are skipped.
classes = sorted(p for p in glob.glob(os.path.join(dataFolder, "*")) if os.path.isdir(p))

labels = []    # class name (folder name) of each sample
data = []      # image tensors returned by readImageFile, one per sample
filename = []  # base file name of each sample, parallel to data/labels
# Expose the same list under both spellings so either name works downstream.
filenames = filename

for classPath in classes:
    # glob already returned full paths, so no further join with dataFolder is needed.
    className = os.path.basename(classPath)
    for imageFile in sorted(glob.glob(os.path.join(classPath, "*"))):
        data.append(readImageFile(imageFile))
        filename.append(os.path.basename(imageFile))
        labels.append(className)

##### Feed data to Inception and get features
##### Create a dict

In [None]:
# Map each image file name to its feature vector followed by its class label.
# NOTE: keys are base file names only, so two images with the same name in
# different class folders would overwrite each other.
featureDict = {}
print("\nExtracting features from Model")
for imageName, imageTensor, imageLabel in zip(filename, data, labels):
    # predict returns one row per input; each tensor holds a single image.
    imageFeatures = model.predict(imageTensor)[0].tolist()
    # The label is stored as the last element so it becomes the last CSV column.
    imageFeatures.append(imageLabel)
    featureDict[imageName] = imageFeatures

##### Writing features to a csv file

In [None]:
print("\nWriting extracted features to file: features.csv")
# featureDict maps file name -> [features..., label]; transpose so that each
# file becomes one row. to_csv opens and closes the file itself, so no manual
# open()/close() is needed (the previous extra handle on the same path was
# redundant).
dataFrame = pd.DataFrame(featureDict).T
dataFrame.to_csv("features.csv")
print("\nFeatue file written")

### Step 3:
### Train an SVM classifier

##### Prepare data

In [None]:
# Load the feature file written in the previous step.
csvPath = os.path.join(os.path.dirname(nbPath), "features.csv")
dataset = pd.read_csv(csvPath)

# First column is the saved index (file name), last column is the class label;
# everything in between is the feature vector.
X = dataset.iloc[:, 1:-1].values
Y = dataset.iloc[:, -1].values

# Encode the string class names as integers; the encoder is kept so that
# predictions can be mapped back to class names later.
labelEncoder = LabelEncoder()
Y = labelEncoder.fit_transform(Y)

# Hold out 10% of the samples for testing, with a fixed seed for reproducibility.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.10, random_state = 0)

##### Fit Classifier

In [None]:
# Linear-kernel SVM trained on the extracted features (fixed seed).
classifier = SVC(random_state=0, kernel="linear")
classifier.fit(X=X_train, y=Y_train)

##### Evaluate Classifier

In [None]:
# Score the trained classifier on the held-out split.
Y_pred = classifier.predict(X_test)
cm = confusion_matrix(Y_test, Y_pred)
# Each report prints its heading and its value on separate lines.
print("\nConfusion Matrix for the trained model is :", cm, sep="\n")
print("\nAccuracy of trained model is :", accuracy_score(Y_test, Y_pred), sep="\n")

In [None]:
# 10-fold cross-validation of a fresh linear SVM on the training split.
classifierEval = SVC(kernel = 'linear', random_state = 0)
cv = KFold(n_splits=10, random_state=1, shuffle=True)
scores = cross_val_score(classifierEval, X_train, Y_train, scoring='accuracy', cv=cv, n_jobs=-1)
# report performance as mean (standard deviation) over the folds;
# mean/std are taken from numpy since no bare `mean`/`std` names are imported
print('Accuracy: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))

### Step 4:
### Check results on test folder

In [None]:
def getOneImageFeatures(path):
    """Return the model's output for the single image stored at ``path``.

    The image is loaded with ``readImageFile`` and passed straight to
    ``model.predict``; the prediction result is returned unchanged.
    """
    return model.predict(readImageFile(path))

In [None]:
# NOTE(review): assumes the unseen images live in a "test" folder next to the
# notebook; adjust testPath if they are stored elsewhere.
testPath = os.path.join(os.path.dirname(nbPath), "test")
testFiles = glob.glob(os.path.join(testPath, "*.tif"))
for testFile in testFiles:
    features = getOneImageFeatures(testFile)
    prediction = classifier.predict(features)
    # inverse_transform needs a 1-D array of encoded labels, so decode the
    # whole prediction array first and then take its single element.
    predictedLabel = labelEncoder.inverse_transform(prediction)[0]
    print("Image file " + os.path.split(testFile)[1] + " is predicted as :" + str(predictedLabel))