# In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from tensorflow.keras.datasets import mnist

import cv2

from src.Node import Node
from src.NCMTree import NCMTree
from src.NCMForest import NCMForest
from headers.NCMClassifier import NCMClassifier
from headers.OneCentroid import OneCentroid
from headers.utils import *

def test_gen_Node():
    """Fit and evaluate one generative root ``Node`` per digit class.

    For each class label of the scikit-learn digits dataset, a node is built
    with that label as ``root_class`` and fitted on the normalised training
    split. The node's plot of the training set is shown, then its splitting
    classifier is scored on the test split against binary labels
    (1 = root class, 0 = any other class); accuracy and the confusion matrix
    are printed.
    """
    dataset = load_digits()
    X, y = dataset.data, dataset.target
    n_labels = len(np.unique(y))

    for root_label in range(n_labels):
        # Fixed seed: every iteration works on the same train/test partition.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=42
        )
        scaler = Normalizer()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        print("Node with root class = ", root_label)
        node = Node(
            None, False, 1, 21,
            method_split='generative',
            distance="euclidean",
            root_class=root_label,
            alpha=0.99,
        )
        node.fit(X_train, y_train)

        print("Train set split : ")
        node.plot(X_train)

        print("Test set performance : ")
        # Binary ground truth: 1 for the root class, 0 for every other digit.
        y_binary = (y_test == root_label).astype(y_test.dtype)
        predicted = node.splitting_clf.predict(X_test)
        print("Test Accuracy : ", np.mean(predicted == y_binary))

        print("Confusion Matrix : ")
        print(confusion_matrix(y_binary, predicted))


def test_forest():
    """Train an ``NCMForest`` on raw MNIST pixels and print a classification report."""
    (train_images, y_train), (test_images, y_test) = mnist.load_data()

    # Flatten every image into a single 784-value row vector.
    X_train = train_images.reshape(train_images.shape[0], 784)
    X_test = test_images.reshape(test_images.shape[0], 784)

    forest_params = dict(
        n_classes=10,
        tree_multiplier=1,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=5,
        method_max_features="sqrt",
        method_split='maj_class',
        distance="euclidean",
        root_distance="mahalanobis",
        alpha=0.95,
        nbgenlayer=1,
    )
    clf = NCMForest(**forest_params)
    clf.fit(X_train, y_train)

    # Report per-class precision / recall / F1 on the test set.
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred, digits=5))
    

def deskew(img, imgSize):
    """Return a skew-corrected version of ``img`` based on its image moments.

    Parameters
    ----------
    img : array
        Image passed to ``cv2.moments`` and ``cv2.warpAffine``.
    imgSize : int
        Side length of the (square) output image.

    Returns
    -------
    array
        A plain copy of ``img`` when the ``mu02`` central moment is close to
        zero, otherwise the image warped by the shear that cancels the skew.
    """
    moments = cv2.moments(img)
    mu02 = moments['mu02']

    # A near-zero mu02 would make the skew ratio unstable: leave the image as is.
    if abs(mu02) < 1e-2:
        return img.copy()

    # Skew estimated from the ratio of central moments.
    skew = moments['mu11'] / mu02

    # Affine shear matrix that corrects the skew.
    shear = np.float32([
        [1, skew, -0.5*imgSize*skew],
        [0, 1, 0],
    ])

    warp_flags = cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR
    return cv2.warpAffine(img, shear, (imgSize, imgSize), flags=warp_flags)

def _hog_features(images, hog, imsize):
    """Deskew every image and return its HOG descriptors as one row per image.

    The result is reshaped strictly to ``(len(images), hog.getDescriptorSize())``
    so a mismatch between the HOG parameters and the computed descriptors
    raises instead of being silently padded or truncated.
    """
    n_features = hog.getDescriptorSize()
    # np.ravel: hog.compute may return a column vector or a flat vector
    # depending on the OpenCV version.
    rows = [np.ravel(hog.compute(deskew(image, imsize))) for image in images]
    return np.asarray(rows, dtype=np.float32).reshape(len(images), n_features)


def HOG_descriptors():
    """Load MNIST and return HOG descriptors of the deskewed images.

    Returns
    -------
    tuple
        ``(train_descriptors, test_descriptors, y_train, y_test)`` where the
        descriptor arrays have one row per image and the labels are returned
        exactly as loaded from ``mnist.load_data()``.
    """
    # Load the mnist dataset
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Optional block to remove certain classes from the training and testing
    # set, for testing on a partial dataset. Enter the classes you wish to
    # remove into the list below and uncomment the block.
    #
    # classtodelete = []
    #
    # class_to_delete_index_train = np.where(np.isin(y_train, classtodelete))
    # class_to_delete_index_test = np.where(np.isin(y_test, classtodelete))
    #
    # X_train = np.delete(X_train, class_to_delete_index_train, 0)
    # y_train = np.delete(y_train, class_to_delete_index_train)
    # X_test = np.delete(X_test, class_to_delete_index_test, 0)
    # y_test = np.delete(y_test, class_to_delete_index_test)

    imsize = 28  # size of image (28x28)

    # HOG parameters:
    winSize = (imsize, imsize)  # 28, 28
    blockSize = (imsize//2, imsize//2)  # 14, 14
    cellSize = (imsize//2, imsize//2)  # 14, 14
    blockStride = (imsize//4, imsize//4)  # 7, 7
    nbins = 9
    signedGradients = True
    derivAperture = 1
    winSigma = -1.0
    histogramNormType = 0
    L2HysThreshold = 0.2
    gammaCorrection = 1
    nlevels = 64

    # define the HOG descriptor
    hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins, derivAperture, winSigma,
                            histogramNormType, L2HysThreshold, gammaCorrection, nlevels, signedGradients)

    # Compute the descriptors. The feature count comes from the descriptor
    # itself (81 with the parameters above) rather than from a literal that
    # would have to be kept in sync by hand.
    train_descriptors = _hog_features(X_train, hog, imsize)
    test_descriptors = _hog_features(X_test, hog, imsize)

    return train_descriptors, test_descriptors, y_train, y_test

def HOG_test():
    """Train an ``NCMForest`` on MNIST HOG descriptors and print a classification report."""
    train_features, test_features, train_labels, test_labels = HOG_descriptors()

    # Forest hyper-parameters.
    forest_params = dict(
        n_classes=10,
        tree_multiplier=1,
        max_depth=20,
        min_samples_split=2,
        min_samples_leaf=5,
        method_max_features="sqrt",
        method_split='maj_class',
        distance="euclidean",
        root_distance="mahalanobis",
        alpha=0.95,
        nbgenlayer=1,
    )
    clf = NCMForest(**forest_params)
    clf.fit(train_features, train_labels)

    # Report per-class precision / recall / F1 on the test set.
    predicted = clf.predict(test_features)
    print(classification_report(test_labels, predicted, digits=5))

def test_newclass():
    """Check whether samples of an unseen class are rejected by every root node.

    One generative root ``Node`` is trained per class present in the training
    set after ``newclass`` has been removed. For every class found in the test
    set, the rate of test samples that each root's splitting function does not
    label 1 (the "rejection rate") is appended to ``test_HOG.txt``. A class
    that no root accepts for at least half of its samples is reported on
    stdout as a new class.
    """
    # newclass = 10 keeps all classes (tests class recognition); set it to any
    # value in 0-9 to hold that class out and test new-class detection.
    newclass = 10
    X_train, X_test, y_train, y_test = HOG_descriptors()

    # printing the shapes of the vectors
    print('X_train: ' + str(X_train.shape))
    print('Y_train: ' + str(y_train.shape))
    print('X_test:  '  + str(X_test.shape))
    print('Y_test:  '  + str(y_test.shape))

    # Training data without the held-out class (no-op when newclass = 10).
    keep = y_train != newclass
    X_truncated = X_train[keep]
    y_truncated = y_train[keep]

    classes = np.unique(y_truncated)
    n_classes = len(classes)
    print("Train Classes : ", classes)
    print("New Class : ", newclass)

    # Since generative roots have no random factor when being fitted or when
    # predicting, and since the lower levels of a tree don't affect the upper
    # levels, it is sufficient to train the root nodes instead of entire trees
    # (and easier to fetch results).
    # Iterate over the actual training labels, not range(n_classes): once a
    # class is held out the labels are no longer 0..n_classes-1.
    rootslist = []
    for cls in classes:
        node = Node(None, False, 1, 21, method_split='generative', root_class=int(cls), alpha=0.99,
                    root_distance="mahalanobis")
        node.fit(X_truncated, y_truncated)
        rootslist.append(node)

    # preds[row, j] == 1 when the root of classes[row] labels test sample j as 1.
    preds = np.zeros((n_classes, len(y_test)))
    for row, root in enumerate(rootslist):
        pred = root.predict_splitting_function(X_test)
        preds[row, np.ravel(pred) == 1] = 1

    # The context manager closes the file even if reporting raises.
    with open("test_HOG.txt", "a", encoding="utf-8") as f:
        # outputs reject rates for all classes, TODO: use y_pred vector
        for classtofind in np.unique(y_test):
            classtofind = int(classtofind)
            in_class = y_test == classtofind
            print("", file=f)
            print("Class to find : ", classtofind, file=f)

            rejectlist = []
            for row, cls in enumerate(classes):
                # Percentage with one decimal; rounding after scaling avoids
                # float artefacts such as 7.000000000000001.
                reject = round(float(np.mean(preds[row, in_class] == 0)) * 100, 1)
                rejectlist.append(reject)
                print("Combined rejection rate for samples of class {} by roots of class {} : {}".format(
                    classtofind, int(cls), reject), file=f)

            # Template for class detection: a root "identifies" the class when
            # it rejects fewer than 50% of its samples. Has no further effect
            # on the program.
            detected = False
            for cls, reject in zip(classes, rejectlist):
                if reject < 50:
                    print("Identified as class ", int(cls))
                    detected = True
            if not detected:
                print("New class detected !")

    print("See test_HOG.txt file in file system for results")
    
if __name__ =="__main__":
    # Script entry point. Every experiment is disabled by default: uncomment
    # the banner/call pair of each experiment you want to run. With all of
    # them commented out, only four blank lines are printed.
    print("")
    # Fit and evaluate one generative root node per class (test_gen_Node).
    #print("-----------GENERATIVE NODE----------")
    #test_gen_Node()
    print("")
    # Train and evaluate the forest on raw MNIST pixels (test_forest).
    #print("-----------Forest with pixel features----------")
    #test_forest()
    print("")
    # Train and evaluate the forest on HOG descriptors (HOG_test).
    #print("-----------Forest with HOG----------")
    #HOG_test()
    print("")
    # Rejection rates of root nodes / new-class detection (test_newclass).
    #print("-----------New Class/outliers detection----------")
    #test_newclass()





