In [None]:
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.model_selection import train_test_split
import pymongo
import scipy
import numpy as np
from db_interface import DbInterface,DbRecord

In [None]:
def getDbName(datasetName, modelName):
    """Return the database name for a dataset/model pair: '<datasetName>_<modelName>'."""
    name_parts = (datasetName, modelName)
    return '_'.join(name_parts)

In [None]:
def createSvmModel(kernelType):
    """Build a new, unfitted ``svm.SVC`` classifier whose ``kernel`` is ``kernelType``."""
    classifier = svm.SVC(kernel=kernelType)
    return classifier

In [None]:
def transform(features, trType=None):
    """Turn each raw feature array into a flat feature vector.

    Parameters
    ----------
    features : sequence of numpy arrays
        One array per sample. The arrays are never modified.
    trType : {None, 'pca', 'logm'}, optional
        * ``'pca'``  -- flatten every sample, then fit a PCA keeping 95% of the
          variance on *all* the given samples and return the projected matrix.
        * ``'logm'`` -- per sample: reshape to 2-D (rows x last dimension),
          divide by the absolute value of the total sum, build the regularised
          Gram matrix ``f @ f.T + 10e-5 * I`` and return the flattened lower
          triangle of its matrix logarithm.
        * anything else -- just flatten every sample.

    Returns
    -------
    The 2-D array produced by ``PCA.fit_transform`` for ``'pca'``; otherwise a
    list with one 1-D array per sample.
    """
    if trType == 'pca':
        flattened = [x.flatten() for x in features]
        return PCA(n_components=0.95).fit_transform(flattened)

    if trType == 'logm':
        # Imported explicitly: a bare `import scipy` does not expose the
        # `linalg` submodule on older SciPy releases.
        from scipy.linalg import logm

        def matrixLog(f):
            if f.ndim != 2:
                # Reshape into a 2d matrix: (everything else) x (last dimension)
                lastDim = f.shape[-1]
                f = f.reshape((f.size // lastDim, lastDim))
            # Normalize into a NEW array. The previous in-place `f /= ...`
            # wrote through the view into the caller's data (rescaling it on
            # every call) and raised for integer-typed input.
            # NOTE(review): the denominator is the absolute value of the
            # signed sum, which can be tiny for mixed-sign features; the
            # floor below only guards against a (near-)zero sum.
            abs_sum = abs(np.sum(f))
            f = f / max(10e-8, abs_sum)
            gram = f @ f.T
            # Small ridge on the diagonal keeps the Gram matrix away from
            # singularity before taking the matrix logarithm.
            gram = gram + 10e-5 * np.identity(gram.shape[0])
            # The zeros of the upper triangle are kept in the output vector.
            return np.tril(logm(gram)).flatten()

        return [matrixLog(feature) for feature in features]

    return [feature.flatten() for feature in features]

In [None]:
def train_svm(svmModel, client, datasetName, modelName, layer, random_state=None):
    """Train SVMs on one layer's stored features and report the best setup.

    Loads every record of ``layer`` from the database named by
    ``getDbName(datasetName, modelName)``, applies each candidate feature
    transform, splits 75/25 into train/test and fits one SVM per kernel
    ('linear', 'rbf').

    Parameters
    ----------
    svmModel : ignored
        Kept only so existing calls keep working; a fresh classifier is
        created for every kernel via ``createSvmModel``.
    client : object providing ``getDB(name)``
    datasetName, modelName : str
        Combined into the database name.
    layer : str
        Layer (collection) whose records are used.
    random_state : optional
        Forwarded to ``train_test_split``; pass a fixed value for a
        reproducible split. ``None`` keeps the previous (random) behaviour.

    Returns
    -------
    (max_acc, best_tr_type, best_kernel_type)
        Best test accuracy and the transform / kernel that produced it.
        ``(0, None, None)`` if no configuration scored above 0.
    """
    dbCursor = client.getDB(getDbName(datasetName, modelName))

    records = [dbCursor.getRecord(layer, recordId) for recordId in dbCursor.getRecordIds(layer)]
    obs = np.array([x.feature for x in records])
    target = [x.animalId for x in records]

    max_acc = 0
    best_tr_type = None
    best_kernel_type = None
    for trType in ['logm']:  # full candidate list: [None, 'pca', 'logm']
        try:
            samples = transform(obs, trType)
        except ValueError:
            print('Transformation {} couldnt be applied for layer {}, skipping it'.format(trType, layer))
            continue
        X_train, X_test, y_train, y_test = train_test_split(
            samples, target, test_size=0.25, random_state=random_state)

        for kernelType in ['linear', 'rbf']:
            # Local name on purpose: the `svmModel` argument is not used.
            classifier = createSvmModel(kernelType)
            try:
                classifier.fit(X_train, y_train)
            except ValueError as e:
                # Previously a leftover `raise e` made the report/skip below
                # unreachable, so one bad configuration aborted the sweep.
                print('Failed to fit SVM model with kernel {}, for layer {}, transform {}, skipping'.format(kernelType, layer, trType))
                print('Reason: {}'.format(e))
                continue

            y_pred = classifier.predict(X_test)
            acc = metrics.accuracy_score(y_test, y_pred)
            print('Layer Name: {}, Kernel Type: {}, Transofrm: {}, Accuracy: {}'
                      .format(layer, kernelType, trType, acc))
            if acc > max_acc:
                print('New max accuracy using layer {}, Kernel: {}, Transform {}. Value: {}'.format(layer, kernelType, trType, acc))
                # Record the new best score; without this the function always
                # returned 0 and best_* merely tracked the last positive score.
                max_acc = acc
                best_tr_type = trType
                best_kernel_type = kernelType

    return max_acc, best_tr_type, best_kernel_type

In [None]:
# Sweep every (dataset, model, layer) combination and remember the setup
# with the highest test accuracy.
modelNames = ['alexnet', 'googlenet']
datasetNames = ['amur']
client = DbInterface()

overall_max_acc = 0
max_acc_layer = None
max_acc_transform = None
max_acc_model = None
best_kernel_type = None

for dsName in datasetNames:
    for modelName in modelNames:
        dbCursor = client.getDB(getDbName(dsName, modelName))
        layerNames = dbCursor.getCollectionNames()
        for layer in layerNames:
            print('Starting Training for model {}, over layer {}'.format(modelName, layer))
            # train_svm creates its own classifier for each kernel and never
            # uses the one passed in, so None is passed. The previous argument
            # `model` was never defined and raised NameError on a fresh kernel.
            acc, trType, kernelType = train_svm(None, client, dsName, modelName, layer)
            if acc > overall_max_acc:
                print('Global Max accuracy being set, Value: {}, Model: {}, Layer: {}, Transform: {}'.
                          format(acc, modelName, layer, trType))
                overall_max_acc = acc
                max_acc_layer = layer
                max_acc_model = modelName
                max_acc_transform = trType
                best_kernel_type = kernelType

In [None]:
# Scratch: pull every stored 'pool5' record from the 'amur_alexnet' database.
dbCursor = client.getDB('amur_alexnet')
records = list(map(lambda recordId: dbCursor.getRecord('pool5', recordId),
                   dbCursor.getRecordIds('pool5')))

In [None]:
# Stack the `feature` attribute of every loaded record into one array.
record_features = (record.feature for record in records)
obs = np.array(list(record_features))
del record_features

In [None]:
# Scratch normalisation of the first observation.
# Assuming obs[0] is a NumPy array, the builtin `sum` adds its entries along
# the first axis only, and `(s ** 2) ** (1/2)` is the element-wise absolute
# value of that partial sum.
# NOTE(review): this is not an L2 norm; confirm which normalisation was meant.
features = obs[0] / (sum(obs[0]) ** 2) ** (1/2)

In [None]:
# Number of rows obs[0] has once flattened to (rows, last dimension) -- the
# same quantity `transform`'s 'logm' branch computes before reshaping.
obs[0].size / obs[0].shape[-1]

In [None]:
# Flatten to a (256 * 6, 6) matrix. The shape is hard-coded, so this raises
# unless `features` holds exactly 256 * 6 * 6 elements.
features = features.reshape(256 * 6,6)

In [None]:
# Gram matrix of the rows (rows x rows).
# NOTE: this rebinds `features`, so re-running the cell on its own multiplies
# the previous result with itself again instead of reproducing this value.
features = features @ features.T

In [None]:
# Inspect the shape of `features` as left by the cells run so far.
features.shape

In [None]:
# Inspect the shape of a single raw observation.
obs[0].shape

In [None]:
# Bind the first observation to `f` for the step-by-step checks below.
# This is a reference, not a copy.
f = obs[0]

In [None]:
# Reproduce the normalisation denominator used by `transform`'s 'logm' branch:
# reshape to 2-D (hard-coded 256 * 6 rows by 6 columns), take the absolute
# value of the total sum, and floor it at 10e-8.
f = f.reshape(256*6,6)
abs_sum = abs(np.sum(f))

# Displayed value: the denominator that would be used for this sample.
max(10e-8,abs_sum)

In [None]:
# Signed total of all entries of `f` (compare with the absolute value above).
np.sum(f)

In [None]:
# Apply the matrix-log transform to every observation; the result is a list
# with one flattened vector per sample.
samples = transform(obs, 'logm')

In [None]:
# Length of the feature vector produced for the first sample.
samples[0].shape

In [None]:
# Element count of the raw first observation, for comparison with the
# transformed vector length.
obs[0].size

In [None]:
# Toy check of the normalisation expression on a 2x2 matrix.
a = np.array([[1.,-2.],[3.,-4.]])
# The builtin `sum` adds the rows of `a`, giving per-column sums ([4., -6.]),
# so each column is divided by the absolute value of its own sum -- unlike
# `np.sum(a)`, which collapses the whole matrix to a single scalar.
a / abs(sum(a))

In [None]:
# NOTE(review): per the NumPy documentation, np.reciprocal on integer input
# uses integer arithmetic, so this presumably displays zeros rather than
# 1/3 and 1/7 -- pass floats (e.g. [3., 7.]) if true reciprocals were wanted.
np.reciprocal([3, 7])