In [None]:
import numpy as np
import matplotlib.pyplot as plt
import math
import copy

# torch
import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
from torch.utils import data
import torchvision
from torchvision import transforms
import torch.multiprocessing as mp

from pandas import DataFrame

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.datasets import make_blobs
from sklearn import model_selection
from sklearn.model_selection import train_test_split

from numpy import savetxt
from numpy import loadtxt

from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

# Use GPU if available
# `device` is the module-level target used by the `.to(device)` calls further
# down (models and training tensors); it falls back to the CPU when
# torch.cuda.is_available() is False.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Cuda is available: ",torch.cuda.is_available())

## Classification data

In [None]:
# define classification problem
# n_samples / num_classes / std / n_features parameterise the commented-out
# make_blobs call below; num_classes and n_features are also read by later cells.
n_samples = 20640
num_classes = 5
std = 4
# NOTE(review): `noise` is not referenced anywhere else in the visible notebook.
noise = 1
n_features = 6

# save problem
# One-off generation of the data set and its train/val/test split (kept for
# provenance). NOTE(review): these lines write to 'data/...' whereas the loader
# below reads from 'data/problem/...' — the files presumably were moved by hand.
#X, y = make_blobs(n_samples, centers=num_classes, n_features=n_features, cluster_std=std)
#savetxt('data/X_classification.csv', X, delimiter=',')
#savetxt('data/y_classification.csv', y, delimiter=',')
#X_tmp, X_test, y_tmp, y_test = train_test_split(X, y, test_size=0.15, random_state=0)
#X_train, X_val, y_train, y_val = train_test_split(X_tmp, y_tmp, test_size=0.15, random_state=0)
#savetxt('data/X_test_classification.csv', X_test, delimiter=',')
#savetxt('data/y_test_classification.csv', y_test, delimiter=',')
#savetxt('data/X_val_classification.csv', X_val, delimiter=',')
#savetxt('data/y_val_classification.csv', y_val, delimiter=',')
#savetxt('data/X_train_classification.csv', X_train, delimiter=',')
#savetxt('data/y_train_classification.csv', y_train, delimiter=',')

# fetch classification problem
# loadtxt returns float arrays, so the label vectors y* hold class ids as floats;
# the training code casts them to integers before computing the loss.
X = loadtxt('data/problem/X_classification.csv',delimiter=',')
y = loadtxt('data/problem/y_classification.csv',delimiter=',')
X_test = loadtxt('data/problem/X_test_classification.csv',delimiter=',')
y_test = loadtxt('data/problem/y_test_classification.csv',delimiter=',')
X_val = loadtxt('data/problem/X_val_classification.csv',delimiter=',')
y_val = loadtxt('data/problem/y_val_classification.csv',delimiter=',')
X_train = loadtxt('data/problem/X_train_classification.csv',delimiter=',')
y_train = loadtxt('data/problem/y_train_classification.csv',delimiter=',')

In [None]:
# plot a 2d projection of the classification problem
def plot_classification_problem(X, y, n_features, dim1=0, dim2=1):
    """Scatter-plot two feature columns of a labelled data set, one colour per class.

    Parameters
    ----------
    X : 2-D array, one row per sample.
    y : 1-D array of class labels, same length as X.
    n_features : number of feature columns available in X.
    dim1, dim2 : column indices shown on the horizontal / vertical axis.

    Raises
    ------
    AssertionError
        If dim1 or dim2 is not smaller than n_features.
    """
    assert dim1 < n_features, "cannot plot in higher dimensions than the problem"
    assert dim2 < n_features, "cannot plot in higher dimensions than the problem"

    df = DataFrame(dict(x=X[:,dim1], y=X[:,dim2], label=y))
    colors = {0:'red', 1:'blue', 2:'green', 3:'pink', 4:'cyan',5:'yellow',6:'black',7:'gray'}
    palette = list(colors.values())
    fig, ax = plt.subplots()
    for idx, (key, group) in enumerate(df.groupby('label')):
        # Labels 0..7 keep their fixed colour; any other label cycles through
        # the palette instead of raising KeyError.
        color = colors.get(key, palette[idx % len(palette)])
        group.plot(ax=ax, kind='scatter', x='x', y='y', label=key, color=color)
    # Name the plotted columns so the figure can be read on its own.
    ax.set_xlabel("feature %d" % dim1)
    ax.set_ylabel("feature %d" % dim2)
    plt.show()

# Show the training split projected on the default feature pair (columns 0 and 1).
plot_classification_problem(X_train, y_train, n_features)

In [None]:
# create the 2d vandermonde vectors (will be reshaped to 3d later when necessary)
def vandermonde_vec(dataset, n_instances, n_features, poly_order):
    """Expand every feature value x into its powers x**0, x**1, ..., x**(poly_order-1).

    Parameters
    ----------
    dataset : 2-D array with at least n_instances rows and n_features columns.
    n_instances : number of leading rows to expand.
    n_features : number of leading columns to expand.
    poly_order : number of powers per feature (exponents 0 .. poly_order-1).

    Returns
    -------
    numpy.ndarray of shape (n_instances, n_features*poly_order), dtype float64.
    The powers of feature ``col`` occupy columns
    ``col*poly_order : (col+1)*poly_order``, so reshaping the result to
    (n_instances, n_features, poly_order) gives one Vandermonde vector per feature.

    Raises
    ------
    IndexError
        If dataset is smaller than the requested n_instances x n_features block.
    """
    values = np.asarray(dataset)[:n_instances, :n_features]
    if values.shape != (n_instances, n_features):
        raise IndexError("dataset of shape %s is smaller than the requested %d x %d block"
                         % (np.shape(dataset), n_instances, n_features))

    # One broadcasted power replaces the per-element Python loop:
    # (instances, features, 1) ** (poly_order,) -> (instances, features, poly_order)
    exponents = np.arange(poly_order)
    powers = np.power(values[:, :, np.newaxis], exponents)

    u = np.zeros((n_instances, n_features*poly_order))
    u[:] = powers.reshape(n_instances, n_features*poly_order)
    return u

## Models

In [None]:
class TTNet(nn.Module):
    """Tensor-train (TT) model evaluated on per-feature Vandermonde vectors.

    One order-3 core is stored per feature. The first core has shape
    (1, poly_order, rank), the last (rank, poly_order, num_output) and every
    core in between (rank, poly_order, rank). The layout assumes
    n_features >= 2 (with a single feature no core carries the output mode).
    """

    def __init__(self, n_features, poly_order, num_output, rank):
        super(TTNet, self).__init__()

        self.n_features = n_features
        self.poly_order = poly_order
        self.num_output = num_output
        self.rank = rank
        self.type = 'tt'

        # Core entries are standard-normal draws scaled by 1/sqrt(poly_order).
        scale = 1/(np.sqrt(self.poly_order))

        cores = []
        for i in range(n_features):
            if i==0:
                tn_size = (1,poly_order,self.rank)
            elif i==n_features-1:
                tn_size = (self.rank,poly_order,num_output)
            else:
                tn_size = (self.rank,poly_order,self.rank)
            cores.append(Parameter(init.normal_(torch.empty(tn_size), mean=0, std=1)*scale))

        self.tt_cores = nn.ParameterList(cores)

    def get_n_params(self):
        """Return the total number of scalar entries over all parameters."""
        return sum(p.numel() for p in self.parameters())

    def forward(self, vec_input, batch_size, print_expr=False):
        """Contract the TT cores with the Vandermonde vectors of a batch.

        Parameters
        ----------
        vec_input : tensor of shape (batch_size, n_features*poly_order); feature i
            occupies columns i*poly_order : (i+1)*poly_order.
        batch_size : number of rows in vec_input.
        print_expr : unused, kept for interface compatibility.

        Returns
        -------
        Tensor of shape (batch_size, num_output).
        """
        # First do: G_i x_2 v_i  (contract every core's middle mode with its vector)
        mode2 = []
        for i in range(self.n_features):
            vec = vec_input[:,i*self.poly_order:(i+1)*self.poly_order].reshape(batch_size,-1)
            mode2.append(torch.einsum('abc,db -> dac', self.tt_cores[i], vec) )

        # Drop the singleton left bond of the first core.
        mode2[0] = mode2[0].reshape(batch_size,self.rank)
        mode2[-1] = mode2[-1].reshape(batch_size,self.rank,self.num_output)

        # Join all the results (based on equation 11 in the paper); every
        # contraction is divided by the rank.
        result = mode2[0]
        for i in range(self.n_features-1):
            result = torch.einsum('ab,abd -> ad', result, mode2[i+1]) / self.rank
        return result

In [None]:
class TRNet(nn.Module):
    """Tensor-ring (TR) model evaluated on per-feature Vandermonde vectors.

    ``Gstack`` holds one (rank, poly_order, rank) core per feature, stacked along
    the last axis; ``GN`` is an extra (rank, num_output, rank) core that carries
    the output mode and closes the ring.
    """

    def __init__(self, n_features, poly_order, num_output, rank):
        super(TRNet, self).__init__()

        self.n_features = n_features
        self.poly_order = poly_order
        self.num_output = num_output
        self.rank = rank
        self.outer_dim = rank # outer dimensions of the cores, G
        self.type = 'tr'

        # All cores are of order 3 with first and last mode of size outer_dim.
        gn_size = (self.outer_dim,num_output,self.outer_dim) # output core
        gstack_size = (self.outer_dim,poly_order,self.outer_dim,n_features) # stack of feature cores

        # Elements are drawn from a uniform distribution on
        # [0.1/sqrt(D), 1/sqrt(D)], where D is the outer dimension of the cores.
        bound_i = 1/math.sqrt(self.outer_dim)
        lb = 0.1*bound_i
        ub = 1.0*bound_i

        # The feature cores are stacked so that their last axis matches the
        # feature axis of the Vandermonde input.
        self.Gstack = Parameter(init.uniform_(torch.empty(gstack_size),a=lb,b=ub))

        # The last core has a different size: its middle mode is the number of classes.
        self.GN = Parameter(init.uniform_(torch.empty(gn_size),a=lb,b=ub))

    def forward(self, tensor_input, batch_size, print_expr=False):
        """Contract the ring for a batch of Vandermonde tensors.

        Parameters
        ----------
        tensor_input : tensor of shape (batch, n_features, poly_order).
        batch_size : unused, kept for interface compatibility with the other models.
        print_expr : unused, kept for interface compatibility.

        Returns
        -------
        Tensor of shape (batch, num_output).
        """
        # Multiplication by the Vandermonde vectors: (rank, batch, rank, n_features)
        Gv_stack= torch.einsum('abcd, edb -> aecd',self.Gstack,tensor_input)

        # Chain the per-sample rank x rank matrices: G_1 G_2 ... G_n
        f_stack = Gv_stack[:,:,:,0]
        for i in range(1,self.n_features):
            f_stack = torch.einsum('abc, cbe -> abe',f_stack,Gv_stack[:,:,:,i])

        # Multiply by the output core and close the ring (trace over both bonds).
        f_stack = torch.einsum('abc, cda -> bd',f_stack, self.GN)
        return f_stack

In [None]:
class CPNet(nn.Module):
    """CP-decomposition model evaluated on per-feature Vandermonde vectors.

    All factor matrices live in a single parameter ``A`` of shape
    (n_features, poly_order, rank, num_output).
    """

    def __init__(self, n_features, poly_order, num_output, rank):
        super(CPNet, self).__init__()

        self.type = 'cp'
        self.n_features = n_features
        self.poly_order = poly_order
        self.num_output = num_output
        self.rank = rank

        # One normally-initialised block holding the factors A_i of every feature.
        factor_shape = (n_features, poly_order, rank, num_output)
        raw_factors = torch.empty(factor_shape, requires_grad=True)
        self.A = Parameter(init.normal_(raw_factors, std=0.575))

    def forward(self, vec_input, batch_size, print_expr=False):
        """Map a (batch, n_features, poly_order) tensor to (batch, num_output) scores.

        ``batch_size`` and ``print_expr`` are accepted for interface compatibility
        with the other models and are not used.
        """
        # Project every feature's Vandermonde vector onto its factors:
        # result axes are (feature, rank, batch, output).
        projected = torch.einsum('abcd,eab->aced', self.A, vec_input)
        # Multiply over features, then add up the rank-one terms.
        per_rank = torch.prod(projected, 0)
        return torch.sum(per_rank, 0)

## Train model

In [None]:
# train classification model
def train_classification_model(net, optimizer, criterion, num_epochs, n_features, X_train, X_val, y_train, y_val):
    """Full-batch training of a tensor-network classifier, keeping the best model on validation.

    Parameters
    ----------
    net : model exposing ``poly_order`` and ``type`` and callable as
        ``net(inputs, batch_size)`` (TTNet, TRNet or CPNet).
    optimizer : torch optimizer; ``step`` is always called with a closure, which
        LBFGS requires and the other torch optimizers accept.
    criterion : loss called as ``criterion(output, integer_targets)``.
    num_epochs : number of full-batch optimizer steps.
    n_features : number of raw feature columns in X_train / X_val.
    X_train, X_val : 2-D arrays of raw features.
    y_train, y_val : 1-D arrays of class labels (cast to integers).

    Returns
    -------
    best_model : deep copy of ``net`` at the epoch with the highest validation accuracy
        (a copy of the initial net if accuracy never rose above 0).
    train_acc, valid_acc : arrays of length num_epochs with the per-epoch accuracies.
    losses : array of length num_epochs; the training loss evaluated before each
        optimizer step (-1 for epochs that were never reached).
    valid_acc_cur_best : best validation accuracy seen.
    min_val_output, min_val_preds : validation outputs / predicted classes of best_model.

    Training stops early, returning the values collected so far, when the loss is NaN.
    """
    n_train_samples = len(X_train[:,0])
    n_val_samples = len(X_val[:,0])

    X_train = vandermonde_vec(X_train, n_train_samples, n_features, net.poly_order)
    X_val = vandermonde_vec(X_val, n_val_samples, n_features, net.poly_order)

    # reshape X_train, X_val to 3d vandermonde for tensor ring and cpd
    if net.type in ('tr', 'cp'):
        X_train = np.reshape(X_train, (n_train_samples, n_features, net.poly_order))
        X_val = np.reshape(X_val, (n_val_samples, n_features, net.poly_order))

    # Put the data wherever the network lives so that model and inputs can never
    # end up on different devices.
    net_device = next(net.parameters()).device

    # The tensors do not change between epochs, so build them once.
    train_data = torch.Tensor(X_train).to(net_device)
    val_data = torch.Tensor(X_val).to(net_device)
    train_targets = torch.Tensor(y_train).long().to(net_device)
    train_labels = train_targets.cpu().numpy()
    val_labels = torch.Tensor(y_val).long().numpy()

    # setting up lists for handling loss/accuracy
    train_acc = np.zeros(num_epochs)
    valid_acc = np.zeros(num_epochs)
    losses = -1*np.ones(num_epochs)
    valid_acc_cur_best = 0
    best_model = copy.deepcopy(net)
    min_val_output = np.zeros(n_val_samples)
    min_val_preds = np.zeros(n_val_samples)

    def closure():
        # Re-evaluates the full-batch loss; LBFGS may call this several times per step.
        optimizer.zero_grad()
        output = net(train_data, n_train_samples)
        loss = criterion(output, train_targets)
        loss.backward()
        return loss

    for epoch in range(num_epochs):
        ### Train: Forward -> Backprop -> Update params
        net.train()
        # step(closure) returns the loss of the first closure evaluation, i.e. the
        # loss at the parameters before the update, so no separate forward/backward
        # pass is needed to record it.
        loss = optimizer.step(closure)
        losses[epoch] = float(loss)

        if np.isnan(losses[epoch]):
            print("NaN encountered.")
            return best_model, train_acc, valid_acc, losses, valid_acc_cur_best, min_val_output, min_val_preds

        ### Evaluate on training and validation data (no gradients needed)
        net.eval()
        with torch.no_grad():
            train_output = net(train_data, n_train_samples)
            val_output = net(val_data, n_val_samples)
        train_preds = torch.max(train_output, 1)[1].cpu().numpy()
        val_preds = torch.max(val_output, 1)[1].cpu().numpy()

        train_acc_cur = accuracy_score(train_labels, train_preds)
        valid_acc_cur = accuracy_score(val_labels, val_preds)

        # Keep a snapshot of the model that is best on the validation data.
        if valid_acc_cur > valid_acc_cur_best:
            best_model = copy.deepcopy(net)
            valid_acc_cur_best = valid_acc_cur
            min_val_output = val_output.cpu().numpy()
            min_val_preds = val_preds

        train_acc[epoch] = train_acc_cur
        valid_acc[epoch] = valid_acc_cur

        print("Epoch %2i : Train Loss %f , Train acc %f, Valid acc %f" % (
            epoch+1, losses[epoch], train_acc_cur, valid_acc_cur))

    return best_model, train_acc, valid_acc, losses, valid_acc_cur_best, min_val_output, min_val_preds

## Universal model

In [None]:
# Grid of tensor-train models: polynomial orders 4..5, ranks 1..15
poly_orders = np.arange(4,6)
ranks = np.arange(1,16)
num_output = num_classes

#tensor train classification
models = []
for poly_order in poly_orders:
    for rank in ranks:
        # plain ints, consistent with how the cross-validation cells build models
        model = TTNet(n_features, int(poly_order), num_output, int(rank))
        model.to(device)
        models.append(model)

#train the different models
num_epochs = 200
losses_models = np.zeros((len(models), num_epochs))
train_acc_models = np.zeros((len(models), num_epochs))
valid_acc_models = np.zeros((len(models), num_epochs))

for i, model in enumerate(models):
    print("Model:",i+1)
    #optimizer = optim.Adam(model.parameters())
    optimizer = optim.LBFGS(model.parameters(), line_search_fn='strong_wolfe')
    criterion = nn.CrossEntropyLoss()
    # train_classification_model returns seven values; only the first four are
    # needed here (unpacking exactly five raised "too many values to unpack").
    models[i], train_acc, valid_acc, losses, *_ = train_classification_model(
        model, optimizer, criterion, num_epochs, n_features, X_train, X_val, y_train, y_val)
    losses_models[i] = losses
    train_acc_models[i] = train_acc
    valid_acc_models[i] = valid_acc

# save to csv file
savetxt('data/overfitting_poly_orders.csv', poly_orders, delimiter=',')
savetxt('data/overfitting_ranks.csv', ranks, delimiter=',')
savetxt('data/overfitting_train_acc_models.csv', train_acc_models, delimiter=',')
savetxt('data/overfitting_losses_models.csv', losses_models, delimiter=',')
savetxt('data/overfitting_valid_acc_models.csv', valid_acc_models, delimiter=',')

## Cross validation

In [None]:
def crossvalidation_classification(models, K, X_train, X_val, y_train, y_val):
    """K-fold cross-validation over a list of candidate models.

    The training and validation sets are concatenated and split into K shuffled
    folds (fixed random_state). In every fold each candidate is trained for 100
    LBFGS epochs on the remaining folds and scored on the held-out fold.

    Parameters
    ----------
    models : list of untrained candidate networks; they are not modified.
    K : number of folds.
    X_train, X_val : 2-D feature arrays, concatenated row-wise.
    y_train, y_val : 1-D label arrays, concatenated in the same order.

    Returns
    -------
    opt_models : list of length K with the best trained model of each fold.
    max_accs : list of length K with that model's best validation accuracy.
    valid_accs : array of length K*len(models); entry k*len(models)+s is the best
        validation accuracy of candidate s in fold k.
    """
    S = len(models)

    CV = model_selection.KFold(n_splits=K, shuffle=True, random_state=0)

    # observations to do CV on
    X_CV = np.concatenate((X_train, X_val))
    y_CV = np.concatenate((y_train, y_val))

    opt_models = []
    max_accs = []

    valid_accs = -1*np.ones(K*S)
    num_epochs = 100

    for k, (par_index, test_index) in enumerate(CV.split(X_CV)):
        print('Computing CV fold: {0}/{1}..'.format(k+1,K))

        # extract training and test set for current CV fold
        X_tr, y_tr = X_CV[par_index,:], y_CV[par_index]
        X_va, y_va = X_CV[test_index,:], y_CV[test_index]

        # -inf guarantees that a model is selected even if every accuracy is 0
        max_acc = -np.inf
        opt_model = None

        for s in range(S):
            # Train a fresh copy in every fold: reusing the same instance would
            # carry weights fitted on earlier folds (including samples that are
            # held out now) into this fold.
            model = copy.deepcopy(models[s])

            # define optimizer and loss criterion
            optimizer = optim.LBFGS(model.parameters())
            criterion = nn.CrossEntropyLoss()

            # train net; the result tuple starts with the best model and has the
            # best validation accuracy at index 4
            results = train_classification_model(
                model, optimizer, criterion, num_epochs, model.n_features, X_tr, X_va, y_tr, y_va)
            model_trained, valid_acc_cur_best = results[0], results[4]

            valid_accs[k*S+s] = valid_acc_cur_best

            if valid_acc_cur_best > max_acc:
                max_acc = valid_acc_cur_best
                opt_model = model_trained

        opt_models.append(opt_model)
        max_accs.append(max_acc)
    return opt_models, max_accs, valid_accs

### CV TT

In [None]:
# Candidate grid for the tensor-train models. np.atleast_1d keeps the loops
# working when a CSV holds a single value (loadtxt then returns a 0-d array).
ranks = np.atleast_1d(loadtxt('data/cv_ranks.csv',delimiter=','))#range(3,15)
poly_orders = np.atleast_1d(loadtxt('data/cv_poly_orders.csv',delimiter=','))#range(1,4)
models = []
for rank in ranks:
    for poly_order in poly_orders:
        # loadtxt yields floats; the model sizes must be plain ints
        net = TTNet(int(n_features), int(poly_order), int(num_output), int(rank))
        net.to(device)
        models.append(net)

K = 3
opt_models, opt_models_max_accs, valid_accs = crossvalidation_classification(
    models, K, X_train, X_val, y_train, y_val)

#savetxt('data/cv_ttcls_valid_accs.csv',valid_accs,delimiter=',')

# Report the winning configuration of every fold.
opt_models_poly_order = []
opt_models_rank = []
for opt_model, max_acc in zip(opt_models, opt_models_max_accs):
    opt_models_poly_order.append(opt_model.poly_order)
    opt_models_rank.append(opt_model.rank)
    print('optimal poly_order:', opt_model.poly_order)
    print('optimal rank:', opt_model.rank)
    # the reported value is the highest validation accuracy, not a minimum
    print('maximal validation accuracy of optimal model:', max_acc)

#savetxt('data/cv_ttcls_opt_models_poly_order.csv', opt_models_poly_order,delimiter=',')
#savetxt('data/cv_ttcls_opt_models_rank.csv', opt_models_rank,delimiter=',')
#savetxt('data/cv_ttcls_opt_models_max_accs.csv', opt_models_max_accs,delimiter=',')

### CV TR

In [None]:
# Candidate grid for the tensor-ring models. np.atleast_1d keeps the loops
# working when a CSV holds a single value (loadtxt then returns a 0-d array).
ranks = np.atleast_1d(loadtxt('data/cv_ranks.csv',delimiter=','))#range(3,15)
poly_orders = np.atleast_1d(loadtxt('data/cv_poly_orders.csv',delimiter=','))#range(1,4)
models = []
for rank in ranks:
    for poly_order in poly_orders:
        # loadtxt yields floats; the model sizes must be plain ints
        net = TRNet(int(n_features), int(poly_order), int(num_output), int(rank))
        net.to(device)
        models.append(net)

K = 3
opt_models, opt_models_max_accs, valid_accs = crossvalidation_classification(
    models, K, X_train, X_val, y_train, y_val)

#savetxt('data/cv_trcls_valid_accs.csv',valid_accs,delimiter=',')

# Report the winning configuration of every fold.
opt_models_poly_order = []
opt_models_rank = []
for opt_model, max_acc in zip(opt_models, opt_models_max_accs):
    opt_models_poly_order.append(opt_model.poly_order)
    opt_models_rank.append(opt_model.rank)
    print('optimal poly_order:', opt_model.poly_order)
    print('optimal rank:', opt_model.rank)
    # the reported value is the highest validation accuracy, not a minimum
    print('maximal validation accuracy of optimal model:', max_acc)

#savetxt('data/cv_trcls_opt_models_poly_order.csv', opt_models_poly_order,delimiter=',')
#savetxt('data/cv_trcls_opt_models_rank.csv', opt_models_rank,delimiter=',')
#savetxt('data/cv_trcls_opt_models_max_accs.csv', opt_models_max_accs,delimiter=',')

### CV CP

In [None]:
# Candidate grid for the CP models. np.atleast_1d keeps the loops working when
# a CSV holds a single value (loadtxt then returns a 0-d array).
ranks = np.atleast_1d(loadtxt('data/cv_ranks.csv',delimiter=','))#range(3,15)
poly_orders = np.atleast_1d(loadtxt('data/cv_poly_orders.csv',delimiter=','))#range(1,4)
models = []
for rank in ranks:
    for poly_order in poly_orders:
        # This is the CP section, so build CPNet candidates (the cell used to
        # instantiate TRNet, silently repeating the tensor-ring experiment).
        net = CPNet(int(n_features), int(poly_order), int(num_output), int(rank))
        net.to(device)
        models.append(net)

K = 3
opt_models, opt_models_max_accs, valid_accs = crossvalidation_classification(
    models, K, X_train, X_val, y_train, y_val)

#savetxt('data/cv_cpcls_valid_accs.csv',valid_accs,delimiter=',')

# Report the winning configuration of every fold.
opt_models_poly_order = []
opt_models_rank = []
for opt_model, max_acc in zip(opt_models, opt_models_max_accs):
    opt_models_poly_order.append(opt_model.poly_order)
    opt_models_rank.append(opt_model.rank)
    print('optimal poly_order:', opt_model.poly_order)
    print('optimal rank:', opt_model.rank)
    # the reported value is the highest validation accuracy, not a minimum
    print('maximal validation accuracy of optimal model:', max_acc)

#savetxt('data/cv_cpcls_opt_models_poly_order.csv', opt_models_poly_order,delimiter=',')
#savetxt('data/cv_cpcls_opt_models_rank.csv', opt_models_rank,delimiter=',')
#savetxt('data/cv_cpcls_opt_models_max_accs.csv', opt_models_max_accs,delimiter=',')

## Run optimal models and compare with test set

In [None]:
# Hand-recorded summary of the selected hyper-parameters per model family
# (presumably copied from the cross-validation runs above — TODO confirm).
# The poly order / rank values listed here are the ones used by the TT, TR and
# CP training cells that follow.
'''
% TT Cls
% poly order 2
% rank 10
% optacc 90.25307797537619470 percent

% TR Cls
% poly order 2
% rank 4
% optacc 90.30437756497947666 percent

% CP Cls
% poly order 2
% rank 3
% optacc 90.23597811217510811 percent'''

In [None]:
# check test set
def classification_test(net, X_test, y_test, n_features):
    """Evaluate a trained classifier on held-out data and print a short report.

    Parameters
    ----------
    net : trained model exposing ``poly_order`` and ``type`` and callable as
        ``net(inputs, batch_size)``.
    X_test : 2-D array of raw features.
    y_test : 1-D array of class labels (cast to integers).
    n_features : number of raw feature columns in X_test.

    Returns
    -------
    accscore : accuracy on the test data.
    loss : 0-d tensor with the cross-entropy loss on the test data.
    """
    n_test_samples = len(X_test[:,0])
    X_test = vandermonde_vec(X_test, n_test_samples, n_features, net.poly_order)
    # reshape X_test to 3d vandermonde for tensor ring and cpd
    if net.type in ('tr', 'cp'):
        X_test = np.reshape(X_test, (n_test_samples, n_features, net.poly_order))

    # Evaluate on the device the network lives on, so that a model trained on
    # the GPU can be tested without moving it first.
    net_device = next(net.parameters()).device
    data = torch.Tensor(X_test).to(net_device)
    targets = torch.Tensor(y_test).long().to(net_device)
    criterion = nn.CrossEntropyLoss()

    # Pure evaluation: no autograd graph, and the network's train/eval mode is
    # restored afterwards.
    was_training = net.training
    net.eval()
    with torch.no_grad():
        output = net(data, n_test_samples)
        loss = criterion(output, targets)
    net.train(was_training)
    preds = torch.max(output, 1)[1]

    # scikit-learn works on host arrays
    y_true = targets.cpu().numpy()
    y_pred = preds.cpu().numpy()
    cmat = confusion_matrix(y_true, y_pred)
    accscore = accuracy_score(y_true, y_pred)

    print("\nConfusion matrix:\n", cmat)
    print("\nTest set accuracy score:", accscore)
    print("\nLoss:", loss)

    return accscore, loss

In [None]:
# TT: train the tensor-train model with the selected hyper-parameters
num_epochs = 100
poly_order = 2
rank = 10
num_output = num_classes

net = TTNet(int(n_features), int(poly_order), int(num_output), int(rank))
# The model must live on the same device as the training data, otherwise the
# forward pass fails on machines where CUDA is available.
net.to(device)
optimizer = optim.LBFGS(net.parameters(), line_search_fn='strong_wolfe')
criterion = nn.CrossEntropyLoss()
model_trained, train_acc, valid_acc, losses, valid_acc_cur_best, min_val_output, min_val_preds = train_classification_model(
                net, optimizer, criterion, num_epochs, n_features, X_train, X_val, y_train, y_val)
#savetxt('data/min_val_output_tt_classification.csv', min_val_output, delimiter=',')
#savetxt('data/min_val_preds_tt_classification.csv', min_val_preds, delimiter=',')
#accscore, loss = classification_test(model_trained, X_test, y_test, n_features)

In [None]:
# TR: train the tensor-ring model with the selected hyper-parameters
num_epochs = 100
poly_order = 2
rank = 4
num_output = num_classes

net = TRNet(n_features, poly_order, num_output, rank)
# The model must live on the same device as the training data, otherwise the
# forward pass fails on machines where CUDA is available.
net.to(device)
optimizer = optim.LBFGS(net.parameters(), line_search_fn='strong_wolfe')
criterion = nn.CrossEntropyLoss()

model_trained, train_acc, valid_acc, losses, valid_acc_cur_best, min_val_output, min_val_preds = train_classification_model(
                net, optimizer, criterion, num_epochs, n_features, X_train, X_val, y_train, y_val)
#savetxt('data/min_val_output_tr_classification.csv', min_val_output, delimiter=',')
#savetxt('data/min_val_preds_tr_classification.csv', min_val_preds, delimiter=',')
#accscore, loss = classification_test(model_trained, X_test, y_test, n_features)

In [None]:
# CP: train the CP model with the selected hyper-parameters
num_epochs = 100
poly_order = 2
rank = 3
num_output = num_classes

net = CPNet(int(n_features), int(poly_order), int(num_output), int(rank))
# The model must live on the same device as the training data, otherwise the
# forward pass fails on machines where CUDA is available.
net.to(device)
optimizer = optim.LBFGS(net.parameters(), line_search_fn='strong_wolfe')
criterion = nn.CrossEntropyLoss()

model_trained, train_acc, valid_acc, losses, valid_acc_cur_best, min_val_output, min_val_preds = train_classification_model(
                net, optimizer, criterion, num_epochs, n_features, X_train, X_val, y_train, y_val)

#savetxt('data/min_val_output_cp_classification.csv', min_val_output, delimiter=',')
#savetxt('data/min_val_preds_cp_classification.csv', min_val_preds, delimiter=',')
#accscore, loss = classification_test(model_trained, X_test, y_test, n_features)