In [None]:
############################# Import Section #################################

## Imports related to PyTorch
import torch
import torchvision
import torch.nn as nn
import torch.utils.data as Data
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torchvision import transforms, utils
from torch.utils.data import TensorDataset, DataLoader

## Generic imports
import os
import time
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
from sklearn.metrics import classification_report
from copy import deepcopy
import math
import random

## Dependencies classes and functions
from utils import gridRing
from utils import asMinutes
from utils import timeSince
from utils import getWeights
from utils import save_checkpoint
from utils import getListOfFolders

## Import Model
from DyanOF import OFModel,creatRealDictionary,fista

############################# Import Section #################################

In [None]:
def create_numpy_dataset(model, size_dictionary, keys, device):
    """Run every length bucket through ``model`` and stack the outputs.

    Args:
        model: object exposing ``eval()`` and ``forward(X, T)`` that returns a
            3-tuple; only the third element is kept (for ``TDYANT`` this is
            the dictionary reconstruction ``dic @ sparsecode``).
        size_dictionary: mapping ``key -> (X, y)`` of tensors.
        keys: iterable of keys to process, in order.
        device: device the tensors are moved to before the forward pass.

    Returns:
        ``(X_train, y_train)``: the per-bucket outputs stacked with
        ``np.vstack`` and a flat list of the label values.
    """
    model.eval()
    features = []
    y_train = []
    with torch.no_grad():
        for bucket in keys:
            inputs, targets = size_dictionary[bucket]
            inputs = inputs.to(device)
            targets = targets.to(device)
            # The second axis of the batch is handed to the model as T.
            seq_len = inputs.shape[1]
            _, _, third_output = model.forward(inputs, seq_len)
            features.append(third_output.cpu().numpy())
            y_train.extend(targets.cpu().ravel().numpy())
    return np.vstack(features), y_train

In [None]:
def create_ilkay_dataset(model, size_dictionary, keys, device):
    """Embed every sample individually and return a list of per-sample arrays.

    Each sample is passed through ``model.forward`` as a batch of one; the
    third element of the returned tuple is kept and its leading batch axis is
    removed, so every list entry has the per-sample shape.

    Args:
        model: object exposing ``eval()`` and ``forward(x, T)`` returning a
            3-tuple.
        size_dictionary: mapping ``key -> (X, Y)`` where ``X[i]`` / ``Y[i]``
            are one sample and its label.
        keys: iterable of keys to process, in order.
        device: device the sample is moved to before the forward pass.

    Returns:
        ``(x_train, y_train)``: list of numpy arrays (one per sample) and a
        flat list of label values in the same order.
    """
    model.eval()
    x_train = []
    y_train = []
    with torch.no_grad():
        for key in keys:
            X, Y = size_dictionary[key]
            for sample, label in zip(X, Y):
                sample = sample.to(device)
                T = sample.shape[0]
                _, _, embedding = model.forward(sample.unsqueeze(0), T)
                # squeeze() is not in-place: the result must be re-bound,
                # otherwise the singleton batch axis is kept.
                embedding = embedding.squeeze(0)
                x_train.append(embedding.cpu().numpy())
                y_train.extend(label.cpu().ravel().numpy())

    return x_train, y_train

In [None]:
def create_numpy_dataset_uneven(model, size_dictionary, keys, device):
    """Like ``create_numpy_dataset`` but keeps one array per length bucket.

    Buckets with different sequence lengths cannot be stacked into a single
    array, so the outputs are returned as dictionaries indexed by key.

    Args:
        model: object exposing ``eval()`` and ``forward(X, T)`` returning a
            3-tuple; only the third element is kept.
        size_dictionary: mapping ``key -> (X, y)`` of tensors.
        keys: iterable of keys to process; a key may appear more than once,
            in which case its outputs are concatenated.
        device: device the inputs are moved to before the forward pass.

    Returns:
        ``(cs, ys)``: ``cs[key]`` is the stacked numpy output for that key and
        ``ys[key]`` is a flat list of Python label values.
    """
    model.eval()
    collected = {}
    ys = {}
    with torch.no_grad():
        for key in keys:
            X, y = size_dictionary[key]
            X = X.to(device)
            T = X.shape[1]
            _, _, c = model.forward(X, T)
            collected.setdefault(key, []).append(c.cpu().numpy())
            # tolist() on every visit keeps the label element type uniform.
            ys.setdefault(key, []).extend(y.cpu().ravel().tolist())

    # Stack once per distinct key. Iterating over `keys` here would vstack an
    # already-stacked array again whenever a key is repeated.
    cs = {key: np.vstack(chunks) for key, chunks in collected.items()}
    return cs, ys

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position codes to a batch-first sequence tensor.

    Args:
        d_model: size of the last (feature) axis; may be odd.
        dropout: probability used to build ``self.dropout``. The module is
            created for interface compatibility but ``forward`` does not
            apply it.
        max_len: longest sequence length that can be encoded.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine slot than sine slot.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x: Tensor, shape [batch_size, seq_len, embedding_dim]

        Returns:
            Tensor of the same shape with the position codes added.

        Raises:
            ValueError: if ``seq_len`` exceeds the ``max_len`` given at
                construction.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.pe.size(0)}"
            )
        # pe is stored as (max_len, 1, d_model); permute to (1, seq_len, d_model)
        # so it broadcasts over the batch axis.
        return x + self.pe[:seq_len].permute(1, 0, 2)

In [None]:
class encoder(nn.Module):
    """Projects D-dimensional inputs to ``latent_dim`` through transformer layers.

    Pipeline: linear D -> embed_dim, positional encoding, ``encoder_layers``
    batch-first ``nn.TransformerEncoderLayer`` blocks each followed by tanh,
    then linear embed_dim -> latent_dim.
    """

    def __init__(self, D, embed_dim, latent_dim, nhead, encoder_layers=1, device='cuda:0'):
        super().__init__()
        self.D = D
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        # Stored only; nothing in this class moves tensors to it.
        self.device = device
        self.nhead = nhead

        self.input_projection = nn.Linear(D, embed_dim)

        blocks = []
        for _ in range(encoder_layers):
            blocks.append(
                nn.TransformerEncoderLayer(d_model=embed_dim, nhead=nhead, batch_first=True, dropout=.1)
            )
        self.tencoder = nn.ModuleList(blocks)

        self.projection = nn.Linear(embed_dim, latent_dim)
        self.pos_encoder = PositionalEncoding(embed_dim)

    def forward(self, x):
        """Return the latent sequence for input ``x`` (last axis of size D)."""
        hidden = self.input_projection(x)
        hidden = self.pos_encoder(hidden)
        for block in self.tencoder:
            hidden = torch.tanh(block(hidden))
        return self.projection(hidden)

In [None]:
class decoder(nn.Module):
    """Maps a ``latent_dim`` sequence back to D features through transformer layers.

    Pipeline: linear latent_dim -> embed_dim, positional encoding,
    ``decoder_layers`` batch-first ``nn.TransformerEncoderLayer`` blocks each
    followed by tanh, then linear embed_dim -> D.
    """

    def __init__(self, D, embed_dim, latent_dim, nhead, decoder_layers=1, device='cuda:0'):
        super().__init__()
        self.D = D
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        # Stored only; nothing in this class moves tensors to it.
        self.device = device
        self.nhead = nhead

        self.output_projection = nn.Linear(latent_dim, embed_dim)

        blocks = []
        for _ in range(decoder_layers):
            blocks.append(
                nn.TransformerEncoderLayer(d_model=embed_dim, nhead=nhead, batch_first=True, dropout=.1)
            )
        self.tdecoder = nn.ModuleList(blocks)

        self.projection = nn.Linear(embed_dim, D)
        self.pos_encoder = PositionalEncoding(embed_dim)

    def forward(self, x):
        """Return the reconstruction for latent input ``x`` (last axis of size latent_dim)."""
        hidden = self.output_projection(x)
        hidden = self.pos_encoder(hidden)
        for block in self.tdecoder:
            hidden = torch.tanh(block(hidden))
        return self.projection(hidden)

In [None]:
class TDYANT(nn.Module):
    """Transformer autoencoder with a DYAN sparse-coding step on the latent.

    ``forward`` encodes the input, sparse-codes the latent sequence against a
    dictionary built from the learnable ``rr`` / ``theta`` parameters,
    reconstructs the latent from that code and decodes the reconstruction.

    Args:
        Drr, Dtheta: initial tensors wrapped as the learnable ``rr`` and
            ``theta`` parameters passed to ``creatRealDictionary``.
        N: accepted for interface compatibility; not used by this class.
        D: feature size of the model input / output.
        embed_dim, latent_dim, nhead: forwarded to ``encoder`` and ``decoder``.
        N_class: accepted for interface compatibility; not used by this class.
        encoder_layers, decoder_layers: number of transformer blocks.
        device: forwarded to ``creatRealDictionary`` and ``fista``.
        clamp: if 0 the raw encoder output is used as latent; otherwise the
            latent is ``tanh(encoder(x)) * clamp``.
    """

    def __init__(self,
                 Drr,
                 Dtheta,
                 N,
                 D,
                 embed_dim,
                 latent_dim,
                 nhead,
                 N_class,
                 encoder_layers=1,
                 decoder_layers=1,
                 device='cuda:0',
                 clamp=2):
        super(TDYANT, self).__init__()

        self.rr = nn.Parameter(Drr)
        self.theta = nn.Parameter(Dtheta)
        self.device = device
        self.latent_dim = latent_dim
        self.embed_dim = embed_dim
        self.nhead = nhead

        self.encoder_ = encoder(D, embed_dim, latent_dim, nhead, encoder_layers, device)
        self.decoder_ = decoder(D, embed_dim, latent_dim, nhead, decoder_layers, device)
        self.clamp = clamp

    def _latent(self, x):
        """Encode ``x`` and apply the optional tanh bounding shared by forward/get_c."""
        latent = self.encoder_(x)
        if self.clamp == 0:
            return latent
        return torch.tanh(latent) * self.clamp

    def _sparse_code(self, latent, T):
        """Build the length-T dictionary and sparse-code ``latent`` against it."""
        dic = creatRealDictionary(T, self.rr, self.theta, self.device)
        # 0.1 and 100 are passed straight to fista; presumably the sparsity
        # weight and the iteration cap — confirm against DyanOF.fista.
        # (Original note: "for Kitti Dataset: fista(dic,x,0.01,80,self.gid)".)
        sparsecode = fista(dic, latent, 0.1, 100, self.device)
        return dic, sparsecode

    def forward(self, x, T):
        """Return ``(x_reconstructed, latent, latent_reconstructed)``.

        ``latent_reconstructed`` is ``dic @ sparsecode`` and is what the
        decoder consumes.
        """
        latent = self._latent(x)
        dic, sparsecode = self._sparse_code(latent, T)
        y = torch.matmul(dic, sparsecode)

        x = self.decoder_(y)

        # x is the outer layer , y is the inner layer
        return x, latent, y

    def get_c(self, x, T):
        """Return only the sparse code of the latent sequence for ``x``."""
        # Uses self.clamp through _latent(); the previous implementation read
        # an unrelated module-level name `clamp` here.
        latent = self._latent(x)
        _, sparsecode = self._sparse_code(latent, T)
        return sparsecode

In [None]:
def create_data_dictionary(df, labels, chunk_size=1, task="classification"):
    """Group samples by sequence length and stack each group into tensors.

    Args:
        df: DataFrame whose index identifies the sample; all rows sharing an
            index value form one sequence.
        labels: object supporting ``labels.loc[i].item()`` for each sample id.
        chunk_size: when > 1, each sequence is split with ``torch.chunk`` into
            (up to) this many pieces and every piece becomes its own sample
            carrying the parent's label.
        task: ``"classification"`` stores labels as ``LongTensor``; anything
            else stores them as ``FloatTensor``.

    Returns:
        ``(size_dictionary, keys)``: ``size_dictionary[length]`` is a pair
        ``(X, y)`` with ``X`` of shape (n_samples, length, n_features) and
        ``y`` of shape (n_samples, 1); ``keys`` is a numpy array of the
        lengths ordered by descending sample count.
    """
    make_label = torch.LongTensor if task == "classification" else torch.FloatTensor

    grouped = {}
    for i in df.index.unique():
        # List-style selection always yields a 2-D frame, even when the
        # sample consists of a single row.
        x = torch.FloatTensor(df.loc[[i]].values)
        # x = (x - x.mean(0,keepdim=True))/x.std(0,keepdim=True)
        y = make_label([labels.loc[i].item()])

        pieces = torch.chunk(x, chunk_size) if chunk_size > 1 else (x,)
        for piece in pieces:
            # Bucket by the piece's real length: torch.chunk may return a
            # shorter last piece (or fewer pieces) when the sequence length
            # is not a multiple of chunk_size, and torch.stack below needs
            # equal shapes within a bucket.
            grouped.setdefault(piece.shape[0], []).append((piece, y))

    size_dictionary = {}
    for size, pairs in grouped.items():
        Xs, ys = zip(*pairs)
        size_dictionary[size] = (torch.stack(Xs, 0), torch.stack(ys, 0))

    sizes = list(size_dictionary.keys())
    counts = [size_dictionary[size][0].shape[0] for size in sizes]
    keys = np.array(sizes)[np.argsort(counts)[::-1]]
    return size_dictionary, keys

In [None]:
def create_data_dictionary_extended(df, labels):
    """Pad every sequence to the longest one and stack them into one bucket.

    Shorter sequences are extended by repeating their last row until they
    reach the maximum length found in ``df``. Labels are stored as
    ``LongTensor``.

    Args:
        df: DataFrame whose index identifies the sample; rows sharing an
            index value form one sequence.
        labels: object supporting ``labels.loc[i].item()`` for each sample id.

    Returns:
        ``(size_dictionary, keys)`` where ``size_dictionary`` has the single
        key ``MAX`` mapping to ``(X, y)`` and ``keys`` is a one-element numpy
        array holding ``MAX``.
    """
    sample_ids = df.index.unique()
    MAX = np.max([df.loc[sid].shape[0] for sid in sample_ids])

    padded_inputs = []
    targets = []
    for sid in sample_ids:
        rows = df.loc[sid].values
        missing = MAX - rows.shape[0]
        # rows[-1:, :] keeps the last row 2-D so it can be repeated along axis 0.
        filler = np.repeat(rows[-1:, :], missing, axis=0)
        padded_inputs.append(torch.FloatTensor(np.vstack((rows, filler))))
        targets.append(torch.LongTensor([labels.loc[sid].item()]))

    size_dictionary = {MAX: (torch.stack(padded_inputs, 0), torch.stack(targets, 0))}
    # Only one bucket exists, so the count-sorted key array is just [MAX].
    keys = np.array([MAX])
    return size_dictionary, keys

In [None]:
def train_loop(model,optimizer,criterion,size_dictionary,keys,lam1=1,lam2=1,lam3=1,
               batch_size=None,device=None,scheduler=None):
    """Train ``model`` for one epoch over every length bucket.

    Args:
        model: module whose ``forward(X, T)`` returns
            ``(x_pred, latent, y_pred)``.
        optimizer: optimizer stepping ``model``'s parameters.
        criterion: callable ``criterion(x, x_pred, latent, y_pred, lam1=,
            lam2=, lam3=)`` returning ``(norms, loss1, loss2, loss)``.
        size_dictionary: mapping ``key -> (Xs, ys)`` of tensors.
        keys: iterable of keys to train on, in order.
        lam1, lam2, lam3: loss weights forwarded to ``criterion``.
        batch_size: mini-batch size; defaults to the notebook-level
            ``BATCH_SIZE``.
        device: device for the inputs; defaults to the notebook-level
            ``device``.
        scheduler: learning-rate scheduler stepped once per call; defaults to
            the notebook-level ``scheduler``.

    Returns:
        ``((norms, loss1, loss2, loss), predictions, labels)`` where the four
        numbers are means over mini-batches and the two lists are always
        empty (kept for interface compatibility).
    """
    # Fall back to the notebook globals the function used to read implicitly.
    notebook_globals = globals()
    if batch_size is None:
        batch_size = BATCH_SIZE
    if device is None:
        device = notebook_globals["device"]
    if scheduler is None:
        scheduler = notebook_globals["scheduler"]

    loss_value = []
    loss1_value = []
    loss2_value = []
    norms_value = []
    predictions = []
    labels = []
    model.train()
    for key in keys:
        Xs,ys = size_dictionary[key]
        train_dl = DataLoader(TensorDataset(Xs,ys),batch_size=batch_size,shuffle=True)
        # The labels are not used by this (reconstruction-only) objective.
        for X,_ in train_dl:
            T = X.shape[1]
            X = X.to(device)

            optimizer.zero_grad()
            x_pred,latent,y_pred = model.forward(X,T)

            norms,MSE_LOSS1,MSE_LOSS2,loss = criterion(X,x_pred,latent,y_pred,lam1=lam1,lam2=lam2,lam3=lam3)

            loss.backward()
            optimizer.step()

            loss_value.append(loss.item())
            loss1_value.append(MSE_LOSS1.item())
            loss2_value.append(MSE_LOSS2.item())
            norms_value.append(norms.item())

    # Step the schedule after the epoch's optimizer updates; stepping it first
    # skips the initial learning-rate value and triggers a PyTorch warning.
    scheduler.step()

    loss_val = np.mean(np.array(loss_value))
    loss1_val = np.mean(np.array(loss1_value))
    loss2_val = np.mean(np.array(loss2_value))
    norms_val = np.mean(np.array(norms_value))
    return (norms_val,loss1_val,loss2_val,loss_val),predictions,labels

In [None]:
def evaluation_loop(model,criterion,size_dictionary,keys,lam1=1,lam2=1,lam3=1,
                    batch_size=None,device=None):
    """Evaluate ``model`` over every length bucket without gradient tracking.

    Args:
        model: module whose ``forward(X, T)`` returns
            ``(x_pred, latent, y_pred)``.
        criterion: callable ``criterion(x, x_pred, latent, y_pred, lam1=,
            lam2=, lam3=)`` returning ``(norms, loss1, loss2, loss)``.
        size_dictionary: mapping ``key -> (Xs, ys)`` of tensors.
        keys: iterable of keys to evaluate, in order.
        lam1, lam2, lam3: loss weights forwarded to ``criterion``.
        batch_size: mini-batch size; defaults to the notebook-level
            ``BATCH_SIZE``.
        device: device for the inputs; defaults to the notebook-level
            ``device``.

    Returns:
        ``((norms, loss1, loss2, loss), predictions, labels)`` where the four
        numbers are means over mini-batches and the two lists are always
        empty (kept for interface compatibility).
    """
    # Fall back to the notebook globals the function used to read implicitly.
    if batch_size is None:
        batch_size = BATCH_SIZE
    if device is None:
        device = globals()["device"]

    loss_value = []
    loss1_value = []
    loss2_value = []
    norms_value = []
    predictions = []
    labels = []
    model.eval()
    with torch.no_grad():
        for key in keys:
            Xs,ys = size_dictionary[key]
            # No shuffling: the reported value is a mean of per-batch means,
            # so a fixed batch composition keeps it reproducible.
            eval_dl = DataLoader(TensorDataset(Xs,ys),batch_size=batch_size,shuffle=False)
            for X,_ in eval_dl:
                T = X.shape[1]
                X = X.to(device)

                x_pred,latent,y_pred = model.forward(X,T)

                norms,MSE_LOSS1,MSE_LOSS2,loss = criterion(X,x_pred,latent,y_pred,lam1=lam1,lam2=lam2,lam3=lam3)

                loss_value.append(loss.item())
                loss1_value.append(MSE_LOSS1.item())
                loss2_value.append(MSE_LOSS2.item())
                norms_value.append(norms.item())

    loss_val = np.mean(np.array(loss_value))
    loss1_val = np.mean(np.array(loss1_value))
    loss2_val = np.mean(np.array(loss2_value))
    norms_val = np.mean(np.array(norms_value))
    return (norms_val,loss1_val,loss2_val,loss_val),predictions,labels

In [None]:
def criterion(x,x_pred,y,y_pred,lam1=1,lam2=1,lam3=1):
    """Weighted autoencoder objective.

    lam1 weights the input reconstruction MSE (``x_pred`` vs ``x``),
    lam2 weights the latent reconstruction MSE (``y_pred`` vs ``y``),
    lam3 weights the latent norm term, which is clamped to [-1, 1] and
    subtracted from the total.

    Returns:
        ``(latent_norm, latent_mse, actual_mse, total_loss)``; the norm
        returned is the unclamped value.
    """
    F = torch.nn.functional
    actual_mse = F.mse_loss(x_pred,x)
    latent_mse = F.mse_loss(y_pred,y)
    # Norm taken along dim 1 of the latent, then averaged over the rest.
    latent_norm = torch.norm(y,p='fro',dim=1).mean()
    bounded_norm = torch.clamp(latent_norm,-1,1)

    total = lam1*actual_mse + lam2*latent_mse - lam3*bounded_norm

    return latent_norm,latent_mse,actual_mse,total

In [None]:
## HyperParameters for the Network
NumOfPoles = 80

# N is handed to gridRing() and to the TDYANT constructor further down.
N = NumOfPoles*4

# Selects the data/<dataset_name>/ directory used for loading and saving.
dataset_name = "heartbeat"

In [None]:
# Load the pickled train/test inputs and labels for `dataset_name`.
# NOTE(review): pickle.load can execute arbitrary code — only open files from
# a trusted source.
with open(f"data/{dataset_name}/{dataset_name}_train_inputs.pickle", "rb") as handle:
    train_df = pickle.load(handle)
    
with open(f"data/{dataset_name}/{dataset_name}_train_labels.pickle", "rb") as handle:
    train_labels = pickle.load(handle) 

with open(f"data/{dataset_name}/{dataset_name}_test_inputs.pickle", "rb") as handle:
    test_df = pickle.load(handle)
    
with open(f"data/{dataset_name}/{dataset_name}_test_labels.pickle", "rb") as handle:
    test_labels = pickle.load(handle)    
    # train_dictionary,train_keys = create_data_dictionary(train_df,train_labels)


In [None]:
# train_df = pd.DataFrame(np.concatenate([np.expand_dims(train_df.loc[i],0) for i in np.unique(train_df.index)],axis=0))
# test_df = pd.DataFrame(np.concatenate([np.expand_dims(test_df.loc[i],0) for i in np.unique(test_df.index)],axis=0))

In [None]:
# Bucket the samples by sequence length; chunk_size=1 keeps sequences whole.
test_dictionary,test_keys = create_data_dictionary(test_df,test_labels,chunk_size=1,task="classification")
train_dictionary,train_keys = create_data_dictionary(train_df,train_labels,chunk_size=1,task="classification")

In [None]:
# Sequence lengths present in the training set, most populated bucket first.
print(train_keys)

In [None]:
# D: number of feature columns; N_class: number of distinct training labels.
D = train_df.shape[1]
N_class = len(np.unique(train_labels.values))

In [None]:
# The "Shape" lines report the number of distinct index values (samples),
# not the shape of the underlying frames.
print("Training Data Shape",train_df.index.unique().shape)
print("Testing Data Shape",test_df.index.unique().shape)
print("Training Data Classes",N_class)
print("Training Data Dimension: ",D)

In [None]:
## Load saved model 
# ckpt_file is only read by the training cell when load_ckpt is True.
load_ckpt = False
ckpt_file = 'preTrainedModel/UCFModel.pth' # for Kitti Dataset: 'KittiModel.pth'
# Used as both the directory name and the file prefix for saved checkpoints.
checkptname = dataset_name

In [None]:
## Initializing r, theta
# The magnitude and phase angle of P (presumably complex pole locations —
# confirm against utils.gridRing) become the initial rr / theta tensors
# passed to TDYANT. Pall is not used in this notebook.
P,Pall = gridRing(N)
Drr = abs(P)
Drr = torch.from_numpy(Drr).float() #.to(device)
Dtheta = np.angle(P)
Dtheta = torch.from_numpy(Dtheta).float() #.to(device)

In [None]:
import time

In [None]:
# Model and training configuration.
# - print_every / saveEvery: epoch intervals for validation and checkpointing
#   in the training cell.
# - clamp = 0 makes TDYANT use the raw encoder output as latent; any other
#   value bounds it as tanh(latent) * clamp.
# - BATCH_SIZE and device are also read as globals by train_loop /
#   evaluation_loop.
embed_dim = 128
latent_dim= 256
nhead=16
BATCH_SIZE = 8
LR = 0.0005
EPOCH = 100
print_every = 5
saveEvery = 10
clamp = 0
encoder_layers=1
decoder_layers=1
device = torch.device("cuda:0")

In [None]:
## Create the model
model = TDYANT(Drr,
                Dtheta,
                N ,
                D, 
                embed_dim,
                latent_dim,
               nhead,
                N_class,
                encoder_layers,
                decoder_layers,
                device,
                clamp).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=LR,weight_decay=1e-8)
# The scheduler is stepped once per train_loop call; with EPOCH = 100 neither
# milestone is reached in a run that starts at epoch 1.
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[150,200], gamma=0.1) # if Kitti: milestones=[100,150]

In [None]:
# Pin the model and the global `device` to cuda:0. From here on `device` is a
# plain string rather than the torch.device created in the config cell.
model.device = "cuda:0"
model = model.cuda()
device = "cuda:0"

In [None]:
start_epoch = 1

## If want to continue training from a checkpoint
if(load_ckpt):
    # The stored 'epoch' is the epoch to resume from (it is saved as epoch + 1 below).
    loadedcheckpoint = torch.load(ckpt_file)
    start_epoch = loadedcheckpoint['epoch']
    model.load_state_dict(loadedcheckpoint['state_dict'])
    optimizer.load_state_dict(loadedcheckpoint['optimizer'])

print("Training from epoch: ", start_epoch)
print('-' * 25)

## Start the Training
for epoch in range(start_epoch, EPOCH+1):
    # Visit the length buckets in a different order each epoch (shuffles train_keys in place).
    random.shuffle(train_keys)
    start = time.time()
    # lam2=0 and lam3=0: only the input reconstruction MSE contributes to the total loss.
    # train_loop returns (latent norm, latent MSE, input MSE, total loss).
    (norms,loss1,loss2,loss_val),predictions,labels = train_loop(model,optimizer,criterion,train_dictionary,train_keys,lam1=1,lam2=0.0,lam3=0)
    end = time.time()
    # Validation runs only on epochs divisible by print_every.
    if (epoch)%print_every == 0:
        (norms_v,loss1_v,loss2_v,loss_val_v),predictions,labels = evaluation_loop(model,criterion,test_dictionary,test_keys,lam1=1,lam2=0,lam3=0)

    print('Epoch: ', epoch)
    print("| train time: %.6f" % (end-start))
    print('| train loss: %.6f' % loss_val)
    print('| train loss LATENT: %.6f' % loss1)
    print('| train loss ACTUAL: %.6f' % loss2)
    print('| LATENT NORM: %.6f' % norms)
    if (epoch)%print_every == 0:
        print('| val loss: %.6f' % loss_val_v)
        print('| val loss LATENT: %.6f' % loss1_v)
        print('| val loss ACTUAL: %.6f' % loss2_v)
        print('| val LATENT NORM: %.6f' % norms_v)
    print("\n")

#     print("Classification Report:")
#     print(classification_report(labels,predictions,zero_division=1))
    
    # Checkpoints are written to data/<checkptname>/<checkptname><epoch>.pth.
    if epoch % saveEvery ==0 :
        print("Saving Checkpoint")
        save_checkpoint({'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'optimizer' : optimizer.state_dict(),
                        },f"data/{checkptname}/"+checkptname+str(epoch)+'.pth')

In [None]:
# Disabled manual reload: the branch never runs because the condition is the
# literal False.
# NOTE(review): the path is an absolute, machine-specific Windows path.
if(False):
    loadedcheckpoint = torch.load(r"C:\Users\lpott\Desktop\DYAN\Code\data\lorenz\lorenz20.pth")
    start_epoch = loadedcheckpoint['epoch']
    model.load_state_dict(loadedcheckpoint['state_dict'])
    optimizer.load_state_dict(loadedcheckpoint['optimizer'])

In [None]:
# Make sure the model and its `device` attribute (read by forward) agree on cuda:0.
model = model.to("cuda:0")
model.device = "cuda:0"

In [None]:
# Plot one feature of one test sample against its reconstruction.
# n (sample index) and T (length bucket) were read here before any cell had
# assigned them; they are set to the same values the following cell uses so
# this cell runs on a fresh kernel.
n = 2
T = 405
i = 3  # feature column to plot
with torch.no_grad():
    sample = test_dictionary[T][0][n]
    x_pred = model.forward(sample.unsqueeze(0).cuda(),T)[0].cpu().detach().numpy()
    # 0.01 scales the step index on the x axis (presumably the sampling period — confirm).
    time_axis = np.arange(len(x_pred[0,:,i]))*0.01
    plt.figure(figsize=(10,10))
    plt.plot(time_axis,sample[:,i])
    plt.plot(time_axis,x_pred[0,:,i])

In [None]:
# Reconstruct one test sample and plot every feature against its reconstruction.
# `embedding`, `DYAN_embedding` and `x_pred` are reused by later cells.
n = 2
I = 3
T = 405
with torch.no_grad():
    sample = test_dictionary[T][0][n]
    batch = sample.unsqueeze(0).cuda()

    # Same latent rule as TDYANT.forward: raw encoder output when clamp == 0,
    # tanh-bounded otherwise.
    embedding = model.encoder_.forward(batch)
    if clamp != 0:
        embedding = torch.tanh(embedding)*clamp

    dic = creatRealDictionary(T,model.rr,model.theta,model.device)
    sparsecode = fista(dic,embedding,0.1,100,model.device)
    DYAN_embedding = torch.matmul(dic,sparsecode)[0].cpu().detach().numpy()

    x_pred = model.forward(batch,T)[0].cpu().detach().numpy()
    # 0.01 scales the step index on the x axis (presumably the sampling period — confirm).
    time_axis = np.arange(len(x_pred[0,:,:]))*0.01
    plt.figure(figsize=(10,10))
    original_lines = plt.plot(time_axis,sample)
    reconstructed_lines = plt.plot(time_axis,x_pred[0,:,:])
    plt.xlabel("Time (s)",fontsize=20)
    plt.ylabel("State",fontsize=20)
    # All original curves are drawn before all reconstructed ones, so the
    # legend pairs explicit handles with labels instead of relying on an
    # interleaved label order. Only the first three features are labelled.
    plt.legend(original_lines[:3] + reconstructed_lines[:3],
               ["x","y","z","$x_{reconstructed}$","$y_{reconstructed}$","$z_{reconstructed}$"],
               fontsize=20)

In [None]:
# Grid of the first nrow*ncol latent features: the encoder embedding and its
# DYAN reconstruction (both produced by the previous cell) drawn on the same axes.
with torch.no_grad():
    nrow = 4; ncol = 8
    fig, axs = plt.subplots(nrow, ncol)
    # plt.subplots returns a 1-D array for a single row; restore the 2-D indexing.
    if nrow == 1:
        axs = np.expand_dims(axs,0)
    for i in range(nrow):
        for j in range(ncol):
            axs[i,j].plot(embedding[0,:,i*ncol + j].cpu().detach().numpy())
            axs[i,j].plot(DYAN_embedding[:,i*ncol + j])
            axs[i,j].title.set_text(f"Feature {i*ncol+j+1}")

In [None]:
# NOTE(review): `emb` is not assigned anywhere in the visible notebook, so this
# cell would raise NameError on a fresh kernel — possibly a leftover for
# `embedding`; confirm or delete.
emb

In [None]:
if dataset_name =="nonlinear" or "UWaveGestureLibrary":
    plt.figure()
    %matplotlib qt5
    ax = plt.axes(projection='3d')
    ax.scatter3D(embedding[0,:,0].cpu().detach().numpy(),
                 embedding[0,:,1].cpu().detach().numpy(),
                 embedding[0,:,2].cpu().detach().numpy(),
                 c=np.linspace(0,1,len(test_dictionary[T][0][0,:,0])))

In [None]:
# Standalone 3-D scatter of ten points on a diagonal; it uses no model or
# dataset values.
ax = plt.axes(projection='3d')

ax.scatter3D(np.arange(0,10),np.arange(0,10),np.arange(0,10),c=np.arange(0,10))
plt.show()

In [None]:
if dataset_name == 'lorenz' or "UWaveGestureLibrary":
    plt.figure()
    %matplotlib qt5
    ax = plt.axes(projection='3d')
    ax.scatter3D(test_dictionary[T][0][n,:,0],test_dictionary[T][0][n,:,1],test_dictionary[T][0][n,:,2],c=np.linspace(0,1,len(test_dictionary[T][0][0,:,0])))
    ax.plot3D(x_pred[0,:,0],x_pred[0,:,1],x_pred[0,:,2])
    ax.set_xlabel('$X$', fontsize=20)
    ax.set_ylabel('$Y$',fontsize=20)
    ax.set_zlabel(r'$Z$', fontsize=20)
    plt.show()
    
#     plt.figure()
#     ax = plt.axes(projection='3d')
#     ax.plot3D(DYAN_embedding[:,0],DYAN_embedding[:,1],DYAN_embedding[:,2])
#     plt.show()

In [None]:
# Extrapolate n_ahead steps: sparse-code the latent with the length-T
# dictionary, then reconstruct with a dictionary of length T + n_ahead and decode.
with torch.no_grad():
    n_ahead = 2
    # Same latent rule as TDYANT.forward. Multiplying by clamp unconditionally
    # would zero the embedding when clamp == 0.
    embedding = model.encoder_.forward(test_dictionary[T][0][n].unsqueeze(0).cuda())
    if clamp != 0:
        embedding = torch.tanh(embedding)*clamp

    dic = creatRealDictionary(T,model.rr,model.theta,model.device)
    sparsecode = fista(dic,embedding,0.1,100,model.device)
    dic_pred = creatRealDictionary(T+n_ahead,model.rr,model.theta,model.device)
    DYAN_embedding = torch.matmul(dic_pred,sparsecode)#[0]
    prediction = model.decoder_.forward(DYAN_embedding).cpu().detach().numpy()[0]

    plt.figure(figsize=(10,10))
    # x_pred comes from the earlier reconstruction cell; only its length is used here.
    plt.plot(np.arange(len(x_pred[0,:,:]))*0.01,test_dictionary[T][0][n,:,:])#,'b-')
    # NOTE(review): the "future" markers are the first n_ahead steps of sample
    # n+1 — this assumes consecutive samples are contiguous in time; confirm.
    plt.plot(0.01*T + np.arange(n_ahead)*0.01,test_dictionary[T][0][n+1,:n_ahead,:],'m*')
    plt.plot(np.arange(len(prediction))*0.01,prediction)

In [None]:
# Per-sample embeddings (lists of arrays) for the train and test splits.
X_train,y_train = create_ilkay_dataset(model,train_dictionary,train_keys,device)
X_test,y_test = create_ilkay_dataset(model,test_dictionary,test_keys,device)

In [None]:
# Persist the per-sample embeddings and labels next to the raw data,
# using the *_ilkay.pickle suffix.
with open(f"data/{dataset_name}/{dataset_name}_train_inputs_ilkay.pickle", "wb") as handle:
    pickle.dump(X_train,handle)
    
with open(f"data/{dataset_name}/{dataset_name}_train_labels_ilkay.pickle", "wb") as handle:
    pickle.dump(y_train,handle)

with open(f"data/{dataset_name}/{dataset_name}_test_inputs_ilkay.pickle", "wb") as handle:
    pickle.dump(X_test,handle)
    
with open(f"data/{dataset_name}/{dataset_name}_test_labels_ilkay.pickle", "wb") as handle:
    pickle.dump(y_test,handle)
    # train_dictionary,train_keys = create_data_dictionary(train_df,train_labels)


In [None]:
# Switch the global device to CPU for the dataset export below.
device="cpu"

In [None]:
# Keep the model's `device` attribute (read by forward) in step with where
# its weights live.
model.device = "cpu"
model = model.cpu()

In [None]:
# Repeats the move performed in the previous cell.
model = model.cpu()

In [None]:
# Release cached GPU memory now that the model lives on the CPU.
torch.cuda.empty_cache()

In [None]:
# More than one length bucket means the outputs cannot share a single array,
# so the per-bucket (dictionary) builder is used instead.
uneven = len(train_keys) > 1
build_dataset = create_numpy_dataset_uneven if uneven else create_numpy_dataset
X_train,y_train = build_dataset(model.to("cpu"),train_dictionary,train_keys,device)
X_test,y_test = build_dataset(model.to("cpu"),test_dictionary,test_keys,device)

In [None]:
# Persist the stacked (or per-bucket) model outputs and labels,
# using the *_concat.pickle suffix.
with open(f"data/{dataset_name}/{dataset_name}_train_inputs_concat.pickle", "wb") as handle:
    pickle.dump(X_train,handle)
    
with open(f"data/{dataset_name}/{dataset_name}_train_labels_concat.pickle", "wb") as handle:
    pickle.dump(y_train,handle)

with open(f"data/{dataset_name}/{dataset_name}_test_inputs_concat.pickle", "wb") as handle:
    pickle.dump(X_test,handle)
    
with open(f"data/{dataset_name}/{dataset_name}_test_labels_concat.pickle", "wb") as handle:
    pickle.dump(y_test,handle)
    # train_dictionary,train_keys = create_data_dictionary(train_df,train_labels)


## PYTORCH CLASSIFIER

In [None]:
class classifier(nn.Module):
    """Transformer head producing one bounded score per sequence.

    Pipeline: tanh on the input, linear latent_dim -> embed_dim, two
    batch-first ``nn.TransformerEncoderLayer`` blocks (each followed by tanh),
    flatten over time, linear -> 64, linear -> 1, sigmoid.

    Args:
        D: stored only; not used to size any layer.
        N_class: stored only; the head always has a single output unit.
        embed_dim: width of the transformer blocks.
        latent_dim: size of the last axis of the input.
        num_heads: attention heads per transformer block.
        dim_feedforward: hidden width of each block's feed-forward part.
        seq_len: number of time steps every input must have; it sizes the
            flattening layer. Defaults to the previously hard-coded 365.
    """

    def __init__(self,D,N_class,embed_dim,latent_dim,num_heads=8,dim_feedforward=64,seq_len=365):
        super(classifier,self).__init__()

        self.D = D
        self.N_class = N_class
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads
        self.dim_feedforward = dim_feedforward
        self.seq_len = seq_len

        self.mha1 = nn.TransformerEncoderLayer(d_model=embed_dim,nhead=num_heads,batch_first=True,dim_feedforward=dim_feedforward,dropout=.1,norm_first=False,activation='relu')
        # NOTE(review): dropout of 0.8 on the second block is unusually high — confirm it is intended.
        self.mha2 = nn.TransformerEncoderLayer(d_model=embed_dim,nhead=num_heads,batch_first=True,dim_feedforward=dim_feedforward,dropout=.8)

        self.project = nn.Linear(latent_dim,embed_dim)
        self.l1 = nn.Linear(embed_dim*seq_len,64)
        self.decision = nn.Linear(64,1)

    def forward(self,x):
        """Return a tensor of shape (batch, 1) with values in (0, 1).

        Args:
            x: tensor whose last axis has size ``latent_dim`` and whose
               second axis has size ``seq_len``.
        """
        x = torch.tanh(x)
        x = torch.tanh(self.project(x))
        x = torch.tanh(self.mha1(x))
        x = torch.tanh(self.mha2(x))

        # Flatten per sample so a sequence-length mismatch fails in l1 instead
        # of silently regrouping rows across the batch.
        x = x.flatten(start_dim=1)
        x = torch.tanh(self.l1(x))
        # The head has a single unit, and softmax over one element is
        # constantly 1.0 (no gradient). Sigmoid is the one-unit equivalent and
        # keeps the output in the same (0, 1) range.
        return torch.sigmoid(self.decision(x))

In [None]:
import math

In [None]:
# From here on the notebook works with a different dataset than the one the
# autoencoder above was trained on ("heartbeat").
dataset_name = "UWaveGestureLibrary"

In [None]:
# Reload the *_concat.pickle exports for the selected dataset.
# NOTE(review): pickle.load can execute arbitrary code — only open files from
# a trusted source.
with open(f"data/{dataset_name}/{dataset_name}_train_inputs_concat.pickle", "rb") as handle:
    X_train = pickle.load(handle)
    
with open(f"data/{dataset_name}/{dataset_name}_train_labels_concat.pickle", "rb") as handle:
    y_train = pickle.load(handle)

with open(f"data/{dataset_name}/{dataset_name}_test_inputs_concat.pickle", "rb") as handle:
    X_test = pickle.load(handle)
    
with open(f"data/{dataset_name}/{dataset_name}_test_labels_concat.pickle", "rb") as handle:
    y_test = pickle.load(handle)
    # train_dictionary,train_keys = create_data_dictionary(train_df,train_labels)


In [None]:
# The export is either one array per split or a dictionary of per-bucket
# arrays; flatten the dictionary form first.
if isinstance(X_train, dict):
    X_train = np.concatenate(list(X_train.values()))
    X_test = np.concatenate(list(X_test.values()))
    y_train = np.concatenate(list(y_train.values()))
    y_test = np.concatenate(list(y_test.values()))

# Convert in both cases: TensorDataset further down requires tensors, and the
# dictionary branch used to leave numpy arrays behind.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32)

In [None]:
# Sanity check of the loaded feature array's dimensions.
print("X Train Shape",X_train.shape)

In [None]:
# Rebinds D and N_class (previously derived from train_df / train_labels) to
# the reloaded arrays.
D = X_train.shape[-1]
N_class = len(np.unique(y_train))

In [None]:
device="cuda:0"
# Positional arguments after N_class: embed_dim=32, latent_dim=3, num_heads=8,
# dim_feedforward=64.
# NOTE(review): latent_dim is the literal 3 rather than D — confirm the input
# feature size really is 3.
model_c = classifier(D, N_class,32,3,8,64).cuda() #cuda()

In [None]:
# X_train = (X_train-X_train.mean(dim=1,keepdim=True))/(X_train.std(dim=1,keepdim=True) + 1e-10)
# X_test = (X_test-X_test.mean(dim=1,keepdim=True))/(X_test.std(dim=1,keepdim=True) + 1e-10)

In [None]:
# Per-class weight = 1 - class frequency. C is only referenced by the
# commented-out CrossEntropyLoss alternative in the criterion cell.
unique, counts = np.unique(y_train.tolist(), return_counts=True)
C = 1-torch.FloatTensor(counts/np.sum(counts)).to(device)
# C = torch.FloatTensor([1,10]).to(device)
# C = torch.FloatTensor([1,10]).to(device)

In [None]:
# X_train = (X_train - X_train.mean(1,keepdim=True))/(X_train.std(1,keepdim=True)+1e-20)
# X_test = (X_test - X_test.mean(1,keepdim=True))/(X_test.std(1,keepdim=True)+1e-20)

In [None]:
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_dl = DataLoader(TensorDataset(X_train,y_train),batch_size=64,shuffle=True)
test_dl = DataLoader(TensorDataset(X_test,y_test),batch_size=256,shuffle=False)

In [None]:
# NOTE(review): this rebinds `criterion`, shadowing the autoencoder loss
# function defined earlier; re-running the autoencoder training cell after
# this one would pick up MSELoss instead.
criterion = nn.MSELoss()#nn.CrossEntropyLoss(C)

In [None]:
# Rebinds `optimizer` to the classifier's parameters (learning rate 0.001, no weight decay).
optimizer = torch.optim.Adam(model_c.parameters(),0.001,weight_decay=0.000)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Train the classifier head and report regression error plus rounded-label
# classification metrics after every epoch.
best_accuracy = 0.0  # best rounded-prediction test accuracy so far (was never initialised)
for epoch in range(400):
    losses = []
    model_c.train()
    train_predictions = []
    train_label_list = []
    for x,y in train_dl:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()

        # The network output is multiplied by 10 before the loss (presumably
        # to map a bounded output onto the label range — confirm). The scaled
        # value is what gets recorded as the prediction.
        y_pred = model_c(x).squeeze(-1)*10

        loss = criterion(y_pred,y)
        loss.backward()
        optimizer.step()

        losses.append(loss.item()*x.shape[0])
        train_predictions.extend(y_pred.detach().tolist())
        train_label_list.extend(y.tolist())

    model_c.eval()
    test_predictions = []
    with torch.no_grad():
        for x,y in test_dl:
            x = x.to(device)
            # Scale the prediction exactly as in training; the inputs are left untouched.
            test_predictions.extend((model_c(x).squeeze(-1)*10).tolist())

    test_label_list = y_test.tolist()
    # classification_report needs discrete labels, so round the continuous outputs.
    train_classes = np.rint(train_predictions)
    test_classes = np.rint(test_predictions)

    print("="*10+f"{epoch}"+"="*10)
    # train_dl is shuffled, so compare against the labels collected in the
    # same order rather than against y_train.
    print("Train error: ",mean_squared_error(train_label_list,train_predictions))
    print("Test error: ", mean_squared_error(test_label_list,test_predictions))

    print(classification_report(train_label_list,train_classes,digits=4,zero_division=1))
    print(classification_report(test_label_list,test_classes,digits=4,zero_division=1))
    accuracy = np.mean(np.array(test_label_list) == test_classes)
    if accuracy > best_accuracy:
        best_accuracy=accuracy
    # Each entry of `losses` is a batch loss times its batch size, so dividing
    # the sum by the sample count gives the per-sample mean loss.
    print(np.sum(losses)/max(len(train_label_list),1))

## SKLEARN

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC,SVR
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.decomposition import PCA,IncrementalPCA

In [None]:
# Use the raw (un-embedded) samples of the length-25 bucket for the sklearn baseline.
# NOTE(review): key 25 is hard-coded; it must exist in both dictionaries.
X_train,y_train = train_dictionary[25] 
X_test,y_test = test_dictionary[25] 

In [None]:
# Convert to numpy for scikit-learn; labels are flattened to 1-D.
X_train = X_train.numpy()
y_train = y_train.numpy().ravel()
X_test = X_test.numpy()
y_test = y_test.numpy().ravel()

In [None]:
# Display the training array's dimensions.
X_train.shape

In [None]:
# Accept either numpy arrays or tensors and end up with numpy copies named
# X_tr / X_te / y_tr / y_te.
if type(X_train) is np.ndarray:
    X_tr = X_train
    X_te = X_test
    y_te = y_test
    y_tr= y_train
else:
    X_tr = X_train.cpu().numpy()
    X_te = X_test.cpu().numpy()
    y_te = y_test.cpu().numpy()
    y_tr= y_train.cpu().numpy()

In [None]:
# Flatten every sample to a single feature vector (one row per sample).
X_tr = X_tr.reshape(X_train.shape[0],-1)
X_te = X_te.reshape(X_test.shape[0],-1)

In [None]:
# Display the number of training labels.
y_tr.shape

In [None]:
# Display the number of test labels.
y_te.shape

In [None]:
# Display the flattened test labels.
y_te.ravel()

In [None]:
# Histogram of the test label values.
plt.hist(y_te.ravel())

In [None]:
# Candidate pipeline parts. Only `pca` and `log` are combined into `clf`
# below; `svm` is created but not used in the visible cells.
pca = PCA(n_components=.995,random_state=0)
svm = SVC(random_state=0,class_weight='balanced',kernel='linear',C=1,decision_function_shape='ovr')
log = LogisticRegression(class_weight='balanced')
# gpc = GaussianProcessClassifier(1.0 * RBF(1.0))
# ada = AdaBoostClassifier(n_estimators=100, random_state=0)
# knn = KNeighborsClassifier(n_neighbors=30)

In [None]:
from sklearn.linear_model import Lasso

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Created but not added to the pipeline in the visible cells.
std = StandardScaler()

In [None]:
# PCA followed by class-balanced logistic regression.
clf = make_pipeline(pca,log)

In [None]:
# Fit the pipeline on the flattened training samples.
clf.fit(X_tr,y_tr)

In [None]:
# Predicted labels for both splits.
pred_tr = clf.predict(X_tr)
pred_te = clf.predict(X_te)

In [None]:
# Root-mean-squared error between predicted and true training labels.
np.sqrt(mean_squared_error(pred_tr,y_tr))

In [None]:
# Root-mean-squared error between predicted and true test labels.
np.sqrt(mean_squared_error(pred_te,y_te))

In [None]:
# True (x axis) versus predicted (y axis) labels on the test split.
plt.plot(y_te,pred_te,'*')

In [None]:
# True (x axis) versus predicted (y axis) labels on the training split.
plt.plot(y_tr,pred_tr,'*')

In [None]:
# Per-class precision/recall/F1 on the training split.
print(classification_report(y_train,pred_tr,digits=3))

In [None]:
# Per-class precision/recall/F1 on the test split.
print(classification_report(y_test,pred_te,digits=3))