In [84]:
import numpy as np
import torch
from torch import nn, optim

import time
import os
import random

from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim

In [85]:
# Block 1: Encoder
class Encoder(nn.Module):
    def __init__(self, 
               input_size = 1,
               embedding_size = 1,
               hidden_size = 16,
               n_layers = 2,
               dropout = 0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.linear = nn.Linear(input_size, embedding_size, n_layers)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, 
                        dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
        x: input batch data, 
        size of x: [sequence len, batch size, feature size]
        """

        # size of embedded : [sequence len, batch size, embedding size]
        embedded = self.dropout(F.relu(self.linear(x)))

        output, (hidden, cell) = self.rnn(embedded)
        # hidden: the last step hidden of each layer of rnn
        # size of hidden : [num of layers * num directions, batch size, hidden size]
        # num of directions is 1, since we are useing signle directional rnn
        # cell: the last step cell of each layer of rnn
        # size of cell: [num of layers * num of directions, batch size, hidden size]
        
        return hidden, cell

In [86]:
# Block 2: Decoder
class Decoder(nn.Module):
    def __init__(self, 
                output_size = 1,
                embedding_size = 1,
                hidden_size = 16,
                n_layers = 2,
                dropout = 0.5):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Linear(output_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout = dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        """
        x: input batch data, 
        size of x: [batch size, feature size]
        x is only 2-dimensional, since the input is batches of last coordinate of the sequence,
        so the sequence length has been removed
        """

        # add a sequence dimension to the front of x, to allow for use of nn.LSTM method
        x = x.unsqueeze(0)
        # size(x) now becomes [1, batch size, feature size]
        embedded = self.dropout(F.relu(self.embedding(x)))

        # size of output : [seq len, batch size, hidden dimension * num of directions]
        # size of hidden : [num of layers * num of directions, batch size, hidden dim]
        # size of cell : [num of layers * num of directions, batch size, hidden dim]

        # notice that sequence len and num of directions will always be 1 in the Decoder, therfore:
        # size of output : [1, batch size, hidden dimension]
        # size of hidden : [num of layers, batch size, hidden dim]
        # size of cell : [num of directions, batch size, hidden dim]

        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))

        # prediction = [batch size, output size]
        prediction = self.linear(output.squeeze(0))

        return prediction, hidden, cell


In [87]:
# Block 3: Seq2seq
class Seq2seq(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        #self.device = device

        assert encoder.hidden_size == decoder.hidden_size, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        """
        size of x : [observed sequence len, batch size, feature size]
        size of y : [target sequence len, batch size, feature size]
        """
        batch_size = x.shape[1]
        target_len = y.shape[0]
        
        # tensor to store decoder outputs of each time step
        outputs = torch.zeros(y.shape)
        
        # last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden, cell = self.encoder(x)

        # first input to decoder is last coordinates of x
        decoder_input = x[-1, :, :]
        
        for i in range(target_len):
            # run decode for one time step
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            
            # place predictions in a tensor holding predictions for each time step
            outputs[i] = output

            # decide if we are going to use teacher forcing or not
            teacher_forcing = random.random() < teacher_forcing_ratio

            # output is the same shape as input, [batch_size, feature size]
            # use output directly as input or use true lable depending on
            # teacher_forcing is true or not
            decoder_input = y[i] if teacher_forcing else output

        return outputs


In [88]:
# Block 4: RBF
class RBF(nn.Module):
    def __init__(self):
        super(RBF, self).__init__()
        self.sigma = nn.Parameter(torch.Tensor(1))
        self.reset_parameters()
        
    def reset_parameters(self):
        nn.init.constant_(self.sigma, 1)
    
    def forward(self, x1, x2):
        '''
        size of x1/x2 : [input sequence len, batch size, feature size],
        for our task, the last two sizes are both 1.
        '''
        
        value = (x1 - x2).pow(2).sum(0).pow(0.5) / self.sigma
        
        return torch.exp(-value)

In [89]:
# Block 5: Kernel
# class Kernel refers to the structure combining Seq2seq module and RBF module.
class Kernel(nn.Module):
    def __init__(self, seq2seq, rbf, target_length, output_dim):
        super().__init__()
        # target_length: seq2seq2 output sequence length
        self.target_length = target_length
        # output_dim: seq2seq2 output embedding size; in our case being 1
        self.output_dim = output_dim
        
        self.seq2seq = seq2seq
        self.rbf = rbf
    
    def forward(self, x1, x2):
        """
        size of x1/x2 : [observed sequence len, batch size, feature size]
        """
        # size of output_size corresponds to the size of seq2seq output
        output_size = torch.randn(self.target_length, 1, self.output_dim)
        
        outputs1 = self.seq2seq(x1, output_size)
        outputs2 = self.seq2seq(x2, output_size)
        
        # size of value : [batch size, feature size], both being 1 in our case
        value = self.rbf(outputs1, outputs2)
        
        return value
        

In [99]:
# Block 6: Model
class Model(nn.Module):
    def __init__(self, kernel, xs, ys):
        super().__init__()
        # xs is a list of input data x,
        # where the size of x is: [sequence len, batch size, feature size]
        self.xs = xs
        # ys is a list of label y.
        # size of y: [batch size]
        self.ys = ys
        # data_length: num of items
        self.data_length = len(ys)
        # size of alphas: [num of items, batch size]
        self.alphas = torch.randn(self.data_length, 1)
        self.kernel = kernel
        #self.kernel_np = lambda x1, x2: kernel(x1, x2).detach().numpy()
        
    def forward(self):
        
        value = torch.zeros(1,1)
        
        for i in range(self.data_length):
            for j in range(self.data_length):
                # the i-j term of dual kernal-svm objective
                term = self.alphas[i]*self.alphas[j]*self.ys[i]*self.ys[j]*self.kernel(self.xs[i],self.xs[j])  
                value = torch.add(value, term)
        
        value = -0.5*value
        print(f"objective value now is {value}\n")
        
        return value
    
    def update_alpha(self, alphas):
        self.alphas = alphas
        
    def kernel_np(self, x1, x2):
        '''
        kernel_np takes the argument of 2-dimensional list,
        tranforming them into tensors demanded by kernel.forward(),
        returning a 1-d numpy array containing one single number
        
        x1 is a list of lists: [ [2, 5, 18, 0, 2, ...(sequence of data item 1)], [(sequence of data item 2)], .... ]
        '''
        lst1 = []
        lst2 = []
        
        for item in x1:
            lst1.append([[item]])
        x1_alter = torch.Tensor(lst1)
        
        lst2 = []
        for item in x2:
            lst2.append([[item]])
        x1_alter = torch.Tensor(lst1)
        x2_alter = torch.Tensor(lst2)
        
        #print(x1_alter)
        #print(x2_alter)
        return kernel(x1_alter, x2_alter).detach().numpy()[0]


In [104]:
# block 7, initialize the model

INPUT_DIM = 1
OUTPUT_DIM = 1
ENC_EMB_DIM = 1
DEC_EMB_DIM = 1
HID_DIM = 16   # this can be adjusted
N_LAYERS = 2   # this can be adjusted
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

TARG_LENGTH = 16 # this can be adjusted

dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("devivce=", dev)

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
seq = Seq2seq(enc, dec)
rbf = RBF()
kernel = Kernel(seq, rbf, TARG_LENGTH, OUTPUT_DIM)


devivce= cpu


In [105]:
# block 8, generating random test sample

datanum = 10
X_trainSVM = []
Y_trainSVM = []
for i in range(0,datanum):
    lst = []
    length = random.randint(20,50)
    for j in range(0,length):
        lst.append(random.randint(0,26))
    X_trainSVM.append(lst)

for i in range(0,datanum):
    Y_trainSVM.append(random.randint(0,1))

X_train = []
Y_train = []

for item in X_trainSVM:
    lst = []
    for num in item:
        lst.append([[num]])
    X_train.append(torch.Tensor(lst))

for num in Y_trainSVM:
    Y_train.append(torch.Tensor([num]))

    
print(X_trainSVM)
print(Y_trainSVM)
print(X_train)
print(Y_train)
    
model = Model(kernel, X_train, Y_train).to(dev)
print(model)

kernel(X_train[0],X_train[1])
model.kernel_np(X_trainSVM[0], X_trainSVM[1])

[[3, 11, 26, 11, 25, 8, 16, 12, 19, 12, 5, 7, 21, 13, 10, 13, 24, 22, 19, 22, 0, 8, 20, 21, 12, 15, 17], [21, 20, 15, 21, 21, 0, 4, 21, 22, 21, 19, 7, 9, 1, 25, 10, 22, 7, 19, 15, 23, 15, 3, 16], [25, 24, 4, 12, 2, 1, 0, 5, 7, 20, 23, 20, 5, 16, 6, 13, 6, 4, 20, 7, 16, 11, 10, 24, 2, 24, 6, 26, 0, 11, 2, 10, 15, 22, 19, 10], [19, 17, 17, 20, 1, 17, 21, 18, 19, 7, 17, 5, 17, 3, 24, 16, 13, 11, 11, 26, 22, 25, 22, 9, 14, 3, 18, 10, 19, 22, 0, 1, 4, 14, 21, 15], [0, 7, 5, 2, 11, 10, 14, 18, 4, 20, 26, 7, 26, 11, 7, 22, 13, 0, 8, 10, 7, 9, 3, 20, 25, 7, 21, 20, 25, 5, 16, 26, 6, 20, 11, 14, 24, 6, 3, 20, 8, 10, 25], [14, 22, 11, 0, 12, 6, 20, 8, 16, 13, 26, 11, 20, 25, 7, 18, 10, 20, 25, 3, 1, 17, 18, 25, 23, 23, 21, 21, 24, 1, 21, 2, 21, 20, 20, 6, 2, 6, 17, 12, 21, 16, 9, 2], [7, 0, 4, 5, 4, 3, 21, 9, 20, 15, 21, 18, 13, 15, 25, 12, 6, 12, 11, 3, 3, 1], [23, 25, 14, 10, 4, 0, 3, 6, 15, 5, 7, 0, 20, 7, 3, 9, 5, 23, 1, 12, 16, 18, 8, 24, 2, 3, 9, 4, 6, 2, 13, 25, 20], [18, 10, 0, 20, 1, 20

array([0.96276796], dtype=float32)

In [108]:
# block 9
# this is the interface to incorporate traing datasets into the model




# X_train, Y_train stores the data demanded by the -------NETWORK---------
# formats:
# X_train should be a list of input data x,
# where x should be a torch.tensor, size: [sequence len, batch size, feature size]
# Y_train should be a list of label y.
# where y should be a torch.tensor, size: [batch size]
# in our case: batch size and feature size are both set to 1
# refer to block 6 to view the details
X_train = []
Y_train = []

# X_trainSVM, Y_trainSVM stores the data demanded by the -------SVM OPTIMIZER---------
# formats:
# X_trainSVM should be a list of input data x,
# where x is a list of number
# Y_trainSVM should be a list of label (number)
X_trainSVM = []
Y_trainSVM = []

import json
# replace the file to be loaded here.
with open('shuffle.json', 'r') as jsonfile:
    data = json.load(jsonfile)

# load X_train, Y_train
for key in data:
    lst = []
    for item in data[key]['x']:
        lst.append([[item]])
    X_train.append(torch.Tensor(lst))
    Y_train.append(torch.Tensor([data[key]['y']]))
print(X_train[0])
print(Y_train[0])

# load X_trainSVM, Y_trainSVM
for key in data:
    X_trainSVM.append(data[key]['x'])
    Y_trainSVM.append(data[key]['y']) 
print(X_trainSVM[0])
print(Y_trainSVM[0])

model = Model(kernel, X_train, Y_train).to(dev)
print(model)
print(model.alphas)



tensor([[[ 4.]],

        [[25.]],

        [[11.]],

        [[ 4.]],

        [[ 4.]],

        [[ 4.]],

        [[ 4.]],

        [[11.]],

        [[12.]],

        [[11.]],

        [[16.]],

        [[ 4.]],

        [[ 6.]],

        [[ 3.]],

        [[ 6.]],

        [[12.]],

        [[ 5.]],

        [[ 5.]],

        [[ 4.]],

        [[16.]],

        [[ 7.]],

        [[ 9.]],

        [[ 3.]],

        [[18.]],

        [[ 7.]],

        [[25.]],

        [[ 9.]],

        [[20.]],

        [[18.]],

        [[25.]],

        [[ 6.]],

        [[25.]],

        [[14.]],

        [[14.]],

        [[17.]],

        [[20.]],

        [[11.]],

        [[17.]],

        [[ 3.]],

        [[ 5.]],

        [[18.]],

        [[ 6.]],

        [[11.]],

        [[25.]],

        [[ 7.]],

        [[ 7.]],

        [[ 3.]],

        [[12.]],

        [[ 7.]],

        [[14.]],

        [[13.]],

        [[14.]],

        [[14.]],

        [[ 6.]],

        [[ 5.]],

        [[

In [102]:
from svm.svm import SVM
svm = SVM()

In [107]:
# block 10, training.

class myCustom(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, output):
        return output
    
criterion = myCustom()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

print("training begin")
rounds = 0
for rounds in range(1000):
    for epoch in range(5):
        # zero the parameter gradients
        rounds += 1
        print(f"rounds = {rounds} \n")
        optimizer.zero_grad()
        
        # forward + backward + optimize kernel-svm objective wrt \theta (kernel parameters)
        output = model()
        loss = criterion(output)
        loss.backward()
        optimizer.step()
        
        print(f"loss = {loss}\n")
        
        
        # call kernel-svm solver to optimize objective wrt alpha (at the same time, passing the current kernel)
        # returns the updated new_alphas
        new_alphas = svm.fit(X_trainSVM, 
                             Y_trainSVM,
                             kernel=model.kernel_np)
        print(f"svm score is: {svm.score(X_trainSVM, Y_trainSVM)}")
        # -------------------------------------------------------------
        alpha_list = []
        for item in new_alphas:
            alpha_list.append([item])
        print(f"new alphas are: {torch.tensor(alpha_list)}")
        model.update_alpha(torch.tensor(alpha_list))
        
    

training begin
rounds = 1 

objective value now is tensor([[-0.7287]], grad_fn=<MulBackward0>)

loss = tensor([[-0.7287]], grad_fn=<MulBackward0>)

svm score is: 0.4
new alphas are: tensor([[1000.],
        [   0.],
        [1000.],
        [   0.],
        [   0.],
        [1000.],
        [   0.],
        [   0.],
        [   0.],
        [1000.]], dtype=torch.float64)
rounds = 2 

objective value now is tensor([[-0.]], dtype=torch.float64, grad_fn=<MulBackward0>)

loss = tensor([[-0.]], dtype=torch.float64, grad_fn=<MulBackward0>)



KeyboardInterrupt: 

In [None]:
'''
several hyperparameters are adjustable, refer to them in block 7.

HID_DIM = 16   # this refers to the hidden size of Rnn in both encoder/ decoder 
N_LAYERS = 2   # this refers to the numbers of layers of Rnn in both encoder/ decoder
TARG_LENGTH = 16 # this refers to the output sequence length of seq2seq2 module.


block 8 randomly generates data required for training,
this can be used to debug the pipeline,
in actual training process, there is no need to run block 8.


notice that in block 6 -> kernel_np(self, x1, x2), 
and in block 10: before  model.update_alpha(torch.tensor(alpha_list))
crucial datatype transformation should be undertaken

'''
