In [1]:
import numpy as np
import torch
from torch import nn, optim

import time
import os
import random

from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Block 1: Encoder
class Encoder(nn.Module):
    """Encode an observed sequence into the final LSTM hidden and cell states.

    Each time step is projected by a linear layer, passed through ReLU and
    dropout, and then fed to a multi-layer, single-direction LSTM.

    Args:
        input_size: feature size of one time step of the input.
        embedding_size: output size of the per-step linear projection.
        hidden_size: hidden size of the LSTM.
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability, applied to the embedding and between
            LSTM layers.
    """

    def __init__(self,
                 input_size = 1,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        # The third positional argument of nn.Linear is `bias`, not a layer
        # count, so n_layers must not be passed here.
        self.linear = nn.Linear(input_size, embedding_size)
        # nn.LSTM applies dropout only between stacked layers and warns when a
        # non-zero value is given for a single layer, where it has no effect.
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers,
                           dropout = dropout if n_layers > 1 else 0.0)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
        x: input batch data,
        size of x: [sequence len, batch size, feature size]

        Returns (hidden, cell): the last-step hidden and cell state of every
        LSTM layer, each of size [num of layers, batch size, hidden size]
        (the LSTM is single-directional).
        """
        # size of embedded : [sequence len, batch size, embedding size]
        embedded = self.dropout(F.relu(self.linear(x)))

        # the per-step outputs are not needed, only the final states
        _, (hidden, cell) = self.rnn(embedded)

        return hidden, cell

In [3]:
# Block 2: Decoder
class Decoder(nn.Module):
    """Single-step LSTM decoder.

    One call consumes one time step: the step input is projected, passed
    through ReLU and dropout, run through the LSTM together with the incoming
    state, and mapped back to the output feature size.
    """

    def __init__(self,
                 output_size = 1,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.output_size, self.hidden_size, self.n_layers = output_size, hidden_size, n_layers

        self.embedding = nn.Linear(output_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout = dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        """
        x: one time step of a batch, size [batch size, feature size]
           (there is no sequence dimension; the caller passes a single step)
        hidden / cell: LSTM state, each of size [num of layers, batch size, hidden dim]

        Returns (prediction, hidden, cell) where prediction has size
        [batch size, output size] and hidden / cell are the updated state.
        """
        # nn.LSTM wants a leading sequence axis: [1, batch size, feature size]
        step = x[None]
        embedded = self.dropout(torch.relu(self.embedding(step)))

        # With one step and one direction the shapes are:
        #   output : [1, batch size, hidden dim]
        #   hidden : [num of layers, batch size, hidden dim]
        #   cell   : [num of layers, batch size, hidden dim]
        state = (hidden, cell)
        output, state = self.rnn(embedded, state)
        hidden, cell = state

        # drop the length-1 sequence axis before the output projection
        prediction = self.linear(output[0])

        return prediction, hidden, cell


In [4]:
# Block 3: Seq2seq
class Seq2seq(nn.Module):
    """Encoder-decoder wrapper that unrolls the decoder step by step.

    The encoder's final (hidden, cell) state initialises the decoder, and the
    last observed step of `x` is the first decoder input.

    Args:
        encoder: module mapping x to (hidden, cell); must expose
            `hidden_size` and `n_layers`.
        decoder: module mapping (step input, hidden, cell) to
            (prediction, hidden, cell); must expose `hidden_size` and
            `n_layers` equal to the encoder's.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

        assert encoder.hidden_size == decoder.hidden_size, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        """
        size of x : [observed sequence len, batch size, feature size]
        size of y : [target sequence len, batch size, feature size]
        teacher_forcing_ratio : probability, drawn independently at every
            step, of feeding the true target y[i] instead of the decoder's own
            prediction as the next input. Use 0 for pure autoregressive
            decoding (y is then only used for its shape).

        Returns a tensor with the shape of y holding the prediction of every
        decoding step.
        """
        target_len = y.shape[0]

        # last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden, cell = self.encoder(x)

        # Buffer for the per-step predictions. It is allocated with the dtype
        # and device of the encoder state, so it lives where the model lives
        # (plain torch.zeros would always be float32 on CPU).
        outputs = torch.zeros(y.shape, dtype=hidden.dtype, device=hidden.device)

        # first input to decoder is last coordinates of x
        decoder_input = x[-1, :, :]

        for i in range(target_len):
            # run the decoder for one time step
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[i] = output

            # One draw per step, even when the ratio is 0 or 1, so the
            # consumption of the `random` stream does not depend on the ratio.
            teacher_forcing = random.random() < teacher_forcing_ratio

            # output has the same shape as the input, [batch size, feature size];
            # use the true label or the prediction as the next input
            if teacher_forcing:
                decoder_input = y[i].to(device=output.device, dtype=output.dtype)
            else:
                decoder_input = output

        return outputs


In [5]:
# Block 4: RBF
class RBF(nn.Module):
    """Radial kernel exp(-||x1 - x2||_2 / sigma) with a learnable bandwidth.

    The Euclidean norm is taken over dimension 0 (the sequence axis), and
    `sigma` is a single learnable scalar initialised to 1.
    """

    def __init__(self):
        super().__init__()
        # Allocated uninitialised; reset_parameters() sets the actual value.
        self.sigma = nn.Parameter(torch.empty(1))
        self.reset_parameters()

    def reset_parameters(self):
        """Reset the bandwidth `sigma` to 1."""
        nn.init.constant_(self.sigma, 1)

    def forward(self, x1, x2):
        '''
        size of x1/x2 : [input sequence len, batch size, feature size],
        for our task, the last two sizes are both 1.

        Returns a tensor of size [batch size, feature size].
        '''
        # vector_norm returns the same value as (x1 - x2).pow(2).sum(0).pow(0.5)
        # but has a zero (sub)gradient when x1 == x2. The hand-written square
        # root produces inf * 0 = NaN in backward for identical inputs.
        distance = torch.linalg.vector_norm(x1 - x2, ord=2, dim=0)

        return torch.exp(-(distance / self.sigma))

In [6]:
# Block 5: Kernel
# class Kernel refers to the structure combining Seq2seq module and RBF module.
class Kernel(nn.Module):
    """Learnable kernel: embed both inputs with seq2seq, then compare with rbf.

    Args:
        seq2seq: module called as seq2seq(x, y, teacher_forcing_ratio=...)
            that returns a tensor shaped like y.
        rbf: module called as rbf(a, b) on the two seq2seq outputs.
        target_length: seq2seq output sequence length.
        output_dim: seq2seq output feature size; in our case 1.
    """

    def __init__(self, seq2seq, rbf, target_length, output_dim):
        super().__init__()
        # target_length: seq2seq output sequence length
        self.target_length = target_length
        # output_dim: seq2seq output feature size; in our case being 1
        self.output_dim = output_dim

        self.seq2seq = seq2seq
        self.rbf = rbf

    def _embed(self, x):
        """Map one input sequence to its fixed-length seq2seq output."""
        # There is no ground-truth target here; seq2seq only needs a tensor
        # that fixes the output shape. A zeros placeholder on x's device and
        # dtype is used, and teacher forcing is switched off so the
        # placeholder's values are never fed into the decoder.
        placeholder = x.new_zeros(self.target_length, x.shape[1], self.output_dim)
        return self.seq2seq(x, placeholder, teacher_forcing_ratio=0.0)

    def forward(self, x1, x2):
        """
        size of x1/x2 : [observed sequence len, batch size, feature size]
        (the two sequence lengths may differ)

        Returns rbf(seq2seq(x1), seq2seq(x2)); with the RBF module of this
        notebook that is a tensor of size [batch size, feature size], both
        being 1 in our case.

        NOTE: any dropout inside seq2seq stays active in training mode, so
        repeated calls can still differ unless the module is in eval mode.
        """
        outputs1 = self._embed(x1)
        outputs2 = self._embed(x2)

        return self.rbf(outputs1, outputs2)
        

In [7]:
# Block 6: Model
class Model(nn.Module):
    """Quadratic term of the dual kernel-SVM objective as a function of the kernel.

    forward() evaluates -0.5 * sum_ij alpha_i * alpha_j * y_i * y_j * K(x_i, x_j)
    so that an optimizer can update the kernel parameters while the
    coefficients `alphas` are held fixed.

    Args:
        kernel: module called as kernel(x1, x2).
        xs: list of input tensors x, each of size
            [sequence len, batch size, feature size].
        ys: list of labels y, one per x (tensor of size [batch size], or a scalar).
    """

    def __init__(self, kernel, xs, ys):
        super().__init__()
        # xs is a list of input data x,
        # where the size of x is: [sequence len, batch size, feature size]
        self.xs = xs
        # ys is a list of label y.
        # size of y: [batch size]
        self.ys = ys
        # data_length: num of items
        self.data_length = len(ys)
        # size of alphas: [num of items, batch size]
        # Registered as a buffer (not a parameter): it follows .to()/.cuda()
        # with the rest of the module but is not touched by the optimizer.
        self.register_buffer("alphas", torch.randn(self.data_length, 1))
        self.kernel = kernel

    def kernel_np(self, x1, x2):
        """Evaluate the current kernel on (x1, x2) and return a NumPy array.

        Intended as the kernel callback of an external (NumPy-based) solver:
        no autograd graph is built and the result is moved to the CPU.
        NOTE: the module's train/eval mode is left as it is.
        """
        device = self.alphas.device
        with torch.no_grad():
            value = self.kernel(x1.to(device), x2.to(device))
        return value.detach().cpu().numpy()

    def forward(self):
        """Return the objective as a tensor of size [1, 1]."""
        device = self.alphas.device
        # xs / ys are plain Python lists, so Module.to() does not move them;
        # move them to the module's device once per call.
        xs = [x.to(device) for x in self.xs]
        ys = [torch.as_tensor(y, device=device) for y in self.ys]

        value = self.alphas.new_zeros(1, 1)

        for i in range(self.data_length):
            for j in range(self.data_length):
                # the i-j term of dual kernel-svm objective
                term = self.alphas[i]*self.alphas[j]*ys[i]*ys[j]*self.kernel(xs[i], xs[j])
                value = torch.add(value, term)

        return -0.5*value

    def update_alpha(self, alphas):
        """Replace the dual coefficients.

        `alphas` may be a tensor, NumPy array or sequence indexable by item
        number. It is detached and converted to the dtype and device of the
        current coefficients; its shape is kept as given.
        """
        current = self.alphas
        self.alphas = torch.as_tensor(alphas, dtype=current.dtype, device=current.device).detach()


In [8]:
# block 7: hyper-parameters, model assembly and a toy training set

# feature sizes of one time step
INPUT_DIM = OUTPUT_DIM = 1
# embedding sizes of encoder / decoder
ENC_EMB_DIM = DEC_EMB_DIM = 128
# LSTM width and depth, shared by encoder and decoder
HID_DIM = 256
N_LAYERS = 4
ENC_DROPOUT = DEC_DROPOUT = 0.5

# length of the seq2seq output sequence
TARG_LENGTH = 13

dev = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("devivce=", dev)

# assemble the kernel: encoder + decoder -> seq2seq, compared through the RBF
enc = Encoder(input_size = INPUT_DIM,
              embedding_size = ENC_EMB_DIM,
              hidden_size = HID_DIM,
              n_layers = N_LAYERS,
              dropout = ENC_DROPOUT)
dec = Decoder(output_size = OUTPUT_DIM,
              embedding_size = DEC_EMB_DIM,
              hidden_size = HID_DIM,
              n_layers = N_LAYERS,
              dropout = DEC_DROPOUT)
seq = Seq2seq(enc, dec)
rbf = RBF()
kernel = Kernel(seq, rbf, target_length = TARG_LENGTH, output_dim = OUTPUT_DIM)

# WARNING ------------ data interface (with wzy) ---------------------------------------------------------
# This is where the training data set is plugged into the model.
# X_train: list of inputs x, each a torch.tensor of size [sequence len, batch size, feature size]
# Y_train: list of labels y, each a torch.tensor of size [batch size]
# In our case batch size and feature size are both 1; see block 6.

# random placeholder samples of different sequence lengths, all labelled 1
X_train = [torch.randn(seq_len, 1, INPUT_DIM) for seq_len in (123, 12, 13, 23)]
x1, x2, x3, x4 = X_train
Y_train = [torch.Tensor([[1]]) for _ in X_train]
y1, y2, y3, y4 = Y_train

model = Model(kernel, X_train, Y_train).to(dev)

print(model)

devivce= cpu
Model(
  (kernel): Kernel(
    (seq2seq): Seq2seq(
      (encoder): Encoder(
        (linear): Linear(in_features=1, out_features=128, bias=True)
        (rnn): LSTM(128, 256, num_layers=4, dropout=0.5)
        (dropout): Dropout(p=0.5, inplace=False)
      )
      (decoder): Decoder(
        (embedding): Linear(in_features=1, out_features=128, bias=True)
        (rnn): LSTM(128, 256, num_layers=4, dropout=0.5)
        (linear): Linear(in_features=256, out_features=1, bias=True)
        (dropout): Dropout(p=0.5, inplace=False)
      )
    )
    (rbf): RBF()
  )
)


In [9]:
# Kernel-SVM solver used for the alpha-step of the training loop (block 8).
# `SVM` comes from the project-local `svm` package; its implementation is not
# part of this notebook.
# NOTE(review): the captured output of block 8 shows a Gurobi license banner,
# so the solver presumably depends on Gurobi -- confirm.
from svm.svm import SVM
# NOTE: the instance is bound to the name `svm`, the same name as the package;
# a later `import svm` in this notebook would rebind it.
svm = SVM()

In [None]:
# block 8, training.

class myCustom(nn.Module):
    """Identity criterion: the model output already is the loss value.

    It holds no parameters or state and exists only so that the training loop
    keeps the usual `criterion(output)` shape.
    """

    def forward(self, output):
        """Return `output` unchanged."""
        loss = output
        return loss
    
# outer / inner iteration counts of the alternating optimisation
N_ROUNDS = 1000
STEPS_PER_ROUND = 5

criterion = myCustom()
optimizer = optim.SGD(params = model.parameters(), momentum = 0.9, lr = 1e-3)

print("training begin")
for rounds in range(N_ROUNDS):
    for epoch in range(STEPS_PER_ROUND):
        # --- theta-step: one SGD step on the kernel-svm objective wrt the
        # kernel parameters, with the current alphas held fixed ---
        optimizer.zero_grad()
        output = model()
        loss = criterion(output)
        loss.backward()
        optimizer.step()

        print(loss)

        # --- alpha-step ---
        # WARNING (original author): this part still needs to be modified.
        # The kernel-svm solver optimises the objective wrt alpha using the
        # current kernel and returns the updated coefficients.
        new_alphas = svm.fit(X_train, Y_train, kernel = model.kernel_np)
        print(new_alphas)
        train_score = svm.score(X_train, Y_train)
        print(train_score)

        # hand the new coefficients to the model for the next theta-step
        model.update_alpha(torch.tensor(new_alphas))
    

training begin
tensor([[-0.0242]], grad_fn=<MulBackward0>)
Academic license - for non-commercial use only - expires 2022-10-26
Using license file C:\Users\NHT\gurobi.lic


  gram = np.array([[Y[i] * Y[j] * self.kernel(X[i], X[j]) for j in range(N)] for i in range(N)])
  gram = np.array([[Y[i] * Y[j] * self.kernel(X[i], X[j]) for j in range(N)] for i in range(N)])
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return array(a, dtype, copy=False, order=order, subok=True)


[0. 0. 0. 0.]
1.0
tensor([[-0.]], grad_fn=<MulBackward0>)
[0. 0. 0. 0.]
1.0
tensor([[-0.]], grad_fn=<MulBackward0>)
[0. 0. 0. 0.]
1.0
