In [4]:
#!/usr/bin/env python3

import os
import sys
import numpy as np 
import argparse 
import time 

import torch 
import torch.nn as nn 
import torch.optim as optim 
import torch.nn.functional as F 
from tc.tc_fc import TTLinear 

from torchvision import datasets, transforms

seed = 7
np.random.seed(seed)
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

batch_size =200
input_tensor = [7, 4, 7, 4]
hidden_tensors= [[8, 4, 8, 4], [8, 4, 8, 4], [8, 4, 8, 8]]
n_epochs = 5


# class tt_model(nn.Module):
#     def __init__(self, hidden_tensors, input_tensor, output_dim, tt_rank):
#         super(tt_model, self).__init__()
#         if len(hidden_tensors) != 3:
#             raise ValueError('The depth of hidden layers should be 3!')

#         self.TTLinear1 = TTLinear(input_tensor, hidden_tensors[0], tt_rank=tt_rank)
#         self.TTLinear2 = TTLinear(hidden_tensors[0], hidden_tensors[1], tt_rank=tt_rank)
#         self.TTLinear3 = TTLinear(hidden_tensors[1], hidden_tensors[2], tt_rank=tt_rank)
#         self.fc4 = nn.Linear(np.prod(hidden_tensors[2]), output_dim)

#     def forward(self, inputs):
#         out = self.TTLinear1(inputs)
#         out = self.TTLinear2(out)
#         out = self.TTLinear3(out)
#         out = self.fc4(out)

#         return F.log_softmax(out, dim=1)

class tt_autoencoder(nn.Module):
    def __init__(self, hidden_tensors, input_tensor, output_dim, tt_rank):
        super(tt_autoencoder, self).__init__()
        self.encoder1 = TTLinear(input_tensor, hidden_tensors[0], tt_rank=tt_rank)
        self.encoder2 = TTLinear(hidden_tensors[0], hidden_tensors[1], tt_rank=tt_rank)
        self.encoder3 = TTLinear(hidden_tensors[1], hidden_tensors[2], tt_rank=tt_rank)
        self.decoder1 = TTLinear(hidden_tensors[2],hidden_tensors[1], tt_rank=tt_rank),
        self.decoder2 = TTLinear(hidden_tensors[1],hidden_tensors[0], tt_rank=tt_rank),
        self.decoder3 = TTLinear(hidden_tensors[0],input_tensor, tt_rank=tt_rank)

    def forward(self, inputs):
        ### Encoder layer
        out = self.encoder1(inputs)
        out = self.encoder2(out)
#         out = self.encoder3(out)
        ### Decoder Layer with activation
#         out = self.decoder1(out)
        out = self.decoder2(out)
        out = F.sigmoid(self.decoder3(out))
        return out

In [5]:
if __name__=='__main__':


    ### get data
    # convert data to torch.FloatTensor
    transform = transforms.ToTensor()

    # load the training and test datasets
    train_data = datasets.MNIST(root='data', train=True,
                                       download=True, transform=transform)
    test_data = datasets.MNIST(root='data', train=False,
                                      download=True, transform=transform)
    # Create training and test dataloaders

    # number of subprocesses to use for data loading
    num_workers = 0
    # how many samples per batch to load
    batch_size = 20

    # prepare data loaders
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)
    tt_rank = [1, 2, 2, 2, 1]
    print('Building a Tensor-Train model...')
    model = tt_autoencoder(hidden_tensors, input_tensor, 10, tt_rank).to(device)
    
    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())

    lr = 0.001
    # specify loss function
    criterion = nn.MSELoss()

    # specify loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


    # number of epochs to train the model
    n_epochs = 10

Building a Tensor-Train model...
Model's state_dict:
encoder1.b 	 torch.Size([1])
encoder1.W_cores.0 	 torch.Size([1, 7, 8, 2])
encoder1.W_cores.1 	 torch.Size([2, 4, 4, 2])
encoder1.W_cores.2 	 torch.Size([2, 7, 8, 2])
encoder1.W_cores.3 	 torch.Size([2, 4, 4, 1])
encoder2.b 	 torch.Size([1])
encoder2.W_cores.0 	 torch.Size([1, 8, 8, 2])
encoder2.W_cores.1 	 torch.Size([2, 4, 4, 2])
encoder2.W_cores.2 	 torch.Size([2, 8, 8, 2])
encoder2.W_cores.3 	 torch.Size([2, 4, 4, 1])
encoder3.b 	 torch.Size([1])
encoder3.W_cores.0 	 torch.Size([1, 8, 8, 2])
encoder3.W_cores.1 	 torch.Size([2, 4, 4, 2])
encoder3.W_cores.2 	 torch.Size([2, 8, 8, 2])
encoder3.W_cores.3 	 torch.Size([2, 4, 8, 1])
decoder3.b 	 torch.Size([1])
decoder3.W_cores.0 	 torch.Size([1, 8, 7, 2])
decoder3.W_cores.1 	 torch.Size([2, 4, 4, 2])
decoder3.W_cores.2 	 torch.Size([2, 8, 7, 2])
decoder3.W_cores.3 	 torch.Size([2, 4, 4, 1])


In [6]:
for epoch in range(1, n_epochs+1):
    # monitor training loss
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for data in train_loader:
        # _ stands in for labels, here
        images, _ = data
        # flatten images
        images = images.view(images.size(0), -1)
        images = images.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        outputs = model(images)
        # calculate the loss
        loss = criterion(outputs, images)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*images.size(0)

    # print avg training statistics 
    train_loss = train_loss/len(train_loader)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch, 
        train_loss
        ))


torch.save(model.state_dict(),"ae_tt.pt")



Epoch: 1 	Training Loss: 4.749967


KeyboardInterrupt: 