In [69]:
import os
import torch
import numpy as np
import pandas as pd

from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [70]:
batchsize = 100
training_data = datasets.FashionMNIST(root="../fashion_mnist", train=True, transform=transforms.ToTensor(), download=True)
test_data = datasets.FashionMNIST(root="../fashion_mnist", train=False, transform=transforms.ToTensor(), download=True)

train_dataloader = DataLoader(training_data, batch_size=batchsize, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batchsize, shuffle=True)

In [71]:
# define hyperparameters
sequence_len = 28
input_len = 28
hidden_size = 128
num_layers = 2
num_classes = 10
num_epochs = 10
learning_rate = 0.01

In [72]:
class LSTM(nn.Module):
    def __init__(self, input_len, hidden_size, num_class, num_layers):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size=input_len, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.ouput_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, X):
        hidden_states = torch.zeros(self.num_layers, X.size(0), self.hidden_size)
        cell_states = torch.zeros(self.num_layers, X.size(0), self.hidden_size)
        out, _ = self.lstm(X, (hidden_states, cell_states))
        out = self.ouput_layer(out[:, -1, :])
        return out

In [73]:
model = LSTM(input_len, hidden_size, num_classes, num_layers)
print(model)

LSTM(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (ouput_layer): Linear(in_features=128, out_features=10, bias=True)
)


In [74]:
loss_func = nn.CrossEntropyLoss()
sgd = optim.SGD(model.parameters(), lr=learning_rate)
adam = optim.Adam(model.parameters(), lr=learning_rate)

In [75]:
def train(num_epochs, model, traindataloader, loss_function, optimizer):
    total_steps = len(traindataloader)

    for epoch in range(num_epochs):
        for batch, (images, labels) in enumerate(traindataloader):
            images = images.reshape(-1, sequence_len, input_len)

            output = model(images)
            loss = loss_function(output, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if(batch+1)%100 == 0:
                print(f"Epoch: {epoch+1}: Batch {batch+1} / {total_steps}; Loss: {loss.item():>4}")

In [76]:
train(num_epochs, model, train_dataloader, loss_func, sgd)

Epoch: 0: Batch 99 / 600; Loss: 2.3027312755584717
Epoch: 0: Batch 199 / 600; Loss: 2.3028528690338135
Epoch: 0: Batch 299 / 600; Loss: 2.2928240299224854
Epoch: 0: Batch 399 / 600; Loss: 2.2971718311309814
Epoch: 0: Batch 499 / 600; Loss: 2.2872400283813477
Epoch: 0: Batch 599 / 600; Loss: 2.2885189056396484
Epoch: 1: Batch 99 / 600; Loss: 2.285217761993408
Epoch: 1: Batch 199 / 600; Loss: 2.2799270153045654
Epoch: 1: Batch 299 / 600; Loss: 2.276942729949951
Epoch: 1: Batch 399 / 600; Loss: 2.273347854614258
Epoch: 1: Batch 499 / 600; Loss: 2.2701456546783447
Epoch: 1: Batch 599 / 600; Loss: 2.2612640857696533
Epoch: 2: Batch 99 / 600; Loss: 2.2570202350616455
Epoch: 2: Batch 199 / 600; Loss: 2.2480807304382324
Epoch: 2: Batch 299 / 600; Loss: 2.241544723510742
Epoch: 2: Batch 399 / 600; Loss: 2.2122035026550293
Epoch: 2: Batch 499 / 600; Loss: 2.211308479309082
Epoch: 2: Batch 599 / 600; Loss: 2.172844171524048
Epoch: 3: Batch 99 / 600; Loss: 2.154489517211914
Epoch: 3: Batch 199 / 6

In [77]:
test_images, test_label = next(iter(test_dataloader))
test_label

tensor([7, 8, 3, 0, 6, 1, 0, 2, 2, 8, 5, 9, 0, 8, 1, 5, 0, 5, 0, 1, 4, 0, 8, 5,
        6, 7, 0, 7, 6, 3, 2, 7, 6, 0, 2, 8, 0, 0, 9, 3, 4, 1, 6, 4, 5, 8, 9, 9,
        7, 0, 5, 7, 9, 2, 6, 0, 8, 6, 7, 3, 1, 4, 3, 8, 4, 4, 5, 5, 6, 8, 9, 0,
        0, 3, 9, 6, 3, 3, 1, 4, 6, 8, 4, 3, 8, 3, 2, 0, 4, 4, 9, 2, 7, 4, 6, 1,
        5, 9, 9, 9])