In [None]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from sklearn.model_selection import StratifiedShuffleSplit

In [None]:
# Report whether CUDA can be used, list every visible GPU, and pick the
# device that the rest of the notebook moves tensors and the model to.
cuda_ok = torch.cuda.is_available()
print('GPU is available' if cuda_ok else 'No GPU detected')

num_gpus = torch.cuda.device_count()

if num_gpus <= 0:
    print("No GPUs available")
else:
    print(f"Number of available GPUs: {num_gpus}")
    for gpu_index in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f"GPU {gpu_index}: {gpu_name}")

# Fall back to the CPU when CUDA is not usable.
device = torch.device('cuda' if cuda_ok else 'cpu')

In [None]:
# Preprocessing applied to every image on load: bring all images to the same
# 256x256 size, then convert them to tensors.
resize_step = transforms.Resize((256, 256))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

In [None]:
# Show the current working directory (handy for checking where relative
# paths resolve). `os` is already imported in the notebook's import cell,
# so it is not re-imported here.
os.getcwd()

In [None]:
# Location of the image folder. It can be overridden without editing the
# notebook by setting the SOFTCOM_DATASET_DIR environment variable; the
# original hard-coded path is kept as the default.
dataset_root = os.environ.get('SOFTCOM_DATASET_DIR', 'd:\\SoftCom_Assignment01\\Dataset')

# Every image is passed through `transform` when it is read.
dataset = ImageFolder(root=dataset_root, transform=transform)

In [None]:
# Stratified train / validation / test split over the dataset's class labels.
labels = np.array(dataset.targets)
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# First split: hold out 20% of the whole dataset as the test set.
trainval_idx, test_idx = next(sss.split(np.zeros(len(labels)), labels))

# Second split: carve a validation set out of the remaining samples.
# `split` returns POSITIONS within the array it is given, not dataset indices,
# so the positions must be mapped back through `trainval_idx`. Using them
# directly as dataset indices would overlap with the test set.
train_pos, val_pos = next(
    sss.split(np.zeros(len(trainval_idx)), labels[trainval_idx])
)
train_idx = trainval_idx[train_pos]
val_idx = trainval_idx[val_pos]

# Sanity checks: the three splits cover the dataset and the test set is disjoint.
assert len(train_idx) + len(val_idx) + len(test_idx) == len(labels)
assert np.intersect1d(train_idx, test_idx).size == 0
assert np.intersect1d(val_idx, test_idx).size == 0

In [None]:
# Wrap each index array in a Subset view over the full dataset.
train_dataset, val_dataset, test_dataset = (
    torch.utils.data.Subset(dataset, split_indices)
    for split_indices in (train_idx, val_idx, test_idx)
)

In [None]:
# Hyperparameters

# Data layout: each batch is reshaped to (-1, sequence_length, input_size)
# before it is fed to the model.
input_size = 256
sequence_length = 3 * 256

# Model
hidden_size = 64
num_layers = 2
num_classes = 3

# Optimisation
batch_size = 100
num_iters = 1200       # target number of optimizer steps; used to derive num_epochs
learning_rate = 0.01   # raised from the earlier 0.001 to learn faster

In [None]:
# Number of passes over the training set needed to reach roughly `num_iters`
# optimizer steps at the chosen batch size (truncated to an integer).
num_epochs = int(num_iters / (len(train_dataset) / batch_size))

# Training batches are reshuffled every epoch; the trailing partial batch is
# dropped so every training step sees exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True, drop_last=True)

# Evaluation loaders keep the final partial batch (drop_last=False): dropping
# it would silently exclude samples from the reported accuracy and would leave
# the loader empty whenever a split holds fewer than `batch_size` images.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False, drop_last=False)

val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         shuffle=False, drop_last=False)

In [None]:
# Number of batches per loader, in the order: train, test, validation.
# (The third line previously printed the test loader a second time.)
print(len(train_loader))
print(len(test_loader))
print(len(val_loader))

In [None]:
# Fix the random seeds of PyTorch (CPU and, when present, CUDA) and NumPy.
seed_value = 42

torch.manual_seed(seed_value)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed_value)
np.random.seed(seed_value)

In [None]:
class LSTM(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classifier.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden units per direction in each LSTM layer.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        # Both directions are concatenated, hence hidden_size * 2 input features.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: input of shape (batch, seq_length, input_size).
        """
        # Zero initial hidden and cell states: one per layer and direction.
        # They are created on the input's own device and dtype so the module
        # does not depend on a notebook-level `device` variable.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # out: (batch, seq_length, hidden_size * 2) for a bidirectional LSTM.
        out, _ = self.lstm(x, (h0, c0))

        # Classify from the features at the last time step.
        # NOTE(review): at that step the reverse direction has only processed
        # the final input element; confirm this is the intended read-out.
        out = self.fc(out[:, -1, :])
        return out

In [None]:
# INSTANTIATE MODEL CLASS
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
# Move the parameters to the selected device (GPU when available).
model.to(device)

In [None]:
# Adam updates all parameters of `model` at the configured learning rate;
# the loss is cross-entropy computed on the model's raw logits.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
# TRAIN THE MODEL
#
# Runs `num_epochs` passes over `train_loader`. Every 300 optimizer steps the
# current model is scored on `test_loader` and the latest training loss and
# the accuracy are printed.
# NOTE(review): `val_loader` is built earlier in the notebook but never used;
# periodic monitoring would normally read from it rather than from the test
# set. Confirm the intent before switching.

# Counts optimizer steps. Named `iteration` so the builtin `iter` is not shadowed.
iteration = 0
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Reshape each batch into sequences of `sequence_length` steps with
        # `input_size` features per step, then move it to the training device.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iteration += 1

        if iteration % 300 == 0:
            correct = 0
            total = 0

            # Evaluate without building autograd graphs, with the model in
            # eval mode. Separate names keep the training batch intact.
            model.eval()
            with torch.no_grad():
                for eval_images, eval_labels in test_loader:
                    eval_images = eval_images.reshape(-1, sequence_length, input_size).to(device)
                    eval_labels = eval_labels.to(device)

                    # Forward pass only to get logits/output
                    eval_outputs = model(eval_images)

                    # Predicted class = index of the largest logit
                    _, predicted = torch.max(eval_outputs, 1)

                    total += eval_labels.size(0)
                    correct += (predicted == eval_labels).sum().item()
            model.train()

            # An empty loader yields NaN instead of raising.
            accuracy = 100 * correct / total if total else float('nan')

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iteration, loss.item(), accuracy))

print(iteration)
print(num_epochs)

In [None]:
# Testing the LSTM model: accuracy of the trained model over `test_loader`.
total = 0
correct = 0

# Evaluation mode plus disabled gradient tracking for inference.
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass only to get logits/output
        outputs = model(images)

        # Predicted class = index of the largest logit
        _, predicted = torch.max(outputs, 1)

        # Total number of labels
        total += labels.size(0)

        # Total correct predictions
        correct += (predicted == labels).sum().item()

# An empty loader yields NaN instead of a ZeroDivisionError.
accuracy = 100 * correct / total if total else float('nan')

print('Test Accuracy of the model on the test images: {} %'.format(accuracy))