# 1. Loading the data

In [3]:
import torch
import matplotlib.pyplot as plt

print(f"PyTorch version: {torch.__version__}")

# Check PyTorch has access to MPS (Metal Performance Shader, Apple's GPU architecture)
print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Set the device      
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")


PyTorch version: 2.0.1
Is MPS (Metal Performance Shader) built? True
Is MPS available? True
Using device: mps


In [4]:
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize((240, 320)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [5]:
import torchvision.datasets

ds_ucf_train = torchvision.datasets.UCF101('../resources/UCF-101/data','../resources/UCF-101/label',5,5,train=True, output_format='TCHW', transform=preprocess)
ds_ucf_test = torchvision.datasets.UCF101('../resources/UCF-101/data','../resources/UCF-101/label',5,5,train=False, output_format='TCHW', transform=preprocess)

100%|██████████| 833/833 [04:46<00:00,  2.91it/s]
100%|██████████| 833/833 [04:46<00:00,  2.91it/s]


In [6]:
torch.save(ds_ucf_train, 'ucf101_train.pt')
torch.save(ds_ucf_test, 'ucf101_test.pt')

In [7]:
from torchvision.transforms.functional import resize

def custom_collate_fn(batch):
    images, audio, labels = zip(*batch)
    
    return torch.stack(images), labels

In [8]:
batch_size = 32
class_num = 101

ds_ucf_train_loaded = torch.load('ucf101_train.pt')
data_loader_train = torch.utils.data.DataLoader(ds_ucf_train_loaded, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn, pin_memory=True)

ds_ucf_test_loaded = torch.load('ucf101_test.pt')
data_loader_test = torch.utils.data.DataLoader(ds_ucf_test_loaded, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn, pin_memory=True)

# 2. Model

In [9]:
import torch
import torch.nn as nn
import torchvision.models as models

class ResNetForClassification(nn.Module):
    def __init__(self, pretrained, num_ftrs, class_num):
        super(ResNetForClassification, self).__init__()
        self.pretrained = pretrained
        self.pretrained.fc = nn.Linear(num_ftrs, class_num)
        torch.nn.init.xavier_uniform_(self.pretrained.fc.weight)
        self.relu = nn.ReLU()

    def forward(self, x):
        output = self.pretrained(x)
        output = self.relu(output)
        return output

In [10]:
resnet34_pretrained = models.resnet34(pretrained=True)
num_ftrs = resnet34_pretrained.fc.in_features

resnet34 = ResNetForClassification(resnet34_pretrained, num_ftrs, class_num).to(device)




In [11]:
total_batches_train = len(data_loader_train)
total_batches_test = len(data_loader_test)

In [12]:
def accuracy_top3(y, targets):
    accuracies = []
    _, top_3_indices = torch.topk(y, 3)
    for target,result in zip(targets,top_3_indices):
        if target in result:
            accuracies.append(1)
        else:
            accuracies.append(0)

    return sum(accuracies) / len(accuracies)


In [13]:
import torch.optim as optim
from timeit import default_timer as timer 

accuracies = []
num_epochs = 1
lr = 1e-5
weight_decay = 5e-4

loss_function = nn.MSELoss()

# Freeze all layers
for param in resnet34.parameters():
    param.requires_grad = False

# Unfreeze the last layer (assuming it's named "layerN")
for name, param in resnet34.named_parameters():
    if "fc" in name:
        param.requires_grad = True

optimizer = optim.Adam(resnet34.parameters(), lr=lr, weight_decay=weight_decay)

start_t = timer()

for epoch in range(num_epochs): 
    for int, (X, y) in enumerate(data_loader_train):    
        X = X.reshape(32*5, 3, 240, 320).to(device)
        optimizer.zero_grad()

        y_diagonal_array = torch.zeros([batch_size*5, class_num], device=device)
        for i in range(batch_size):
            y_diagonal_array[i][y[i]] = 1

        y_diagonal_array = y_diagonal_array.to(device)

        resnet34.train()
        res = resnet34(X)
        accuracy = accuracy_top3(res, list(y))
        accuracies.append(accuracy)
        print(accuracy)
        loss = loss_function(res, y_diagonal_array)
        print(res)
        print(y_diagonal_array)
        loss.backward()
        optimizer.step()

end_t = timer()
print("Total time: ", end_t - start_t)

# print(prof.key_averages().table(sort_by="self_cpu_time_total"))
print("Total acc", len(accuracies))
plt.plot(accuracies)
plt.xlabel('Batch')
plt.ylabel('Top-3 Accuracy')
plt.title('Training Top-3 Accuracy')
plt.ylim(0, 1)
plt.show()



TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>

In [None]:
for i, (X, y) in enumerate(data_loader_test):
    print(i,'th batch out of', total_batches_test)