In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.io import read_image

import matplotlib.pyplot as plt

import os

import pandas as pd

from PIL import Image

import urllib
from urllib.request import urlretrieve

import numpy as np

from torch.nn.utils.rnn import pad_sequence




#Creating a Dataset

dataPath = "/home/jupyter/novice"
labels_file = os.path.join(dataPath, "vlm.jsonl")
images_folder = os.path.join(dataPath, "images")


class CustomImageDataset(Dataset):
    def __init__(self, labels_file, images_folder, transform=None, target_transform=None):
        self.img_labels = pd.read_json(labels_file, lines=True)
        self.img_dir = images_folder
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        label = np.array([label])
        sample = {"image" : image, "label" : label}
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return sample
    
our_dataset = CustomImageDataset(labels_file, images_folder)


#Implementing SSD300 VGG16 model

weights = torchvision.models.detection.SSD300_VGG16_Weights.DEFAULT
ssd_model = torchvision.models.detection.ssd300_vgg16(
    weights=True, box_score_thresh=0.9
)
ssd_model.eval()  # Set the model to evaluation mode

#Collate_FN

# def collate_fn(data):
#     _, labels, lengths = zip(*data)
#     max_len = max(lengths)
#     n_ftrs = data[0][0].size(1)
#     features = torch.zeros((len(data), max_len, n_ftrs))
#     labels = torch.tensor(labels)
#     lengths = torch.tensor(lengths)
    
#     for i in range(len(data)):
#         j, k = data[i][0].size(0), data([i][o].size(1))
#         features[i] = torch.cat([data[i][0], torch.zeros((max_len - j, k))])
#     return features.float(), labels.long(), lengths.long()

def collate_fn(data: list[tuple[torch.Tensor, torch.Tensor]]):
    tensors, targets = zip(*data)
    features = pad_sequence(tensors, batch_first=True)
    targets = torch.stack(targets)
    return features, targets


# Data Loading




dataloader = DataLoader(our_dataset, batch_size=4, shuffle=False, num_workers=0)

# def show_label_batch(sample_batched):
#     images_batch, labels_batch = sample_batched["image"], sample_batched["label"]
#     batch_size = len(images_batch)
#     im_size = images_batch.size(2)
    
# for i_batch, sample_batched in enumerate(dataloader):
#     print(i_batch, sample_batched["image"].size(),
#           sample_batched["label"].size())
    
#     if i_batch == 3:
#         plt.figure()
#         show_label_batch(sample_batched)
#         plt.axis('off')
#         plt.ioff()
#         plt.show()
#         break
    
    



# Printing Image and Label
# fig = plt.figure()

# for i, sample in enumerate(our_dataset):
#     print(i, sample["image"].shape, sample["label"].shape)
#     print(sample["label"])
#     ax = plt.subplot(1,4,i+1)
#     plt.tight_layout()
#     ax.axis('off')
#     plt.imshow(sample["image"].permute(1,2,0))
#     plt.show()
    
#     if i == 3:
#         plt.show()
#         break



#Image Loading]

train_dataloader = DataLoader(our_dataset, batch_size = 4, shuffle=False, collate_fn=collate_fn)

train_features, train_labels = next(iter(train_dataloader))
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels
plt.imshow(img.permute(1,2,0))
plt.show()
print(f"Label: {label}")


##figure out padding and loading multiple images
##transfer learning and fine tuning 
##test output before training (can it detect airplane first)





TypeError: expected Tensor as element 0 in argument 0, but got str