In [59]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
# from torch.utils.data import DataLoader
import numpy as np
import torchvision
from torchvision import datasets, transforms
from torchvision.models import inception_v3, Inception_V3_Weights
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
from models import initialize_vision_model

device = torch.device("cuda:0" if torch.cuda.is_available else "cpu")

In [32]:
print(f'Device: {device}')
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

Device: cuda:0
PyTorch Version:  1.13.1+cu117
Torchvision Version:  0.14.1+cu117


In [33]:
data_dir = "./data"
model_name = "inception"
num_classes = 4
batch_size = 8
epochs = 5
feature_extract = True

In [34]:
image_dataset = datasets.ImageFolder(data_dir)

In [35]:
image_dataset

Dataset ImageFolder
    Number of datapoints: 5321
    Root location: ./data

In [36]:
classes =  image_dataset.classes
num_classess = len(classes)
print(classes)
print(f'Num of Classes: {len(classes)}')

['black', 'blue', 'green', 'other']
Num of Classes: 4


In [37]:
image_dataset.class_to_idx

{'black': 0, 'blue': 1, 'green': 2, 'other': 3}

In [49]:
# image_dataset.imgs

In [52]:
# use random_split to split data into train, val, and test sets
def split_dataset(image_dataset=image_dataset, test_size=0.2):
    
    total_size = len(image_dataset)
    
    test_size = int(total_size * test_size)
    train_val_size = total_size - test_size
    val_size = test_size
    
    split_sizes = [train_val_size, test_size]
    train_val_split, test_split = random_split(image_dataset, split_sizes)

    split_sizes = [train_val_size - val_size, val_size]
    train_split, val_split = random_split(train_val_split, split_sizes)

    print(f'Train size: {len(train_split)}')
    print(f'Val size: {len(val_split)}')
    print(f'Test size: {len(test_split)}')

    return train_split, val_split, test_split

train_set, val_set, test_set = split_dataset()

Train size: 3193
Val size: 1064
Test size: 1064


In [57]:
def load_datasets_and_dataloaders(input_size, train_set, val_set, test_set):
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    
    train_set = Dataset(train_set, data_transforms['train'])
    val_set = Dataset(val_set, data_transforms['val'])
    test_set = Dataset(test_set, data_transforms['val'])
    
    dataloaders_dict = {
        'train': DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4),
        'val': DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=4)
    }
    
    test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=True, num_workers=4)
    
    print("Loading Datasets and Initializing DataLoaders...")
    return test_dataloader, dataloaders_dict

In [58]:
class MultiModalGarbageModel(torch.nn.Module):
    def __init__(self, num_classes, loss_fn, text_module, image_module, 
                 text_feat_dim, image_feat_dim, fusion_output_size, dropout_p):
        super(MultiModalGarbageModel, self).__init__()
        self.text_module = text_module
        self.image_module = image_module
        self.fusion = torch.nn.Linear((text_feat_dim + image_feat_dim), fusion_output_size)
        self.fc = torch.nn.Linear(fusion_output_size, num_classes)
        self.criterion = loss_fn
        self.dropout = torch.nn.Dropout(dropout_p)

    def init_weights(self):
        initrange = 0.5
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

    def forward(self, text, offsets):
        embedded = self.embedding(text, offsets)
        return self.fc(embedded)

In [56]:
test_dataloader, dataloaders_dict = load_datasets_and_dataloaders(input_size, train_set, val_set, test_set)

NameError: name 'input_size' is not defined