In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import models
from transformers import BertTokenizer, VisualBertModel
from PIL import Image
import os
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the train and dev splits into DataFrames for inspection.
train_df = pd.read_json(path_or_buf="../data/facebook/train.json")
dev_df = pd.read_json(path_or_buf="../data/facebook/dev.json")

# Last expression of the cell: show the first five training rows.
train_df.head(n=5)

Unnamed: 0,id,img,text,label
0,42953,train/non_hateful/42953.png,it their charact not their color that matter,0
1,23058,train/non_hateful/23058.png,dont be afraid to love again everyon is not li...,0
2,13894,train/non_hateful/13894.png,put bow on your pet,0
3,37408,train/non_hateful/37408.png,i love everyth and everybodi except for squirr...,0
4,82403,train/non_hateful/82403.png,everybodi love chocol chip cooki even hitler,0


In [7]:
# Notebook-wide configuration constants.

# Batching / schedule
BATCH_SIZE = 64
EPOCHS = 10

# Data location; image paths stored in the JSON files are joined onto this root.
ROOT_PATH = '../data/facebook'

# NOTE(review): this evaluates to a pixel count (50176), not a side length --
# confirm that is what downstream code expects.
IMAGE_SIZE = 224 * 224
NUM_CLASSES = 2

# Feature widths -- presumably consumed by the model defined later; verify there.
TEXTUAL_DIMENSION = 512
VISUAL_DIMENSION = 512

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
# Initialize the dataset and maintain the dataloader
class DynamicDataset(Dataset):
    """Dataset of ``(image, text[, label])`` samples described by a JSON file.

    The JSON file is loaded with ``pd.read_json``. Every row must provide an
    ``img`` column (image path relative to ``ROOT_PATH``) and a ``text``
    column; a ``label`` column is optional.

    Args:
        json_path: Path handed to ``pd.read_json``.
        transform: Optional callable applied to the RGB ``PIL.Image`` of each
            sample; whatever it returns is yielded as the image.
    """

    def __init__(self, json_path, transform = None):
        self.df = pd.read_json(json_path)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the sample stored at row *position* ``index``.

        Returns:
            ``(image, text, label)`` when the DataFrame has a ``label`` column,
            otherwise ``(image, text)``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        # Samplers hand out positions 0..len-1. Label-based ``.loc`` only agrees
        # with those when the frame carries a default RangeIndex, so look rows up
        # positionally instead.
        img_path = self.df['img'].iloc[index]
        img_file = os.path.join(ROOT_PATH, img_path)

        # ``convert`` returns an independent image, so the file handle can be
        # released as soon as the pixel data has been decoded.
        with Image.open(img_file) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        text = self.df['text'].iloc[index]
        # Unlabelled splits only yield the inputs.
        if 'label' not in self.df.columns:
            return image, text
        label = self.df['label'].iloc[index]

        return image, text, label

In [5]:
# Define a transform function for image preprocessing
transform = transforms.Compose([
    # Bring every image to one spatial size so the default collate function can
    # stack a batch; 224x224 matches IMAGE_SIZE = 224*224 in the config cell.
    # NOTE(review): assumes downstream code expects 224x224 inputs -- confirm.
    transforms.Resize((224, 224)),
    # PIL image -> float tensor in [0, 1], channels first
    transforms.ToTensor(),
    # Per-channel mean / std (the values commonly used with ImageNet-pretrained
    # torchvision models)
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create objects of each set of data
train_data = DynamicDataset(os.path.join(ROOT_PATH, 'train.json'), transform = transform)
dev_data = DynamicDataset(os.path.join(ROOT_PATH, 'dev.json'), transform = transform)

# Create a dataloader
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_data, batch_size = BATCH_SIZE, shuffle = True)
# Evaluation keeps the file order so predictions stay aligned with the rows of
# dev.json and runs are reproducible.
dev_loader = DataLoader(dev_data, batch_size = BATCH_SIZE, shuffle = False)