In [None]:
import itertools
import pickle

import numpy as np
import pandas as pd

In [None]:
# Text data: tab-separated table loaded into a DataFrame.
df = pd.read_csv("train_df.tsv", sep="\t")

# Image descriptions.
# NOTE: pickle.load can execute arbitrary code while deserializing; only
# load .pkl files that come from a trusted source.
with open("D_train.pkl", "rb") as f:
    image_descriptions = pickle.load(f)
print("Image Descriptions (First 5 Records):")
# islice stops after five entries instead of copying every (key, value)
# pair into a temporary list just to slice off the first five.
for key, value in itertools.islice(image_descriptions.items(), 5):
    print(f"{key}: {value}")

# Detected objects (same trust caveat for pickle as above).
with open("O_train.pkl", "rb") as f:
    detected_objects = pickle.load(f)
print("\nDetected Objects (First 5 Records):")
for key, value in itertools.islice(detected_objects.items(), 5):
    print(f"{key}: {value}")

In [None]:
import torch
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms

class MuSEDataset(Dataset):
    """Dataset pairing each text row with its image and image metadata.

    Every sample combines one row of a tab-separated text table with the
    image file named by that row, plus the image description and detected
    objects looked up under the same image name.

    Args:
        text_file: Path to a TSV file. ``__getitem__`` reads the columns
            ``"text"``, ``"pid"`` and ``"target_of_sarcasm"`` from it.
        image_desc_file: Path to a pickle file holding an object with a
            ``.get(key, default)`` lookup, queried by the row's ``pid``.
        obj_file: Path to a pickle file with the same kind of lookup for
            detected objects.
        image_folder: Directory containing the image files; the file name
            is taken verbatim from the ``pid`` column.
        tokenizer: Callable invoked as
            ``tokenizer(text, padding=..., truncation=..., return_tensors="pt")``
            whose result provides ``"input_ids"`` and ``"attention_mask"``.
        transform: Optional callable applied to the RGB ``PIL`` image. When
            ``None``, a default resize / to-tensor / normalize pipeline is
            used.
    """

    def __init__(self, text_file, image_desc_file, obj_file, image_folder, tokenizer, transform=None):
        # Text data.
        self.text_data = pd.read_csv(text_file, sep="\t")

        # NOTE: pickle.load can execute arbitrary code while deserializing;
        # both pickle files below must come from a trusted source.

        # Image descriptions.
        with open(image_desc_file, "rb") as f:
            self.image_descriptions = pickle.load(f)

        # Detected objects.
        with open(obj_file, "rb") as f:
            self.detected_objects = pickle.load(f)

        self.image_folder = image_folder
        self.tokenizer = tokenizer

        # Compare against None explicitly: a caller-supplied transform that
        # happens to be falsy (e.g. a callable container with length 0) must
        # not be silently swapped for the default pipeline.
        if transform is not None:
            self.transform = transform
        else:
            # Default pipeline: 224x224 resize, tensor conversion and
            # per-channel normalization (the mean/std values match the
            # widely used ImageNet statistics).
            self.transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

    def __len__(self) -> int:
        """Return the number of rows in the text table."""
        return len(self.text_data)

    def __getitem__(self, idx) -> dict:
        """Build the sample for the row at position ``idx``.

        Returns:
            A dict with the keys ``"text_input_ids"``,
            ``"text_attention_mask"``, ``"image"``, ``"image_description"``,
            ``"detected_objects"`` and ``"sarcasm_label"``.
        """
        # Get the text data and corresponding image data.
        row = self.text_data.iloc[idx]
        text = row["text"]
        image_name = row["pid"]
        sarcasm_label = row["target_of_sarcasm"]

        # Load and preprocess the image. Image.open keeps the underlying
        # file open until it is closed, so use it as a context manager;
        # convert() returns an independent image that stays valid after
        # the file has been closed.
        image_path = f"{self.image_folder}/{image_name}"
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        image = self.transform(image)

        # Get image description and detected objects; entries missing for
        # this image fall back to an empty string.
        # NOTE(review): if the stored values are not strings, the "" default
        # yields mixed types across samples -- confirm batching copes.
        img_desc = self.image_descriptions.get(image_name, "")
        detected_objs = self.detected_objects.get(image_name, "")

        # Tokenize text. return_tensors="pt" adds a leading batch dimension,
        # which squeeze(0) removes below.
        text_inputs = self.tokenizer(text, padding='max_length', truncation=True, return_tensors="pt")

        return {
            "text_input_ids": text_inputs["input_ids"].squeeze(0),
            "text_attention_mask": text_inputs["attention_mask"].squeeze(0),
            "image": image,
            "image_description": img_desc,
            "detected_objects": detected_objs,
            # NOTE(review): torch.tensor needs a numeric value here --
            # confirm the "target_of_sarcasm" column holds integer labels.
            "sarcasm_label": torch.tensor(sarcasm_label, dtype=torch.long)
        }

# Example usage
# dataset = MuSEDataset("train_df.tsv", "D_train.pkl", "O_train.pkl", "images", tokenizer)
