## Environment and Dataset Setup

In [2]:
# Step 1: Install Required Libraries
# This step installs necessary libraries for NLP and image processing
!pip install transformers torch torchvision scikit-learn pandas numpy matplotlib datasets kaggle

# Step 2: Upload and Extract Dataset
import zipfile
import os

# Define dataset path
dataset_zip_path = "/content/twitter-dataset.zip"

# Check if the file is uploaded
if not os.path.exists(dataset_zip_path):
    from google.colab import files
    print("📂 Please upload the dataset .zip file")
    uploaded = files.upload()  # Upload the dataset manually

# Extract the dataset
extract_folder = "/content/dataset"
if not os.path.exists(extract_folder):  # Extract only if not already extracted
    with zipfile.ZipFile(dataset_zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_folder)

print("✅ Dataset extracted successfully!")

# Step 3: Load and Explore the Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Define extracted dataset paths
dataset_folder = "/content/dataset"  # Path where the dataset is extracted

# Load labeled text dataset
labeled_text_path = os.path.join(dataset_folder, "LabeledText.xlsx")  # Adjust if file name is different
df_text = pd.read_excel("/content/dataset/LabeledText.xlsx")

print("Dataset Columns:", df_text.columns)
print("\nFirst few rows:\n", df_text.head())

# Load image paths
image_folder = os.path.join(dataset_folder, "Images/Images")  # Adjust if folder structure is different
image_files = []
for sentiment in os.listdir(image_folder):
    sentiment_folder = os.path.join(image_folder, sentiment)
    if os.path.isdir(sentiment_folder):
        for img_file in os.listdir(sentiment_folder):
            image_files.append((sentiment, img_file))

df_images = pd.DataFrame(image_files, columns=["Sentiment", "Image File"])
print("\nTotal Images:", len(df_images))
print("Unique Sentiments:", df_images["Sentiment"].unique())
print("\nFirst few image entries:")
print(df_images.head())

✅ Dataset extracted successfully!
Dataset Columns: Index(['File Name', 'Caption', 'LABEL'], dtype='object')

First few rows:
   File Name                                            Caption     LABEL
0     1.txt      How I feel today #legday #jelly #aching #gym   negative
1    10.txt  @ArrivaTW absolute disgrace two carriages from...  negative
2   100.txt  This is my Valentine's from 1 of my nephews. I...  positive
3  1000.txt  betterfeelingfilms: RT via Instagram: First da...   neutral
4  1001.txt         Zoe's first love #Rattled @JohnnyHarper15   positive

Total Images: 4869
Unique Sentiments: ['Negative' 'positive' 'Neutral']

First few image entries:
  Sentiment Image File
0  Negative   1265.jpg
1  Negative    193.jpg
2  Negative   4759.jpg
3  Negative   1307.jpg
4  Negative   4845.jpg


In [3]:
import os
import torch
import gc

# Prevent CUDA errors by forcing CPU use
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Clear cache before reloading model
gc.collect()
torch.cuda.empty_cache()

print("✅ GPU reset and memory cleared. Ready to proceed!")

✅ GPU reset and memory cleared. Ready to proceed!


## Natural Language Processing Component

In [4]:
# Step 4: NLP Preprocessing - Tokenization & Embeddings
# Convert text into a format suitable for a machine learning model
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Disable wandb logging
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

# Encode sentiment labels
df_text["LABEL"] = df_text["LABEL"].map({'positive': 0, 'neutral': 1, 'negative': 2})
label_mapping = {'positive': 0, 'neutral': 1, 'negative': 2}  # Ensure label mapping exists globally

# Split dataset into training and testing sets
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df_text["Caption"].tolist(), df_text["LABEL"].tolist(), test_size=0.2, random_state=42
)

# Tokenization using DistilBERT
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=128)

# Convert labels into tensors
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

# Define dataset class
class SentimentDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item

# Prepare dataset
train_dataset = SentimentDataset(train_encodings, train_labels)
test_dataset = SentimentDataset(test_encodings, test_labels)

# Step 5: Train and Evaluate Text Sentiment Model
# Load pre-trained model
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results", evaluation_strategy="epoch", save_strategy="epoch",
    per_device_train_batch_size=8, per_device_eval_batch_size=8, num_train_epochs=2,
    weight_decay=0.01, logging_dir="./logs", logging_steps=10,
    report_to="none"  # Disable wandb completely
)

# Initialize Trainer
trainer = Trainer(
    model=model, args=training_args, train_dataset=train_dataset, eval_dataset=test_dataset
)

# Train model
trainer.train()

# Evaluate model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Predict on test set
predictions = trainer.predict(test_dataset)
predicted_labels = np.argmax(predictions.predictions, axis=1)

# Calculate evaluation metrics
accuracy = accuracy_score(test_labels, predicted_labels)
precision = precision_score(test_labels, predicted_labels, average='weighted')
recall = recall_score(test_labels, predicted_labels, average='weighted')
f1 = f1_score(test_labels, predicted_labels, average='weighted')

# Print results
print(f"✅ Text Model Evaluation Complete!")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.5554,0.619235
2,0.5034,0.6737


✅ Text Model Evaluation Complete!
Accuracy: 0.7587
Precision: 0.7575
Recall: 0.7587
F1 Score: 0.7578


## Computer Vision Component

In [7]:
# Step 6: Define Image Feature Extraction and Fine-Tune ResNet
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Ensure we're using the GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load ResNet model with fine-tuning enabled
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
num_features = resnet.fc.in_features
resnet.fc = nn.Linear(num_features, 3)  # Adjust output for 3 sentiment classes
resnet = resnet.to(device)  # Move model to GPU if available

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet.parameters(), lr=0.0001)  # Lower LR for fine-tuning

# Custom dataset class for image sentiment classification
class ImageSentimentDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_folder = "/content/dataset/Images/Images"  # Ensure correct dataset path
        img_path = os.path.join(image_folder, self.df.iloc[idx]["Sentiment"], self.df.iloc[idx]["Image File"])

        label = label_mapping[self.df.iloc[idx]["Sentiment"].lower()]
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        return image, label

# Step 7: Train Image Sentiment Classification Model

# Split dataset into train/test
train_df, test_df = train_test_split(df_images, test_size=0.2, random_state=42, stratify=df_images["Sentiment"])
train_dataset = ImageSentimentDataset(train_df, transform)
test_dataset = ImageSentimentDataset(test_df, transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

# Train ResNet50 for sentiment classification
resnet.train()

num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0
    correct, total = 0, 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}, Accuracy: {correct/total:.4f}")

print("✅ Fine-Tuned ResNet Model Training Complete!")

# Evaluate Fine-Tuned ResNet
resnet.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = resnet(images)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().tolist())  # Move back to CPU for evaluation
        y_true.extend(labels.cpu().tolist())

# Compute evaluation metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

# Print results
print(f"✅ Fine-Tuned ResNet Model Evaluation Complete!")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 54.3MB/s]
Epoch 1/10: 100%|██████████| 244/244 [01:16<00:00,  3.19it/s]


Epoch 1, Loss: 274.4686, Accuracy: 0.3659


Epoch 2/10: 100%|██████████| 244/244 [01:07<00:00,  3.64it/s]


Epoch 2, Loss: 245.0916, Accuracy: 0.4958


Epoch 3/10: 100%|██████████| 244/244 [01:07<00:00,  3.61it/s]


Epoch 3, Loss: 194.2871, Accuracy: 0.6460


Epoch 4/10: 100%|██████████| 244/244 [01:07<00:00,  3.63it/s]


Epoch 4, Loss: 124.5782, Accuracy: 0.7946


Epoch 5/10: 100%|██████████| 244/244 [01:06<00:00,  3.68it/s]


Epoch 5, Loss: 84.7828, Accuracy: 0.8693


Epoch 6/10: 100%|██████████| 244/244 [01:06<00:00,  3.65it/s]


Epoch 6, Loss: 55.0202, Accuracy: 0.9237


Epoch 7/10: 100%|██████████| 244/244 [01:06<00:00,  3.69it/s]


Epoch 7, Loss: 45.3669, Accuracy: 0.9361


Epoch 8/10: 100%|██████████| 244/244 [01:06<00:00,  3.68it/s]


Epoch 8, Loss: 34.3089, Accuracy: 0.9502


Epoch 9/10: 100%|██████████| 244/244 [01:06<00:00,  3.68it/s]


Epoch 9, Loss: 42.7005, Accuracy: 0.9363


Epoch 10/10: 100%|██████████| 244/244 [01:06<00:00,  3.64it/s]


Epoch 10, Loss: 37.5997, Accuracy: 0.9440
✅ Fine-Tuned ResNet Model Training Complete!
✅ Fine-Tuned ResNet Model Evaluation Complete!
Accuracy: 0.4025
Precision: 0.4204
Recall: 0.4025
F1 Score: 0.3827


## Fusion Model

In [16]:
# Step 8: Extract Features for Fusion Model
import torch
import numpy as np
from torch.utils.data import DataLoader

# Move model to CPU to avoid CUDA memory issues
device = torch.device("cpu")
model.to(device)

# Extract text embeddings (BERT)
def get_bert_embeddings(dataset):
    model.eval()  # Set model to eval mode
    embeddings = []
    dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

    with torch.no_grad():
        for batch in dataloader:
            # Access data correctly from SentimentDataset
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Get hidden states instead of classification output
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)

            # Access the last hidden state
            hidden_states = outputs.hidden_states[-1]

            # Extract the [CLS] token embedding
            cls_embeddings = hidden_states[:, 0, :].cpu().numpy()
            embeddings.extend(cls_embeddings)

    return np.array(embeddings)

# Recreate NLP datasets
train_dataset_nlp = SentimentDataset(train_encodings, train_labels)
test_dataset_nlp = SentimentDataset(test_encodings, test_labels)

# Compute text embeddings using NLP datasets
train_text_features = get_bert_embeddings(train_dataset_nlp)
test_text_features = get_bert_embeddings(test_dataset_nlp)

print(f"✅ Extracted text features: {train_text_features.shape}, {test_text_features.shape}")

# Recreate Image datasets for feature extraction (Assuming ImageSentimentDataset and transform are defined in previous cells)
train_dataset_images = ImageSentimentDataset(train_df, transform)
test_dataset_images = ImageSentimentDataset(test_df, transform)

# Define function to extract image features
def get_resnet_embeddings(dataset):
    # Ensure ResNet is loaded and on the correct device
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    num_features = resnet.fc.in_features
    resnet.fc = nn.Linear(num_features, 3)
    resnet = resnet.to(device)
    resnet.eval()  # Set ResNet to eval mode

    embeddings = []
    dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            outputs = resnet(images)  # Get ResNet outputs
            embeddings.extend(outputs.cpu().numpy())  # Move to CPU and add to list

    return np.array(embeddings)

# Compute image embeddings using ResNet
train_image_features = get_resnet_embeddings(train_dataset_images)
test_image_features = get_resnet_embeddings(test_dataset_images)

# Save the extracted image features
np.save("fine_tuned_image_features.npy", np.concatenate([train_image_features, test_image_features]))

print(f"✅ Extracted and saved image features: {train_image_features.shape}, {test_image_features.shape}")


# Load pre-extracted fine-tuned ResNet image features
image_features = np.load("fine_tuned_image_features.npy")

# Ensure image features match text features
min_train_size = min(len(train_text_features), len(image_features[:len(train_text_features)]))
min_test_size = min(len(test_text_features), len(image_features[len(train_text_features):]))

# Trim datasets to match sizes
train_text_features = train_text_features[:min_train_size]
train_image_features = image_features[:min_train_size]

test_text_features = test_text_features[:min_test_size]
test_image_features = image_features[len(train_text_features):len(train_text_features) + min_test_size]

# Concatenate text and image features
X_train_fusion = np.hstack((train_text_features, train_image_features))
X_test_fusion = np.hstack((test_text_features, test_image_features))

# Ensure labels match adjusted sizes
y_train_fusion = np.array(train_labels[:min_train_size])
y_test_fusion = np.array(test_labels[:min_test_size])

print(f"✅ Fusion features created: {X_train_fusion.shape}, {X_test_fusion.shape}")

✅ Extracted text features: (3895, 768), (974, 768)
✅ Extracted and saved image features: (3895, 3), (974, 3)
✅ Fusion features created: (3895, 771), (974, 771)


In [18]:
# Step 9: Train the Fusion Model
# Convert fusion features and labels to PyTorch tensors
X_train_tensor = torch.tensor(X_train_fusion, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_fusion, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_fusion, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_fusion, dtype=torch.long)

# Create DataLoaders
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# Define a simple neural network for fusion classification
class FusionSentimentClassifier(nn.Module):
    def __init__(self, input_dim, output_dim=3):
        super(FusionSentimentClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Initialize model
input_dim = X_train_fusion.shape[1]
fusion_model = FusionSentimentClassifier(input_dim)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(fusion_model.parameters(), lr=0.001)

# Train the fusion model
num_epochs = 5
for epoch in range(num_epochs):
    fusion_model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = fusion_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

print("✅ Fusion model training complete!")

Epoch 1, Loss: 134.6386
Epoch 2, Loss: 134.0744
Epoch 3, Loss: 134.0062
Epoch 4, Loss: 133.8445
Epoch 5, Loss: 133.8328
✅ Fusion model training complete!


In [19]:
# Step 10: Fusion Model Evaluation
fusion_model.eval()
y_pred = []
y_true = []

# Run prediction
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = fusion_model(inputs)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.tolist())
        y_true.extend(targets.tolist())

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

# Print results
print("✅ Fusion Model Evaluation Complete!")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

✅ Fusion Model Evaluation Complete!
Accuracy: 0.3953
Precision: 0.3341
Recall: 0.3953
F1 Score: 0.3112
