<a href="https://colab.research.google.com/github/tasnimislamraisa/Python_Learning/blob/deep-Learning/Multimodal_Fake_News_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [124]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [125]:
!pip install -q transformers torchvision scikit-learn


In [126]:
import os, json, random
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms, models
from transformers import BertTokenizer, BertModel
from tqdm import tqdm

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [127]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

Using device: cuda


In [128]:
# Reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(42)

# **Set dataset paths in Colab**

In [129]:
BASE_DIR = "/content/drive/MyDrive/FakeNews_Multimodal_Dataset/fakeddit_subset"

TRAIN_JSON = os.path.join(BASE_DIR, "training_data_fakeddit.jsonl")
VAL_JSON   = os.path.join(BASE_DIR, "validation_data_fakeddit.jsonl")

TRAIN_IMG_DIR = os.path.join(BASE_DIR, "image_folder")
VAL_IMG_DIR   = os.path.join(BASE_DIR, "validation_image")

In [130]:
print("TRAIN_JSON exists:", os.path.exists(TRAIN_JSON))
print("VAL_JSON exists:", os.path.exists(VAL_JSON))
print("Train images:", len(os.listdir(TRAIN_IMG_DIR)))
print("Val images:", len(os.listdir(VAL_IMG_DIR)))

TRAIN_JSON exists: True
VAL_JSON exists: True
Train images: 4000
Val images: 4210


# **Load JSON files**

In [131]:
def load_jsonl(path):
    rows = []
    with open(path, "r") as f:
        for line in f:
            rows.append(json.loads(line))
    return pd.DataFrame(rows)

train_df_raw = load_jsonl(TRAIN_JSON)
val_df_raw   = load_jsonl(VAL_JSON)

print("Raw train columns:", train_df_raw.columns)
print("Raw val columns:", val_df_raw.columns)

Raw train columns: Index(['contents'], dtype='object')
Raw val columns: Index(['contents'], dtype='object')


In [132]:
def parse_contents(df):
    records = []
    for row in df["contents"]:
        # each row is like: [ {role:user, parts:[fileData,text]}, {role:model, parts:[text]} ]
        user_part = row[0]
        model_part = row[1]

        image = None
        text = None

        for part in user_part.get("parts", []):
            if "fileData" in part:
                image = part["fileData"]["fileUri"].split("/")[-1]
            if "text" in part:
                text = part["text"]

        label_text = model_part["parts"][0]["text"].strip().lower()
        label = 1 if label_text == "yes" else 0

        if image and text:
            records.append({"image": image, "text": text, "label": label})
    return pd.DataFrame(records)

train_df = parse_contents(train_df_raw)
val_df   = parse_contents(val_df_raw)

print("Parsed train:", train_df.shape, "Parsed val:", val_df.shape)
print(train_df["label"].value_counts())

Parsed train: (4000, 3) Parsed val: (4210, 3)
label
0    2291
1    1709
Name: count, dtype: int64


In [133]:
labels_np = train_df["label"].values
class_weights = compute_class_weight(class_weight="balanced", classes=np.array([0,1]), y=labels_np)
class_weights = torch.tensor(class_weights, dtype=torch.float32).to(DEVICE)
print("Class weights [Real, Fake]:", class_weights.tolist())


Class weights [Real, Fake]: [0.8729812502861023, 1.1702749729156494]


In [134]:
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

In [135]:
import re

def clean_text(t):
    m = re.search(r'Title:"(.*?)"', t, flags=re.IGNORECASE|re.DOTALL)
    if m:
        return m.group(1).strip()
    return t.strip()

train_df["text"] = train_df["text"].apply(clean_text)
val_df["text"]   = val_df["text"].apply(clean_text)


In [136]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [137]:
class MultimodalDataset(Dataset):
    def __init__(self, df, image_dir, tokenizer, transform, max_len=128):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.tokenizer = tokenizer
        self.transform = transform
        self.max_len = max_len

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # Text
        enc = self.tokenizer(
            row["text"],
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )

        # Image (safe)
        img_path = os.path.join(self.image_dir, row["image"])
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception:
            img = Image.new("RGB", (224, 224), (0, 0, 0))

        img = self.transform(img)

        return {
            "image": img,
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "label": torch.tensor(row["label"], dtype=torch.long)
        }


**snopes_medical data**

# **Load and inspect the dataset**

In [138]:
BATCH_SIZE = 8
NUM_WORKERS = 2

train_dataset = MultimodalDataset(train_df, TRAIN_IMG_DIR, tokenizer, train_transform)
val_dataset   = MultimodalDataset(val_df,   VAL_IMG_DIR,   tokenizer, val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

***Clean & preprocess***

# **Trainâ€“Validation Split**

In [139]:
class MultimodalFakeNewsModel(nn.Module):
    def __init__(self, dropout=0.4):
        super().__init__()

        # Text encoder
        self.bert = BertModel.from_pretrained("bert-base-uncased")

        # Image encoder
       # self.cnn = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Replace classifier head to get 256-dim image features
        #self.cnn.fc = nn.Linear(512, 256)

        from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

        self.cnn = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
        in_f = self.cnn.classifier[1].in_features
        self.cnn.classifier[1] = nn.Linear(in_f, 256)


        # Project text CLS -> 256
        self.text_fc = nn.Linear(768, 256)

        # Fusion: concat -> 512, then gate
        self.gate = nn.Sequential(
            nn.Linear(512, 256),
            nn.Sigmoid()
        )

        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 2)
        )

    def forward(self, image, input_ids, attention_mask):
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        text_cls = bert_out.last_hidden_state[:, 0, :]  # CLS embedding
        text_feat = self.text_fc(text_cls)              # (B,256)

        img_feat = self.cnn(image)                      # (B,256)

        fused = torch.cat([text_feat, img_feat], dim=1) # (B,512)
        g = self.gate(fused)                            # (B,256)
        fused = torch.cat([text_feat * g, img_feat * (1 - g)], dim=1)  # gated mixing (B,512)

        return self.classifier(fused)

In [140]:
model = MultimodalFakeNewsModel().to(DEVICE)

In [141]:
print(model)


MultimodalFakeNewsModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, e

In [142]:
# -------------------------
# BERT: freeze all, unfreeze last 4
# -------------------------
for p in model.bert.parameters():
    p.requires_grad = False

for layer in model.bert.encoder.layer[-4:]:
    for p in layer.parameters():
        p.requires_grad = True


# -------------------------
# EfficientNet: freeze all, unfreeze last blocks + classifier
# -------------------------
for p in model.cnn.parameters():
    p.requires_grad = False

# Unfreeze last 2 feature blocks (strong but still stable)
for p in model.cnn.features[-2:].parameters():
    p.requires_grad = True

# Unfreeze classifier head
for p in model.cnn.classifier.parameters():
    p.requires_grad = True


# -------------------------
# Always train fusion/classifier layers
# -------------------------
for p in model.text_fc.parameters():
    p.requires_grad = True
for p in model.gate.parameters():
    p.requires_grad = True
for p in model.classifier.parameters():
    p.requires_grad = True


In [143]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        ce = nn.functional.cross_entropy(logits, targets, reduction='none', weight=self.alpha)
        pt = torch.exp(-ce)
        loss = ((1 - pt) ** self.gamma) * ce
        return loss.mean()


In [144]:
criterion = FocalLoss(alpha=class_weights, gamma=2.0)


In [145]:
optimizer = torch.optim.AdamW([
    {"params": model.bert.encoder.layer[-4:].parameters(), "lr": 2e-5},

    # EfficientNet last blocks + classifier
    {"params": model.cnn.features[-2:].parameters(),       "lr": 1e-4},
    {"params": model.cnn.classifier.parameters(),          "lr": 1e-4},

    # Fusion head
    {"params": model.text_fc.parameters(),                 "lr": 1e-4},
    {"params": model.gate.parameters(),                    "lr": 1e-4},
    {"params": model.classifier.parameters(),              "lr": 1e-4},
], weight_decay=0.01)


In [146]:
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print("Trainable:", trainable, "Total:", total)


Trainable: 30268850 Total: 114277758


In [147]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="max", factor=0.5, patience=1
)


# **Training Loop**

In [148]:
def train_one_epoch(model, loader):
    model.train()
    total_loss = 0.0
    correct = 0

    for batch in tqdm(loader, desc="Train", leave=False):
        optimizer.zero_grad()

        image = batch["image"].to(DEVICE)
        input_ids = batch["input_ids"].to(DEVICE)
        attn = batch["attention_mask"].to(DEVICE)
        labels = batch["label"].to(DEVICE)

        logits = model(image, input_ids, attn)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        correct += (logits.argmax(1) == labels).sum().item()

    return total_loss / len(loader), correct / len(loader.dataset)

@torch.no_grad()
def eval_one_epoch(model, loader):
    model.eval()
    total_loss = 0.0
    correct = 0
    all_preds, all_labels = [], []

    for batch in tqdm(loader, desc="Val", leave=False):
        image = batch["image"].to(DEVICE)
        input_ids = batch["input_ids"].to(DEVICE)
        attn = batch["attention_mask"].to(DEVICE)
        labels = batch["label"].to(DEVICE)

        logits = model(image, input_ids, attn)
        loss = criterion(logits, labels)

        total_loss += loss.item()
        preds = logits.argmax(1)

        correct += (preds == labels).sum().item()
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    acc = correct / len(loader.dataset)
    return total_loss / len(loader), acc, np.array(all_labels), np.array(all_preds)

# **Train Model**

In [149]:
# 13) TRAIN WITH EARLY STOPPING (ON VAL ACC)
# =========================
EPOCHS = 10
best_val_acc = 0.0
patience = 2
bad_epochs = 0

for epoch in range(1, EPOCHS + 1):
    tr_loss, tr_acc = train_one_epoch(model, train_loader)
    va_loss, va_acc, y_true, y_pred = eval_one_epoch(model, val_loader)

    print(f"\nEpoch {epoch}/{EPOCHS}")
    print(f"Train Loss: {tr_loss:.4f} | Train Acc: {tr_acc:.4f}")
    print(f"Val   Loss: {va_loss:.4f} | Val   Acc: {va_acc:.4f}")

    # Step scheduler on val acc
    scheduler.step(va_acc)

    # Early stopping + save best
    if va_acc > best_val_acc:
        best_val_acc = va_acc
        bad_epochs = 0
        torch.save(model.state_dict(), "best_multimodal_model.pt")
        print("âœ… Saved best model.")
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print("ðŸ›‘ Early stopping triggered.")
            break

print("\nBest Val Acc:", best_val_acc)




Epoch 1/10
Train Loss: 0.1451 | Train Acc: 0.6760
Val   Loss: 0.1216 | Val   Acc: 0.7762
âœ… Saved best model.





Epoch 2/10
Train Loss: 0.1095 | Train Acc: 0.8017
Val   Loss: 0.1263 | Val   Acc: 0.8021
âœ… Saved best model.





Epoch 3/10
Train Loss: 0.0837 | Train Acc: 0.8612
Val   Loss: 0.1290 | Val   Acc: 0.8012


                                                      


Epoch 4/10
Train Loss: 0.0566 | Train Acc: 0.9040
Val   Loss: 0.1693 | Val   Acc: 0.7945
ðŸ›‘ Early stopping triggered.

Best Val Acc: 0.8021377672209026




In [150]:
model.load_state_dict(torch.load("best_multimodal_model.pt", map_location=DEVICE))
_, final_acc, y_true, y_pred = eval_one_epoch(model, val_loader)

print("\nFinal Val Accuracy:", final_acc)
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=["Real", "Fake"], digits=4))

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

                                                      


Final Val Accuracy: 0.8021377672209026

Classification Report:
              precision    recall  f1-score   support

        Real     0.7856    0.8876    0.8335      2349
        Fake     0.8303    0.6943    0.7562      1861

    accuracy                         0.8021      4210
   macro avg     0.8080    0.7909    0.7949      4210
weighted avg     0.8054    0.8021    0.7993      4210

Confusion Matrix:
[[2085  264]
 [ 569 1292]]




In [151]:
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import f1_score

@torch.no_grad()
def get_probs(model, loader):
    model.eval()
    probs, labels = [], []
    for batch in loader:
        logits = model(
            batch["image"].to(DEVICE),
            batch["input_ids"].to(DEVICE),
            batch["attention_mask"].to(DEVICE)
        )
        p_fake = F.softmax(logits, dim=1)[:, 1].cpu().numpy()
        probs.extend(p_fake)
        labels.extend(batch["label"].cpu().numpy())
    return np.array(probs), np.array(labels)

probs, y_true = get_probs(model, val_loader)

best_t, best_f1 = 0.5, 0
for t in np.linspace(0.1, 0.9, 81):
    y_pred = (probs >= t).astype(int)
    f1 = f1_score(y_true, y_pred, pos_label=1)
    if f1 > best_f1:
        best_f1 = f1
        best_t = t

print("Best threshold:", best_t)
print("Best Fake F1:", best_f1)


Best threshold: 0.45000000000000007
Best Fake F1: 0.7838199839271364


In [152]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred_thr = (probs >= best_t).astype(int)

print(classification_report(y_true, y_pred_thr, target_names=["Real","Fake"], digits=4))
print(confusion_matrix(y_true, y_pred_thr))


              precision    recall  f1-score   support

        Real     0.8298    0.8259    0.8278      2349
        Fake     0.7815    0.7861    0.7838      1861

    accuracy                         0.8083      4210
   macro avg     0.8056    0.8060    0.8058      4210
weighted avg     0.8084    0.8083    0.8084      4210

[[1940  409]
 [ 398 1463]]
