In [23]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

import torch
import torch.nn as nn
from torch.nn.functional import softmax
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizer, BertForSequenceClassification
from torchvision import transforms, models
from PIL import Image

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# CSV read into the main DataFrame; later cells use its "label", "title" and
# "image_path" columns.
# NOTE(review): absolute, machine-specific paths -- the notebook only runs as-is
# on a machine with this exact directory layout.
DATA_PATH = r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection\data\training\multimodal_splits\text_image_dataset.csv"

# Directory handed to from_pretrained() for both the tokenizer and the BERT classifier.
TEXT_MODEL_PATH = r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection\models\text_baseline\indobert-base-p1"
# Checkpoint read with torch.load() and applied to the MobileNetV3 model via load_state_dict().
IMAGE_MODEL_PATH = r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection\models\image_baseline\best_mobilenetv3_tf_style.pth"

# Passed to TextDataset as max_len: titles are padded/truncated to this many tokens.
MAX_LEN = 128
# Batch size shared by every DataLoader built in this notebook.
BATCH_SIZE = 16


Device: cuda


In [24]:
# Load the combined text+image table; the "label" column holds the string class.
df = pd.read_csv(DATA_PATH)

# Integer encoding used throughout the notebook: 0 = hoax, 1 = valid.
label_map = {"hoax": 0, "valid": 1}
df["label_int"] = df["label"].map(label_map)

# Series.map() turns every label that is missing from label_map into NaN.
# Fail fast here instead of letting NaN reach the stratified split or the
# int(row["label_int"]) conversion inside the datasets.
unmapped_mask = df["label_int"].isna()
if unmapped_mask.any():
    unexpected = df.loc[unmapped_mask, "label"].unique().tolist()
    raise ValueError(
        f"{int(unmapped_mask.sum())} row(s) have a label outside "
        f"{sorted(label_map)}: {unexpected}"
    )

print(df.head())
print("Total data:", len(df))
print("Label distrib total:\n", df["label_int"].value_counts())

# First split: 70% train, 30% held out, stratified on the class label.
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    stratify=df["label_int"],
    random_state=42,
)

# Second split: the held-out 30% is halved into validation and test (15% each).
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df["label_int"],
    random_state=42,
)

for name, part in [("Train", train_df), ("Val", val_df), ("Test", test_df)]:
    print(f"{name} size: {len(part)}")
    print(part["label_int"].value_counts(), "\n")


  sample_id  label     data_source  confidence  sample_weight  \
0    NEWS_1  valid  original_valid         1.0            1.0   
1    NEWS_2  valid  original_valid         1.0            1.0   
2    NEWS_3  valid  original_valid         1.0            1.0   
3    NEWS_4  valid  original_valid         1.0            1.0   
4    NEWS_5  valid  original_valid         1.0            1.0   

                                               title  \
0  Profil Menko Hukum HAM Yusril Ihza Mahendra di...   
1  Profil Menteri PPPA Arifatul Choiri Fauzi di K...   
2  Prabowo Tunjuk Yassierli Jadi Menteri Ketenaga...   
3  Profil Yassierli, Menteri Ketenagakerjaan Kabi...   
4  Apa yang Harus Dilakukan di Usia 30 Tahun untu...   

                                        text_content  \
0  Profil Yusril Ihza Mahendra kembali menjadi so...   
1  Profil Arifatul Choiri Fauzi menjadi sorotan s...   
2  Presiden Prabowo Subianto menunjuk Guru Besar ...   
3  , 20 Oktober 2024, 22:46 WIB Erwina Rachmi Pu

In [25]:
# Tokenizer and classifier are both restored from the same local directory.
tokenizer = BertTokenizer.from_pretrained(TEXT_MODEL_PATH)
text_model = BertForSequenceClassification.from_pretrained(TEXT_MODEL_PATH)
text_model = text_model.to(DEVICE)
# Inference only from here on, so switch to evaluation mode.
text_model.eval()

class TextDataset(Dataset):
    """Dataset that tokenizes the "title" column of a DataFrame.

    Each item is a dict with "input_ids" and "attention_mask" (the tokenizer
    output with its leading batch dimension squeezed away) and "label", a
    ``torch.long`` scalar built from the "label_int" column.

    Args:
        df: DataFrame with "title" and "label_int" columns; its index is reset
            so rows are addressed positionally.
        tokenizer: Callable invoked as ``tokenizer(text, max_length=...,
            padding="max_length", truncation=True, return_tensors="pt")``.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, df, tokenizer, max_len=128):
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        # NOTE(review): str() turns a missing (NaN) title into the literal text "nan".
        title_text = str(record["title"])
        target = int(record["label_int"])

        encoded = self.tokenizer(
            title_text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # The tokenizer returns tensors with a leading batch axis of size 1; drop it.
        sample = {
            key: encoded[key].squeeze(0)
            for key in ("input_ids", "attention_mask")
        }
        sample["label"] = torch.tensor(target, dtype=torch.long)
        return sample

def text_collate_fn(batch):
    """Stack per-sample text tensors into batch tensors.

    Args:
        batch: Sequence of dicts, each holding "input_ids", "attention_mask"
            and "label" tensors.

    Returns:
        Dict with the same three keys, each value being the samples' tensors
        stacked along a new leading dimension.
    """
    return {
        field: torch.stack([sample[field] for sample in batch])
        for field in ("input_ids", "attention_mask", "label")
    }


In [26]:
# Per-channel statistics handed to Normalize (these are the values commonly
# published as the ImageNet RGB mean / standard deviation).
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to 224x224, convert to a tensor, normalize per channel.
img_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
    ]
)

class ImageDataset(Dataset):
    """Dataset yielding one (transformed) image and its integer label per row.

    Args:
        df: DataFrame with an "image_path" column (path relative to
            ``root_dir``) and a "label_int" column; its index is reset so rows
            are addressed positionally.
        transform: Optional callable applied to the RGB PIL image. When it is
            omitted the PIL image is returned unchanged.
        img_size: Size passed to ``Image.new`` for the grey placeholder that
            replaces an image which cannot be loaded.
        root_dir: Directory the relative image paths are resolved against.
            Defaults to ``DEFAULT_ROOT``.

    Attributes:
        missing_paths: One entry per failed image access on this instance.
            Inspect it after iterating to see how many samples were replaced
            by the placeholder. (With DataLoader worker processes each worker
            would hold its own copy of the dataset -- TODO confirm if
            ``num_workers > 0`` is ever used.)
    """

    # Project root used when no root_dir is supplied.
    DEFAULT_ROOT = r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection"

    def __init__(self, df, transform=None, img_size=(224, 224), root_dir=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform or (lambda x: x)
        self.img_size = img_size
        self.root_dir = self.DEFAULT_ROOT if root_dir is None else root_dir
        self.missing_paths = []

    def __len__(self):
        return len(self.df)

    def _placeholder(self, source):
        """Record the failed path, warn on the first failure, return a grey image."""
        import warnings  # local import: only needed on the failure path

        if not self.missing_paths:
            warnings.warn(
                f"ImageDataset: could not load image {source!r}; using a grey "
                "placeholder. Further failures are only recorded in "
                "`missing_paths`.",
                RuntimeWarning,
                stacklevel=2,
            )
        self.missing_paths.append(source)
        return Image.new("RGB", self.img_size, color=(128, 128, 128))

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        rel_path = row["image_path"]
        label = int(row["label_int"])

        img = None
        img_path = None
        # A missing path arrives as NaN (a float), which has no .lstrip();
        # treat it like an unreadable file instead of crashing.
        if isinstance(rel_path, str) and rel_path.strip():
            # Drop leading "." / "/" / "\" characters so the join stays under root_dir.
            img_path = os.path.join(self.root_dir, rel_path.lstrip("./\\"))
            try:
                # The context manager closes the file handle; convert() returns
                # an independent, fully loaded copy that outlives it.
                with Image.open(img_path) as handle:
                    img = handle.convert("RGB")
            except (FileNotFoundError, OSError):
                img = None

        if img is None:
            # Best-effort behaviour is kept (grey stand-in), but no longer silently.
            img = self._placeholder(img_path if img_path is not None else rel_path)

        img_t = self.transform(img)
        return {"image": img_t, "label": torch.tensor(label, dtype=torch.long)}

def image_collate_fn(batch):
    """Stack per-sample image and label tensors into batch tensors.

    Args:
        batch: Sequence of dicts, each holding an "image" tensor and a
            "label" tensor.

    Returns:
        Dict with "image" and "label", each stacked along a new leading
        dimension.
    """
    return {
        field: torch.stack([sample[field] for sample in batch])
        for field in ("image", "label")
    }


In [27]:
# Text: one dataset and one loader per split. shuffle=False keeps every loader
# in DataFrame row order, so predictions line up with the split's labels.
train_text_ds, val_text_ds, test_text_ds = (
    TextDataset(split, tokenizer, MAX_LEN)
    for split in (train_df, val_df, test_df)
)

train_text_loader, val_text_loader, test_text_loader = (
    DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=text_collate_fn)
    for ds in (train_text_ds, val_text_ds, test_text_ds)
)

# Image: same layout, using the shared preprocessing pipeline.
train_img_ds, val_img_ds, test_img_ds = (
    ImageDataset(split, transform=img_transform)
    for split in (train_df, val_df, test_df)
)

train_img_loader, val_img_loader, test_img_loader = (
    DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=image_collate_fn)
    for ds in (train_img_ds, val_img_ds, test_img_ds)
)


In [28]:
def create_mobilenetv3_tf_style(num_classes=2):
    """Build a MobileNetV3-Small whose last classifier layer has ``num_classes`` outputs.

    The network is created without pretrained weights (``weights=None``); only
    the final layer of ``classifier`` is replaced, reusing its input width.

    Args:
        num_classes: Number of output units of the new final linear layer.

    Returns:
        The modified torchvision model.
    """
    net = models.mobilenet_v3_small(weights=None)
    old_head = net.classifier[-1]
    net.classifier[-1] = nn.Linear(old_head.in_features, num_classes)
    return net

# Rebuild the architecture, then restore the saved weights from disk.
image_model = create_mobilenetv3_tf_style(num_classes=2)
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs. It avoids executing arbitrary pickled code from
# the checkpoint file and silences the torch.load FutureWarning.
img_state = torch.load(IMAGE_MODEL_PATH, map_location=DEVICE, weights_only=True)
image_model.load_state_dict(img_state)
image_model.to(DEVICE)
# Inference only: switch to evaluation mode.
image_model.eval()


  img_state = torch.load(IMAGE_MODEL_PATH, map_location=DEVICE)


MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), 

In [29]:
def get_text_probs(loader):
    """Run the text model over ``loader`` and collect the class-0 probability.

    Class index 0 is treated as "hoax" (matching ``label_map``); this assumes
    the loaded text model uses the same class order.

    Args:
        loader: Iterable of batches with "input_ids" and "attention_mask" tensors.

    Returns:
        1-D NumPy array with one probability per sample, in loader order.
    """
    text_model.eval()
    hoax_probs = []
    with torch.no_grad():
        for batch in loader:
            ids = batch["input_ids"].to(DEVICE)
            mask = batch["attention_mask"].to(DEVICE)
            logits = text_model(input_ids=ids, attention_mask=mask).logits
            # Softmax over the class axis, then keep column 0 (hoax).
            hoax_column = softmax(logits, dim=-1)[:, 0]
            hoax_probs.extend(hoax_column.cpu().numpy().tolist())
    return np.array(hoax_probs)

def get_image_probs(loader):
    """Run the image model over ``loader`` and collect the class-0 probability.

    Class index 0 is treated as "hoax" (matching ``label_map``); this assumes
    the loaded image model uses the same class order.

    Args:
        loader: Iterable of batches with an "image" tensor.

    Returns:
        1-D NumPy array with one probability per sample, in loader order.
    """
    image_model.eval()
    hoax_probs = []
    with torch.no_grad():
        for batch in loader:
            logits = image_model(batch["image"].to(DEVICE))
            # Softmax over the class axis, then keep column 0 (hoax).
            hoax_column = softmax(logits, dim=-1)[:, 0]
            hoax_probs.extend(hoax_column.cpu().numpy().tolist())
    return np.array(hoax_probs)


In [30]:
# Hoax probabilities of the text model for every split (train, val, test order).
p_text_train, p_text_val, p_text_test = (
    get_text_probs(loader)
    for loader in (train_text_loader, val_text_loader, test_text_loader)
)

# Hoax probabilities of the image model for every split.
p_img_train, p_img_val, p_img_test = (
    get_image_probs(loader)
    for loader in (train_img_loader, val_img_loader, test_img_loader)
)

# Ground-truth labels come straight from the split DataFrames; they align with
# the probabilities above because every loader was built with shuffle=False.
y_train, y_val, y_test = (
    split["label_int"].to_numpy() for split in (train_df, val_df, test_df)
)

def evaluate_probs(p_hoax, y_true, thr=0.5):
    """Threshold hoax probabilities and score the resulting predictions.

    Args:
        p_hoax: Array of hoax probabilities.
        y_true: Ground-truth labels (0 = hoax, 1 = valid).
        thr: Samples with ``p_hoax >= thr`` are predicted as hoax (0),
            all others as valid (1).

    Returns:
        Tuple ``(accuracy, f1_hoax, f1_valid, f1_macro)``.
    """
    flagged_as_hoax = p_hoax >= thr
    y_pred = np.where(flagged_as_hoax, 0, 1)  # 0=hoax,1=valid

    accuracy = accuracy_score(y_true, y_pred)
    per_class_f1 = [f1_score(y_true, y_pred, pos_label=cls) for cls in (0, 1)]
    macro_f1 = f1_score(y_true, y_pred, average="macro")
    return accuracy, per_class_f1[0], per_class_f1[1], macro_f1

def fusion_two(p_a, p_b, alpha):
    """Blend two probability arrays with a convex weight.

    Args:
        p_a: First set of probabilities, weighted by ``alpha``.
        p_b: Second set of probabilities, weighted by ``1 - alpha``.
        alpha: Weight given to ``p_a``.

    Returns:
        ``alpha * p_a + (1 - alpha) * p_b``.
    """
    weight_a = alpha
    weight_b = 1 - alpha
    return weight_a * p_a + weight_b * p_b


In [31]:
# Candidate text weights 0.0, 0.1, ..., 1.0; the image model gets 1 - alpha.
alphas = np.linspace(0, 1, 11)

# Score every candidate on the validation split only, so the test split stays
# untouched while alpha is being chosen.
records_ti = []
for a in alphas:
    p_ti_val = fusion_two(p_text_val, p_img_val, a)
    metrics = evaluate_probs(p_ti_val, y_val)
    records_ti.append((a, *metrics))

df_ti = pd.DataFrame(records_ti, columns=["alpha_text", "acc", "f1_hoax", "f1_valid", "f1_macro"])

# Rank once by macro-F1 and reuse the ranking for the preview and the winner.
ranked_ti = df_ti.sort_values("f1_macro", ascending=False)
print(ranked_ti.head())

best_row_ti = ranked_ti.iloc[0]
alpha_text_best = float(best_row_ti["alpha_text"])
print("Best alpha_text (text+image):", alpha_text_best)

# Apply the selected weight to the held-out test probabilities.
p_ti_test = fusion_two(p_text_test, p_img_test, alpha_text_best)


   alpha_text       acc   f1_hoax  f1_valid  f1_macro
2         0.2  0.971487  0.961538  0.977346  0.969442
4         0.4  0.971487  0.961326  0.977419  0.969373
1         0.1  0.969450  0.958678  0.975767  0.967223
3         0.3  0.969450  0.958678  0.975767  0.967223
5         0.5  0.969450  0.958217  0.975923  0.967070
Best alpha_text (text+image): 0.2


In [32]:
# Text-only
text_metrics = evaluate_probs(p_text_test, y_test)
acc_t, f1h_t, f1v_t, f1m_t = text_metrics

# Image-only
image_metrics = evaluate_probs(p_img_test, y_test)
acc_i, f1h_i, f1v_i, f1m_i = image_metrics

# Text+Image
fused_metrics = evaluate_probs(p_ti_test, y_test)
acc_ti, f1h_ti, f1v_ti, f1m_ti = fused_metrics

print("=== Test Results (Text+Image) ===")
print("Text      : acc={:.4f} f1h={:.4f} f1v={:.4f} f1m={:.4f}".format(*text_metrics))
print("Image     : acc={:.4f} f1h={:.4f} f1v={:.4f} f1m={:.4f}".format(*image_metrics))
print("Text+Img  : acc={:.4f} f1h={:.4f} f1v={:.4f} f1m={:.4f}".format(*fused_metrics))

# Same decision rule as evaluate_probs: p_hoax >= 0.5 -> hoax (0), else valid (1).
y_pred_ti = np.where(p_ti_test >= 0.5, 0, 1)

print("\nClassification report (Text+Img):")
print(classification_report(y_test, y_pred_ti, digits=4))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_ti))


=== Test Results (Text+Image) ===
Text      : acc=0.9348 f1h=0.9086 f1v=0.9494 f1m=0.9290
Image     : acc=0.9491 f1h=0.9304 f1v=0.9599 f1m=0.9451
Text+Img  : acc=0.9572 f1h=0.9412 f1v=0.9664 f1m=0.9538

Classification report (Text+Img):
              precision    recall  f1-score   support

           0     0.9655    0.9180    0.9412       183
           1     0.9527    0.9805    0.9664       308

    accuracy                         0.9572       491
   macro avg     0.9591    0.9493    0.9538       491
weighted avg     0.9575    0.9572    0.9570       491

Confusion matrix:
 [[168  15]
 [  6 302]]


In [None]:
import joblib

class TextImageLateFusion:
    """Weighted late fusion of text-model and image-model hoax probabilities.

    The fused hoax probability is
    ``alpha_text * p_text_hoax + (1 - alpha_text) * p_img_hoax``.

    Args:
        alpha_text: Weight of the text probability; the image probability is
            weighted by ``1 - alpha_text``. The default of 0.0 uses the image
            model alone.
    """

    def __init__(self, alpha_text=0.0):
        self.alpha_text = float(alpha_text)
        self.alpha_img = 1.0 - self.alpha_text

    @staticmethod
    def _as_vector(values):
        """Coerce a scalar, list, tensor-like or (n, 1)/(1, n) array to a 1-D array."""
        return np.asarray(values).ravel()

    def fusion(self, p_text_hoax, p_img_hoax):
        """Return fused class probabilities.

        Args:
            p_text_hoax: Hoax probabilities from the text model (scalar or
                array-like).
            p_img_hoax: Hoax probabilities from the image model (scalar or
                array-like, broadcastable against ``p_text_hoax``).

        Returns:
            Array of shape ``(n, 2)``; column 0 is the fused hoax probability,
            column 1 is ``1 - p_hoax`` (valid).
        """
        # Plain Python lists cannot be multiplied by a float, and column
        # vectors would be stacked into the wrong shape, so flatten first.
        p_text = self._as_vector(p_text_hoax)
        p_img = self._as_vector(p_img_hoax)
        p_hoax = self.alpha_text * p_text + self.alpha_img * p_img
        p_valid = 1.0 - p_hoax
        return np.column_stack([p_hoax, p_valid])

    def predict(self, p_text_hoax, p_img_hoax, thr=0.5):
        """Return integer class predictions (0 = hoax, 1 = valid).

        A sample is predicted as hoax (0) when its fused hoax probability is
        ``>= thr`` and as valid (1) otherwise.
        """
        probs = self.fusion(p_text_hoax, p_img_hoax)
        return (probs[:, 0] < thr).astype(int)

# Freeze the alpha chosen on the validation split into a reusable fusion object.
fusion_ti = TextImageLateFusion(alpha_text=alpha_text_best)
fname = rf"D:\INDONERIS-DATAMINING\multimodal-hoax-detection\models\fusion_final\text_image_late_fusion_a{alpha_text_best:.2f}.joblib"
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs(os.path.dirname(fname), exist_ok=True)
# NOTE(review): the object is pickled by reference to its class, which is
# defined in this notebook. Code that loads the file presumably needs a
# TextImageLateFusion class available under the same module name -- confirm
# with the consumer of this artifact.
joblib.dump(fusion_ti, fname)
print("Saved text+image fusion model to:", fname)


Saved text+image fusion model to: D:\INDONERIS-DATAMINING\multimodal-hoax-detection\models\fusion_final\2text_image_late_fusion_a0.20.joblib
