In [1]:
import os
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchaudio

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score

# ===== KONFIGURASI =====
CSV_PATH = r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection\data\training\multimodal_splits\audio_only_dataset.csv"
AUDIO_BASE_DIR = r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection" 

TARGET_SR = 16000
SEGMENT_SEC = 10
BATCH_SIZE = 8
MAX_EPOCHS = 40

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


Device: cuda


In [2]:
df = pd.read_csv(CSV_PATH)

label_map = {"hoax": 0, "valid": 1}
df["y"] = df["label"].map(label_map)

print("Total label distribusi:\n", df["y"].value_counts())

train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["y"],
    random_state=42,
)

print("Train size:", len(train_df), "Val size:", len(val_df))
print("Train label dist:\n", train_df["y"].value_counts())
print("Val   label dist:\n", val_df["y"].value_counts())


Total label distribusi:
 y
1    156
0     52
Name: count, dtype: int64
Train size: 166 Val size: 42
Train label dist:
 y
1    125
0     41
Name: count, dtype: int64
Val   label dist:
 y
1    31
0    11
Name: count, dtype: int64


In [3]:
# Mel-spectrogram transform
N_MELS = 64
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=TARGET_SR,
    n_fft=400,
    hop_length=160,
    win_length=400,
    n_mels=N_MELS,
)
amplitude_to_db = torchaudio.transforms.AmplitudeToDB()

def load_center_segment(path, segment_sec=SEGMENT_SEC):
    wav, sr = torchaudio.load(path)
    if sr != TARGET_SR:
        wav = torchaudio.functional.resample(wav, sr, TARGET_SR)
    if wav.shape[0] > 1:
        wav = wav.mean(dim=0, keepdim=True)

    num_samples = wav.shape[1]
    seg_len = int(TARGET_SR * segment_sec)

    if num_samples <= seg_len:
        pad_len = seg_len - num_samples
        wav = nn.functional.pad(wav, (0, pad_len))
    else:
        center = num_samples // 2
        start = max(0, center - seg_len // 2)
        end = start + seg_len
        if end > num_samples:
            end = num_samples
            start = end - seg_len
        wav = wav[:, start:end]

    return wav  # [1, seg_len]

def wav_to_logmel(wav):
    mel = mel_spectrogram(wav)
    mel_db = amplitude_to_db(mel)
    mean = mel_db.mean()
    std = mel_db.std() + 1e-9
    mel_db = (mel_db - mean) / std
    return mel_db


In [4]:
class AudioHoaxDataset(Dataset):
    def __init__(self, df, base_dir=AUDIO_BASE_DIR):
        self.df = df.reset_index(drop=True)
        self.base_dir = base_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        audio_path = os.path.join(self.base_dir, row["audio_path"])

        wav = load_center_segment(audio_path)
        feat = wav_to_logmel(wav)         # [1, F, T]
        label = torch.tensor(row["y"], dtype=torch.long)
        return feat, label

train_ds2 = AudioHoaxDataset(train_df)
val_ds2   = AudioHoaxDataset(val_df)

train_loader2 = DataLoader(
    train_ds2,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)

val_loader2 = DataLoader(
    val_ds2,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)


In [5]:
class AudioCNNModel2(nn.Module):
    def __init__(self, n_mels=N_MELS, num_classes=2):
        super().__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),

            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
        )
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(16, num_classes),
        )

    def forward(self, x):
        h = self.conv_block(x)        # [B, 32, F', T']
        h = self.global_pool(h)       # [B, 32, 1, 1]
        h = h.view(h.size(0), -1)     # [B, 32]
        logits = self.fc(h)           # [B, 2]
        return logits

model2 = AudioCNNModel2().to(device)
print(model2)


AudioCNNModel2(
  (conv_block): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (global_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Sequential(
    (0): Linear(in_features=32, out_features=16, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=16, out_features=2, bias=True)
  )
)


In [6]:
# Hitung class weight ringan: hoax sedikit lebih berat
global_counts = df["y"].value_counts().to_dict()
num_hoax  = global_counts[0]
num_valid = global_counts[1]
print("num_hoax:", num_hoax, "num_valid:", num_valid)

# proporsi
total = num_hoax + num_valid
p_hoax  = num_hoax / total
p_valid = num_valid / total
print("p_hoax:", p_hoax, "p_valid:", p_valid)

# weight ringan (bisa kamu adjust): lebih beratkan hoax sedikit
w_hoax  = 1.2   # kelas 0
w_valid = 0.8   # kelas 1
class_weights = torch.tensor([w_hoax, w_valid], dtype=torch.float32, device=device)
print("class_weights (hoax=0, valid=1):", class_weights)

criterion2 = nn.CrossEntropyLoss(weight=class_weights)

optimizer2 = torch.optim.Adam(
    model2.parameters(),
    lr=1e-4,           # bisa diturunkan ke 5e-5 jika masih labil
    weight_decay=1e-4,
)

scheduler2 = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer2,
    mode="max",
    factor=0.5,
    patience=3,
    verbose=True,
    min_lr=1e-6,
)


num_hoax: 52 num_valid: 156
p_hoax: 0.25 p_valid: 0.75
class_weights (hoax=0, valid=1): tensor([1.2000, 0.8000], device='cuda:0')




In [7]:
def evaluate_model2(return_metrics=False):
    model2.eval()
    all_y = []
    all_pred = []

    with torch.no_grad():
        for feats, labels in val_loader2:
            feats = feats.to(device)
            labels = labels.to(device)

            logits = model2(feats)
            preds = torch.argmax(logits, dim=-1)

            all_y.extend(labels.cpu().numpy().tolist())
            all_pred.extend(preds.cpu().numpy().tolist())

    print(classification_report(all_y, all_pred, digits=4))

    f1_valid = f1_score(all_y, all_pred, pos_label=1)
    f1_hoax  = f1_score(all_y, all_pred, pos_label=0)
    f1_macro = f1_score(all_y, all_pred, average="macro")

    print("F1 valid:", f1_valid, "F1 hoax:", f1_hoax, "F1 macro:", f1_macro)

    if return_metrics:
        return f1_valid, f1_hoax, f1_macro


In [8]:
best_f1_macro_2 = 0.0
patience2 = 7
no_improve2 = 0

for epoch in range(1, MAX_EPOCHS + 1):
    model2.train()
    total_loss = 0.0

    for feats, labels in train_loader2:
        feats = feats.to(device)
        labels = labels.to(device)

        logits = model2(feats)
        loss = criterion2(logits, labels)

        optimizer2.zero_grad()
        loss.backward()
        optimizer2.step()

        total_loss += loss.item() * labels.size(0)

    avg_loss = total_loss / len(train_ds2)
    print(f"\n[Model 2] Epoch {epoch}, train loss: {avg_loss:.4f}")

    f1_valid, f1_hoax, f1_macro = evaluate_model2(return_metrics=True)

    scheduler2.step(f1_macro)

    if f1_macro > best_f1_macro_2:
        best_f1_macro_2 = f1_macro
        no_improve2 = 0
        torch.save(model2.state_dict(), r"D:\INDONERIS-DATAMINING\multimodal-hoax-detection\models\audio_baseline/best_audio_cnn_model2.pt")
        print("  >> new best model2 saved")
    else:
        no_improve2 += 1
        print("  no_improve2 count:", no_improve2)

    if no_improve2 >= patience2:
        print("Early stopping for Model 2 triggered.")
        break

print("Best F1 macro (Model 2):", best_f1_macro_2)



[Model 2] Epoch 1, train loss: 0.7910


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0     0.2619    1.0000    0.4151        11
           1     0.0000    0.0000    0.0000        31

    accuracy                         0.2619        42
   macro avg     0.1310    0.5000    0.2075        42
weighted avg     0.0686    0.2619    0.1087        42

F1 valid: 0.0 F1 hoax: 0.41509433962264153 F1 macro: 0.20754716981132076
  >> new best model2 saved

[Model 2] Epoch 2, train loss: 0.7522


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0     0.2619    1.0000    0.4151        11
           1     0.0000    0.0000    0.0000        31

    accuracy                         0.2619        42
   macro avg     0.1310    0.5000    0.2075        42
weighted avg     0.0686    0.2619    0.1087        42

F1 valid: 0.0 F1 hoax: 0.41509433962264153 F1 macro: 0.20754716981132076
  no_improve2 count: 1

[Model 2] Epoch 3, train loss: 0.7376
              precision    recall  f1-score   support

           0     0.2368    0.8182    0.3673        11
           1     0.5000    0.0645    0.1143        31

    accuracy                         0.2619        42
   macro avg     0.3684    0.4413    0.2408        42
weighted avg     0.4311    0.2619    0.1806        42

F1 valid: 0.11428571428571428 F1 hoax: 0.3673469387755102 F1 macro: 0.24081632653061225
  >> new best model2 saved

[Model 2] Epoch 4, train loss: 0.7122
              precision    recall  f1-score   support

  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        11
           1     0.7381    1.0000    0.8493        31

    accuracy                         0.7381        42
   macro avg     0.3690    0.5000    0.4247        42
weighted avg     0.5448    0.7381    0.6269        42

F1 valid: 0.8493150684931506 F1 hoax: 0.0 F1 macro: 0.4246575342465753
  no_improve2 count: 1

[Model 2] Epoch 8, train loss: 0.6839
              precision    recall  f1-score   support

           0     0.5000    0.0909    0.1538        11
           1     0.7500    0.9677    0.8451        31

    accuracy                         0.7381        42
   macro avg     0.6250    0.5293    0.4995        42
weighted avg     0.6845    0.7381    0.6640        42

F1 valid: 0.8450704225352113 F1 hoax: 0.15384615384615385 F1 macro: 0.49945828819068255
  no_improve2 count: 2

[Model 2] Epoch 9, train loss: 0.6741
              precision    recall  f1-score   support

        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        11
           1     0.7381    1.0000    0.8493        31

    accuracy                         0.7381        42
   macro avg     0.3690    0.5000    0.4247        42
weighted avg     0.5448    0.7381    0.6269        42

F1 valid: 0.8493150684931506 F1 hoax: 0.0 F1 macro: 0.4246575342465753
  no_improve2 count: 4

[Model 2] Epoch 11, train loss: 0.6627


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        11
           1     0.7381    1.0000    0.8493        31

    accuracy                         0.7381        42
   macro avg     0.3690    0.5000    0.4247        42
weighted avg     0.5448    0.7381    0.6269        42

F1 valid: 0.8493150684931506 F1 hoax: 0.0 F1 macro: 0.4246575342465753
  no_improve2 count: 5

[Model 2] Epoch 12, train loss: 0.6533


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        11
           1     0.7381    1.0000    0.8493        31

    accuracy                         0.7381        42
   macro avg     0.3690    0.5000    0.4247        42
weighted avg     0.5448    0.7381    0.6269        42

F1 valid: 0.8493150684931506 F1 hoax: 0.0 F1 macro: 0.4246575342465753
  no_improve2 count: 6

[Model 2] Epoch 13, train loss: 0.6651
              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000        11
           1     0.7381    1.0000    0.8493        31

    accuracy                         0.7381        42
   macro avg     0.3690    0.5000    0.4247        42
weighted avg     0.5448    0.7381    0.6269        42

F1 valid: 0.8493150684931506 F1 hoax: 0.0 F1 macro: 0.4246575342465753
  no_improve2 count: 7
Early stopping for Model 2 triggered.
Best F1 macro (Model 2): 0.6304985337243402


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
