# Imports

In [76]:
!pip install torchmetrics



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip -qq "/content/drive/MyDrive/Colab Notebooks/open.zip"

In [84]:
import librosa
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random
import IPython.display as ipd
from tqdm.notebook import tqdm
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from tqdm.notebook import tqdm  # !!

import torch
import torchmetrics
import os
from torchvision import models

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Config

In [77]:
class Config:
    SR = 32000
    N_MFCC = 13  # 수정됨!!!: N_MFCC를 원래 값으로 되돌림
    # Dataset
    ROOT_FOLDER = './'
    # Training
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 100
    LR = 3e-4
    NUM_HEADS = 4
    NUM_LAYERS = 2
    EMBED_DIM = 64
    EARLY_STOPPING_PATIENCE = 20  # Early stopping patience 설정
    Scheduler_patience=8
    # Others
    SEED = 42

CONFIG = Config()

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED) # Seed 고정

In [91]:
df = pd.read_csv('./train.csv')
train_df, val_df, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)

## Data Pre-processing : MFCC

In [108]:
def load_audio_data(df):  # !!
    audio_data = []  # !!
    labels = []  # !!
    for _, row in tqdm(df.iterrows(), total=df.shape[0]):  # !!
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)  # !!
        audio_data.append(y)  # !!
        label = row['label']  # !!
        label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)  # !!
        label_vector[0 if label == 'fake' else 1] = 1  # !!
        labels.append(label_vector)  # !!
    return audio_data, labels  # !!

In [88]:
def merge_audios_simultaneously(audio1, audio2):
    len1, len2 = len(audio1), len(audio2)
    if len1 < len2:
        audio1 = np.pad(audio1, (0, len2 - len1))
    else:
        audio2 = np.pad(audio2, (0, len1 - len2))
    merged_audio = audio1 + audio2
    merged_audio = merged_audio / np.max(np.abs(merged_audio))  # Normalize the merged audio
    return merged_audio

In [130]:
def get_mfcc_feature_test(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)

        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

In [115]:
def get_mfcc_feature(df, audio_data, labels, train_mode=True):
    features = []
    label_list = []
    for i in tqdm(range(len(audio_data)), total=len(audio_data)):
        y = audio_data[i]
        mfcc = librosa.feature.mfcc(y=y, sr=CONFIG.SR, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)
        if train_mode:
            label_list.append(labels[i])
    if train_mode:
        return features, label_list
    return features

In [109]:
def get_augmented_features_random(audio_data, labels, num_augmented=50000):  # !!
    augmented_features = []  # !!
    augmented_labels = []  # !!
    augmented_audios = []  # !!

    for i in tqdm(range(num_augmented), desc="Generating augmented data"):  # !!
        idx1, idx2 = random.sample(range(len(audio_data)), 2)  # !!
        audio1, audio2 = audio_data[idx1], audio_data[idx2]  # !!
        merged_audio = merge_audios_simultaneously(audio1, audio2)  # !!
        mfcc = librosa.feature.mfcc(y=merged_audio, sr=CONFIG.SR, n_mfcc=CONFIG.N_MFCC)  # !!
        mfcc = np.mean(mfcc.T, axis=0)  # !!
        augmented_features.append(mfcc)  # !!
        augmented_labels.append(labels[idx1])  # !!
        augmented_audios.append(merged_audio)  # !!

    return augmented_features, augmented_labels, augmented_audios  # !!

In [111]:
# import librosa
# import numpy as np
# import IPython.display as ipd

# def merge_audios_simultaneously(audio1, audio2):
#     len1, len2 = len(audio1), len(audio2)
#     if len1 < len2:
#         audio1 = np.pad(audio1, (0, len2 - len1))
#     else:
#         audio2 = np.pad(audio2, (0, len1 - len2))
#     merged_audio = audio1 + audio2
#     merged_audio = merged_audio / np.max(np.abs(merged_audio))
#     return merged_audio

# # 첫 번째 오디오 파일 경로
# audio_path1 = './train/AAACWKPZ.ogg'
# # 두 번째 오디오 파일 경로
# audio_path2 = './train/AAAQOZYI.ogg'

# # 오디오 파일 불러오기
# audio1, sr1 = librosa.load(audio_path1, sr=None)
# audio2, sr2 = librosa.load(audio_path2, sr=None)

# # 샘플링 레이트가 다를 경우 동일하게 맞춰줌
# if sr1 != sr2:
#     audio2 = librosa.resample(audio2, sr2, sr1)
#     sr = sr1
# else:
#     sr = sr1

# # 두 개의 오디오 파일을 합침
# merged_audio = merge_audios_simultaneously(audio1, audio2)

# # 합쳐진 오디오 재생
# ipd.display(ipd.Audio(merged_audio, rate=sr))

In [113]:
audio_data, labels = load_audio_data(train_df)  # !!
val_audio_data, val_labels = load_audio_data(val_df)  # !!

  0%|          | 0/44350 [00:00<?, ?it/s]

  0%|          | 0/11088 [00:00<?, ?it/s]

In [116]:
train_mfcc, train_labels = get_mfcc_feature(train_df, audio_data, labels, True)
val_mfcc, val_labels = get_mfcc_feature(val_df, val_audio_data, val_labels, True)  # !!
augmented_mfcc, augmented_labels, augmented_audios = get_augmented_features_random(audio_data, labels)  # !!

train_mfcc.extend(augmented_mfcc)  # !!
train_labels.extend(augmented_labels)  # !!

  0%|          | 0/44350 [00:00<?, ?it/s]

  0%|          | 0/11088 [00:00<?, ?it/s]

Generating augmented data:   0%|          | 0/50000 [00:00<?, ?it/s]

1000/50000 augmented audios generated
2000/50000 augmented audios generated
3000/50000 augmented audios generated
4000/50000 augmented audios generated
5000/50000 augmented audios generated
6000/50000 augmented audios generated
7000/50000 augmented audios generated
8000/50000 augmented audios generated
9000/50000 augmented audios generated
10000/50000 augmented audios generated
11000/50000 augmented audios generated
12000/50000 augmented audios generated
13000/50000 augmented audios generated
14000/50000 augmented audios generated
15000/50000 augmented audios generated
16000/50000 augmented audios generated
17000/50000 augmented audios generated
18000/50000 augmented audios generated
19000/50000 augmented audios generated
20000/50000 augmented audios generated
21000/50000 augmented audios generated
22000/50000 augmented audios generated
23000/50000 augmented audios generated
24000/50000 augmented audios generated
25000/50000 augmented audios generated
26000/50000 augmented audios gener

# Dataset

In [117]:
#feature_vector + label을 가지고 CustomDataset 생성
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

In [118]:
train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)

In [119]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

# Define Model

In [None]:
# class MLP(nn.Module):
#     def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
#         super(MLP, self).__init__()
#         self.fc1 = nn.Linear(input_dim, hidden_dim)
#         self.fc2 = nn.Linear(hidden_dim, hidden_dim)
#         self.fc3 = nn.Linear(hidden_dim, output_dim)
#         self.relu = nn.ReLU()

#     def forward(self, x):
#         x = self.relu(self.fc1(x))
#         x = self.relu(self.fc2(x))
#         x = self.fc3(x)
#         x = torch.sigmoid(x)
#         return x

In [None]:
# class LSTMModel(nn.Module):
#     def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES, num_layers=1):
#         super(LSTMModel, self).__init__()
#         self.hidden_dim = hidden_dim
#         self.num_layers = num_layers
#         self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_dim, output_dim)

#     def forward(self, x):
#         if x.dim() == 2:
#             x = x.unsqueeze(1)  # [batch_size, seq_length, input_dim]으로 변경

#         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
#         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

#         out, _ = self.lstm(x, (h0, c0))

#         out = self.fc(out[:, -1, :])

#         out = torch.sigmoid(out)
#         return out

In [None]:
# class TransformerModel(nn.Module):  # 수정됨!!!
#     def __init__(self, input_dim=CONFIG.N_MFCC, embed_dim=CONFIG.EMBED_DIM, num_heads=CONFIG.NUM_HEADS, num_layers=CONFIG.NUM_LAYERS, output_dim=CONFIG.N_CLASSES):
#         super(TransformerModel, self).__init__()
#         self.embedding = nn.Linear(input_dim, embed_dim)  # 수정됨!!!: 입력 차원을 embed_dim으로 변환
#         self.encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads)
#         self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
#         self.fc = nn.Linear(embed_dim, output_dim)

#     def forward(self, x):
#         if x.dim() == 2:
#             x = x.unsqueeze(1)  # [batch_size, 1, input_dim]으로 변경
#         x = self.embedding(x)  # 수정됨!!!: 입력 차원을 embed_dim으로 변환
#         x = self.transformer_encoder(x)
#         x = self.fc(x[:, -1, :])
#         x = torch.sigmoid(x)
#         return x

In [None]:

# class ResNetModel(nn.Module):  # 수정됨!!!
#     def __init__(self, input_dim=CONFIG.N_MFCC, output_dim=CONFIG.N_CLASSES):
#         super(ResNetModel, self).__init__()
#         self.resnet = models.resnet18(pretrained=True)
#         self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
#         self.resnet.fc = nn.Linear(self.resnet.fc.in_features, output_dim)

#     def forward(self, x):
#         x = x.unsqueeze(1).unsqueeze(2)  # [batch_size, 1, 1, input_dim]으로 변경
#         x = self.resnet(x)
#         x = torch.sigmoid(x)
#         return x

In [120]:
# Temporal Attention Layer definition
class TemporalAttention(nn.Module):  # <--
    def __init__(self, input_dim, embed_dim, num_heads):  # <--
        super(TemporalAttention, self).__init__()  # <--
        self.attention = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads)  # <--
        self.linear = nn.Linear(input_dim, embed_dim)  # <--

    def forward(self, x):  # <--
        if x.dim() == 2:  # <--
            x = x.unsqueeze(1)  # [batch_size, seq_len, input_dim] 차원 추가 <--
        x = self.linear(x)  # <--
        x = x.permute(1, 0, 2)  # (seq_len, batch, embed_dim)  # <--
        attn_output, _ = self.attention(x, x, x)  # <--
        attn_output = attn_output.permute(1, 0, 2)  # (batch, seq_len, embed_dim)  # <--
        return attn_output  # <--

# Transformer model with Temporal Attention
class TransformerModel(nn.Module):  # <--
    def __init__(self, input_dim=CONFIG.N_MFCC, embed_dim=CONFIG.EMBED_DIM, num_heads=CONFIG.NUM_HEADS, num_layers=CONFIG.NUM_LAYERS, output_dim=CONFIG.N_CLASSES):  # <--
        super(TransformerModel, self).__init__()  # <--
        self.embedding = nn.Linear(input_dim, embed_dim)  # <--
        self.temporal_attention = TemporalAttention(embed_dim, embed_dim, num_heads)  # <--
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads)  # <--
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)  # <--
        self.fc = nn.Linear(embed_dim, output_dim)  # <--

    def forward(self, x):  # <--
        if x.dim() == 2:  # <--
            x = x.unsqueeze(1)  # [batch_size, seq_len, input_dim] 차원 추가 <--
        x = self.embedding(x)  # <--
        x = self.temporal_attention(x)  # <--
        x = self.transformer_encoder(x)  # <--
        x = self.fc(x[:, -1, :])  # <--
        x = torch.sigmoid(x)  # <--
        return x  # <--

In [121]:
class EarlyStopping:
    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        self.val_loss_min = val_loss

# Train & Validation

In [122]:
from sklearn.metrics import roc_auc_score

def train(model, optimizer, scheduler, train_loader, val_loader, device):  # <--
    model.to(device)  # <--
    criterion = nn.BCELoss().to(device)  # <--
    early_stopping = EarlyStopping(patience=CONFIG.EARLY_STOPPING_PATIENCE, verbose=True)  # EarlyStopping 인스턴스 생성 <--

    best_val_score = 0
    best_model = None

    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)

            optimizer.zero_grad()

            output = model(features)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')

        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model

        # Scheduler step
        scheduler.step(_val_loss)

        early_stopping(_val_loss, model)  # EarlyStopping 호출 <--
        if early_stopping.early_stop:  # <--
            print("Early stopping")  # <--
            break  # <--

    return best_model


def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    val_true = []
    val_pred = []
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)

            output = model(features)
            loss = criterion(output, labels)

            val_loss.append(loss.item())

            val_true.extend(labels.cpu().numpy())
            val_pred.extend(output.cpu().numpy())

    val_loss = np.mean(val_loss)
    val_score = roc_auc_score(val_true, val_pred)
    return val_loss, val_score


## Run

In [123]:
model = TransformerModel()
optimizer = torch.optim.Adam(params=model.parameters(), lr=CONFIG.LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=CONFIG.Scheduler_patience, verbose=True)

infer_model = train(model, optimizer, scheduler, train_loader, val_loader, device)

  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.53846] Val Loss : [0.35256] Val AUC : [0.93008]
Validation loss decreased (inf --> 0.352565).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.48114] Val Loss : [0.30840] Val AUC : [0.96004]
Validation loss decreased (0.352565 --> 0.308398).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.46066] Val Loss : [0.27512] Val AUC : [0.97201]
Validation loss decreased (0.308398 --> 0.275116).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.44246] Val Loss : [0.21116] Val AUC : [0.98475]
Validation loss decreased (0.275116 --> 0.211158).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.42882] Val Loss : [0.22053] Val AUC : [0.98264]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.41867] Val Loss : [0.20203] Val AUC : [0.98591]
Validation loss decreased (0.211158 --> 0.202035).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.41196] Val Loss : [0.19101] Val AUC : [0.98819]
Validation loss decreased (0.202035 --> 0.191006).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.40704] Val Loss : [0.19686] Val AUC : [0.98864]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.40266] Val Loss : [0.19700] Val AUC : [0.98951]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.40010] Val Loss : [0.20148] Val AUC : [0.98325]
EarlyStopping counter: 3 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.39818] Val Loss : [0.17404] Val AUC : [0.99065]
Validation loss decreased (0.191006 --> 0.174043).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.39357] Val Loss : [0.18685] Val AUC : [0.98757]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.39059] Val Loss : [0.18761] Val AUC : [0.98921]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.38906] Val Loss : [0.15387] Val AUC : [0.99246]
Validation loss decreased (0.174043 --> 0.153873).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.38739] Val Loss : [0.17748] Val AUC : [0.98971]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.38486] Val Loss : [0.15102] Val AUC : [0.99173]
Validation loss decreased (0.153873 --> 0.151015).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.38188] Val Loss : [0.16374] Val AUC : [0.99285]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.38170] Val Loss : [0.16610] Val AUC : [0.99117]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.37843] Val Loss : [0.15583] Val AUC : [0.99230]
EarlyStopping counter: 3 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.37732] Val Loss : [0.17920] Val AUC : [0.99318]
EarlyStopping counter: 4 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.37593] Val Loss : [0.16627] Val AUC : [0.99104]
EarlyStopping counter: 5 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.37458] Val Loss : [0.16363] Val AUC : [0.99097]
EarlyStopping counter: 6 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.37267] Val Loss : [0.16003] Val AUC : [0.99111]
EarlyStopping counter: 7 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.37239] Val Loss : [0.16989] Val AUC : [0.98781]
EarlyStopping counter: 8 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.36887] Val Loss : [0.16066] Val AUC : [0.99327]
EarlyStopping counter: 9 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.35145] Val Loss : [0.13702] Val AUC : [0.99438]
Validation loss decreased (0.151015 --> 0.137023).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.34731] Val Loss : [0.14083] Val AUC : [0.99381]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.34512] Val Loss : [0.13981] Val AUC : [0.99425]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.34408] Val Loss : [0.13854] Val AUC : [0.99393]
EarlyStopping counter: 3 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [30], Train Loss : [0.34280] Val Loss : [0.13506] Val AUC : [0.99413]
Validation loss decreased (0.137023 --> 0.135063).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [31], Train Loss : [0.34155] Val Loss : [0.13810] Val AUC : [0.99399]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [32], Train Loss : [0.34049] Val Loss : [0.13389] Val AUC : [0.99387]
Validation loss decreased (0.135063 --> 0.133892).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [33], Train Loss : [0.33972] Val Loss : [0.13780] Val AUC : [0.99362]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [34], Train Loss : [0.33901] Val Loss : [0.13538] Val AUC : [0.99361]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [35], Train Loss : [0.33770] Val Loss : [0.13945] Val AUC : [0.99364]
EarlyStopping counter: 3 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [36], Train Loss : [0.33660] Val Loss : [0.13703] Val AUC : [0.99334]
EarlyStopping counter: 4 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [37], Train Loss : [0.33668] Val Loss : [0.13599] Val AUC : [0.99364]
EarlyStopping counter: 5 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [38], Train Loss : [0.33577] Val Loss : [0.13602] Val AUC : [0.99401]
EarlyStopping counter: 6 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [39], Train Loss : [0.33503] Val Loss : [0.13073] Val AUC : [0.99408]
Validation loss decreased (0.133892 --> 0.130731).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [40], Train Loss : [0.33397] Val Loss : [0.14291] Val AUC : [0.99339]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [41], Train Loss : [0.33375] Val Loss : [0.13571] Val AUC : [0.99386]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [42], Train Loss : [0.33220] Val Loss : [0.13842] Val AUC : [0.99366]
EarlyStopping counter: 3 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [43], Train Loss : [0.33251] Val Loss : [0.13339] Val AUC : [0.99355]
EarlyStopping counter: 4 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [44], Train Loss : [0.33132] Val Loss : [0.12929] Val AUC : [0.99422]
Validation loss decreased (0.130731 --> 0.129287).  Saving model ...


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [45], Train Loss : [0.33085] Val Loss : [0.13096] Val AUC : [0.99392]
EarlyStopping counter: 1 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [46], Train Loss : [0.33027] Val Loss : [0.13337] Val AUC : [0.99350]
EarlyStopping counter: 2 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [47], Train Loss : [0.32922] Val Loss : [0.12996] Val AUC : [0.99357]
EarlyStopping counter: 3 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [48], Train Loss : [0.32932] Val Loss : [0.13436] Val AUC : [0.99294]
EarlyStopping counter: 4 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [49], Train Loss : [0.32797] Val Loss : [0.13139] Val AUC : [0.99370]
EarlyStopping counter: 5 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [50], Train Loss : [0.32754] Val Loss : [0.13714] Val AUC : [0.99285]
EarlyStopping counter: 6 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [51], Train Loss : [0.32706] Val Loss : [0.13742] Val AUC : [0.99292]
EarlyStopping counter: 7 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [52], Train Loss : [0.32622] Val Loss : [0.13229] Val AUC : [0.99334]
EarlyStopping counter: 8 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [53], Train Loss : [0.32571] Val Loss : [0.14133] Val AUC : [0.99266]
EarlyStopping counter: 9 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [54], Train Loss : [0.32246] Val Loss : [0.13336] Val AUC : [0.99333]
EarlyStopping counter: 10 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [55], Train Loss : [0.32148] Val Loss : [0.13307] Val AUC : [0.99326]
EarlyStopping counter: 11 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [56], Train Loss : [0.32137] Val Loss : [0.13363] Val AUC : [0.99330]
EarlyStopping counter: 12 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [57], Train Loss : [0.32167] Val Loss : [0.13305] Val AUC : [0.99321]
EarlyStopping counter: 13 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [58], Train Loss : [0.32178] Val Loss : [0.13295] Val AUC : [0.99328]
EarlyStopping counter: 14 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [59], Train Loss : [0.32112] Val Loss : [0.13210] Val AUC : [0.99326]
EarlyStopping counter: 15 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [60], Train Loss : [0.32120] Val Loss : [0.13260] Val AUC : [0.99322]
EarlyStopping counter: 16 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [61], Train Loss : [0.32105] Val Loss : [0.13296] Val AUC : [0.99317]
EarlyStopping counter: 17 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [62], Train Loss : [0.32100] Val Loss : [0.13345] Val AUC : [0.99317]
EarlyStopping counter: 18 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [63], Train Loss : [0.32020] Val Loss : [0.13318] Val AUC : [0.99319]
EarlyStopping counter: 19 out of 20


  0%|          | 0/983 [00:00<?, ?it/s]

  0%|          | 0/116 [00:00<?, ?it/s]

Epoch [64], Train Loss : [0.32111] Val Loss : [0.13316] Val AUC : [0.99320]
EarlyStopping counter: 20 out of 20
Early stopping


## Inference

In [131]:
test_td = pd.read_csv('./test.csv')
test_mfcc = get_mfcc_feature_test(test_td, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

0it [00:00, ?it/s]

In [132]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)

            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [133]:
preds = inference(infer_model, test_loader, device)

  0%|          | 0/521 [00:00<?, ?it/s]

## Submission

In [134]:
submit = pd.read_csv('./sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.999519,0.000497
1,TEST_00001,0.99197,0.008137
2,TEST_00002,0.997211,0.002827
3,TEST_00003,0.331574,0.660311
4,TEST_00004,0.31606,0.674578


In [135]:
submit.to_csv('./baseline_submit.csv', index=False)