# Imports

In [1]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.3.post0-py3-none-any.whl (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 

In [2]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be
# read from this Colab session (requires interactive authorisation).
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Quietly (-qq) extract open.zip from the mounted Drive into the current working directory.
!unzip -qq "/content/drive/MyDrive/Colab Notebooks/open.zip"

In [None]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.nn as nn


import torch
import torchmetrics
import os

from torchvision import models  # 수정됨!!!

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Config

In [None]:
class Config:
    """Single home for every tunable constant used by the notebook."""

    # --- Reproducibility ---
    SEED = 42

    # --- Audio / feature extraction ---
    SR = 32000    # sampling rate passed to librosa.load
    N_MFCC = 13   # number of MFCC coefficients per clip (restored to the original value)

    # --- Dataset ---
    ROOT_FOLDER = './'

    # --- Training ---
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 100
    LR = 3e-4

    # --- Transformer defaults (consumed by TransformerModel) ---
    EMBED_DIM = 64
    NUM_HEADS = 4
    NUM_LAYERS = 2


# Shared instance that the rest of the notebook reads its settings from.
CONFIG = Config()

In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Covers Python's `random`, the `PYTHONHASHSEED` environment variable,
    NumPy, and PyTorch (CPU and all CUDA devices), and puts cuDNN into
    deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above; it must be off.
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Fix all RNG seeds using the configured value

In [None]:
# Load the training metadata and hold out 20% of the rows for validation.
# NOTE: the name `train` is rebound to the training function defined further
# down in the notebook, so this cell must run before that definition is used.
df = pd.read_csv('./train.csv')
train, val = train_test_split(df, test_size=0.2, random_state=CONFIG.SEED)

## Data Pre-processing : MFCC

In [None]:
def get_mfcc_feature(df, train_mode=True):
    """Extract one time-averaged MFCC vector per audio file listed in `df`.

    Args:
        df: DataFrame with a 'path' column (audio file location) and, when
            `train_mode` is true, a 'label' column.
        train_mode: When true, also build a one-hot label vector per row.

    Returns:
        `(features, labels)` when `train_mode` is true, otherwise `features`.
        `features` is a list of 1-D arrays of length `CONFIG.N_MFCC`;
        `labels` is a list of length-`CONFIG.N_CLASSES` float arrays where
        index 0 is set for the label 'fake' and index 1 for any other label.
    """
    features = []
    labels = []
    # iterrows() is a generator with no length, so pass the total explicitly
    # to let tqdm show a percentage and an ETA instead of a bare counter.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        # Load the waveform, resampled to the configured sampling rate.
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)

        # MFCC matrix is (n_mfcc, frames); average over frames to get a
        # fixed-length vector regardless of clip duration.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

In [None]:
# Extract MFCC features and one-hot labels for both splits.
train_mfcc, train_labels = get_mfcc_feature(train, train_mode=True)
val_mfcc, val_labels = get_mfcc_feature(val, train_mode=True)

44350it [24:13, 30.50it/s]
11088it [05:50, 31.63it/s]


# Dataset

In [None]:
# Dataset built from the feature vectors plus (optionally) their labels.
class CustomDataset(Dataset):
    """Index-based dataset over MFCC vectors with optional labels.

    When `label` is None (inference), indexing yields the feature alone;
    otherwise it yields a `(feature, label)` pair.
    """

    def __init__(self, mfcc, label):
        self.mfcc, self.label = mfcc, label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        sample = self.mfcc[index]
        if self.label is None:
            return sample
        return sample, self.label[index]

In [None]:
# Wrap the extracted features and labels of each split in a Dataset.
train_dataset = CustomDataset(mfcc=train_mfcc, label=train_labels)
val_dataset = CustomDataset(mfcc=val_mfcc, label=val_labels)

In [None]:
# Training batches are reshuffled every epoch; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

# Define Model

In [None]:
class MLP(nn.Module):
    """Three-layer perceptron over a single MFCC vector.

    Two ReLU-activated hidden layers are followed by a linear output layer
    whose values are squashed independently with a sigmoid.
    """

    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return torch.sigmoid(logits)

In [None]:
class LSTMModel(nn.Module):
    """LSTM classifier that reads the last time step's hidden output.

    A 2-D input `[batch, input_dim]` is treated as a sequence of length one.
    Outputs are per-class sigmoid scores.
    """

    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Promote [batch, input_dim] to [batch, seq_len=1, input_dim].
        if x.dim() == 2:
            x = x.unsqueeze(1)

        # Explicit all-zero initial hidden and cell states on the input's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_dim)
        initial_state = (
            torch.zeros(*state_shape).to(x.device),
            torch.zeros(*state_shape).to(x.device),
        )

        sequence_out, _ = self.lstm(x, initial_state)
        last_step = sequence_out[:, -1, :]
        return torch.sigmoid(self.fc(last_step))

In [None]:
class TransformerModel(nn.Module):
    """Transformer-encoder classifier over MFCC vectors.

    The input is linearly projected to `embed_dim`, passed through a stack
    of encoder layers, and the last sequence position is mapped to
    per-class sigmoid scores.

    Args:
        input_dim: Size of each input feature vector.
        embed_dim: Model width used inside the encoder.
        num_heads: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        output_dim: Number of output scores.
    """

    def __init__(self, input_dim=CONFIG.N_MFCC, embed_dim=CONFIG.EMBED_DIM, num_heads=CONFIG.NUM_HEADS, num_layers=CONFIG.NUM_LAYERS, output_dim=CONFIG.N_CLASSES):
        super(TransformerModel, self).__init__()
        # Project the input features to the encoder's model width.
        self.embedding = nn.Linear(input_dim, embed_dim)
        # batch_first=True is required: forward() feeds [batch, seq, embed].
        # With the default (sequence-first) layout the batch axis would be
        # treated as the sequence and samples would attend to each other.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embed_dim, output_dim)

    def forward(self, x):
        """Return sigmoid scores of shape `[batch, output_dim]`."""
        if x.dim() == 2:
            x = x.unsqueeze(1)  # reshape to [batch_size, 1, input_dim]
        x = self.embedding(x)
        x = self.transformer_encoder(x)
        x = self.fc(x[:, -1, :])  # last sequence position
        x = torch.sigmoid(x)
        return x

In [None]:

class ResNetModel(nn.Module):
    """ResNet-18 classifier applied to an MFCC vector viewed as a 1 x N image.

    The network starts from ImageNet weights, but the first convolution is
    replaced by a freshly initialised single-channel one and the final
    fully connected layer by a new `output_dim`-way layer.

    Args:
        input_dim: Accepted for signature parity with the other models; it
            is not used when building the network.
        output_dim: Number of output scores.
    """

    def __init__(self, input_dim=CONFIG.N_MFCC, output_dim=CONFIG.N_CLASSES):
        super(ResNetModel, self).__init__()
        # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
        # checkpoint that flag resolved to, so the loaded weights are unchanged.
        self.resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Single-channel stem (randomly initialised) replacing the RGB one.
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, output_dim)

    def forward(self, x):
        """Return sigmoid scores for a `[batch, input_dim]` input."""
        x = x.unsqueeze(1).unsqueeze(2)  # reshape to [batch_size, 1, 1, input_dim]
        x = self.resnet(x)
        x = torch.sigmoid(x)
        return x

# Train & Validation

In [None]:
from sklearn.metrics import roc_auc_score

def train(model, optimizer, scheduler, train_loader, val_loader, device):
    """Train `model` with BCE loss and return it carrying its best weights.

    After every epoch the model is evaluated with `validation`, the metrics
    are printed, and the scheduler is stepped with the validation loss.

    Args:
        model: Network whose outputs are probabilities in [0, 1].
        optimizer: Optimizer already bound to `model`'s parameters.
        scheduler: LR scheduler whose `step` accepts the validation loss.
        train_loader: Iterable of `(features, labels)` training batches.
        val_loader: Validation batches, forwarded to `validation`.
        device: Device on which the model and batches are placed.

    Returns:
        The same `model` object, loaded with the weights from the epoch that
        achieved the highest validation score. If no epoch scored above 0,
        the weights from the final epoch are kept.
    """
    import copy  # local import keeps this cell self-contained

    model.to(device)
    criterion = nn.BCELoss().to(device)

    best_val_score = 0
    # Snapshot of the best weights. Storing a reference to `model` itself
    # would silently track every later update, so a deep copy is required.
    best_state = None

    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)

            optimizer.zero_grad()

            output = model(features)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        # NOTE(review): `validation` is not defined in the visible part of the
        # notebook; it must exist in the kernel and return (loss, score).
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')

        if best_val_score < _val_score:
            best_val_score = _val_score
            best_state = copy.deepcopy(model.state_dict())

        # The scheduler is driven by the validation loss.
        scheduler.step(_val_loss)

    # Restore the best epoch's weights; fall back to the final weights when
    # no epoch ever improved on the initial score.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

## Run

In [None]:
# Build the ResNet-based classifier, an Adam optimizer, and a scheduler that
# cuts the learning rate tenfold after 5 epochs without a lower monitored loss.
model = ResNetModel()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=5,
    verbose=True,
)

infer_model = train(model, optimizer, scheduler, train_loader, val_loader, device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 88.5MB/s]
 63%|██████▎   | 293/462 [04:50<02:54,  1.04s/it]

## Inference

In [None]:
# Extract features for the unlabelled test set and wrap them in a loader
# that preserves row order.
test = pd.read_csv('./test.csv')
test_mfcc = get_mfcc_feature(test, train_mode=False)
test_dataset = CustomDataset(test_mfcc, label=None)
test_loader = DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

50000it [36:16, 22.97it/s]


In [None]:
def inference(model, test_loader, device):
    """Run `model` over every batch of `test_loader` without gradients.

    Args:
        model: Trained network; it is moved to `device` and put in eval mode.
        test_loader: Iterable yielding feature batches (no labels).
        device: Device on which to run the forward passes.

    Returns:
        A flat list with one entry per sample, each entry being that sample's
        model output converted to a plain Python list.
    """
    model.to(device)
    model.eval()
    collected = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            batch_probs = model(batch.float().to(device))
            # Bring the batch back to host memory as nested Python lists.
            collected.extend(batch_probs.cpu().detach().numpy().tolist())
    return collected

In [None]:
# Score the test set with the model returned by train(); one output row per test sample.
preds = inference(infer_model, test_loader, device)

100%|██████████| 521/521 [00:03<00:00, 166.92it/s]


## Submission

In [None]:
# Load the submission template and overwrite every column after the first
# with the model outputs, row by row.
# NOTE(review): assumes the template's prediction columns are ordered
# fake, real to match the label encoding (index 0 = 'fake') — confirm.
submit = pd.read_csv('./sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.9999,0.000101
1,TEST_00001,0.733738,0.267938
2,TEST_00002,0.799486,0.193129
3,TEST_00003,1.4e-05,0.999985
4,TEST_00004,0.003688,0.996174


In [None]:
# Write the filled-in submission without the DataFrame index column.
submit.to_csv('./baseline_submit.csv', index=False)