In [1]:
# Download the dataset archive from Google Drive with gdown.
# The recorded output shows it being saved as /content/fake_audio_dataset.zip (3.31G).
!gdown -- https://drive.google.com/uc?id=1CtpfMnFoTkTtB9ZwfUQOpwn06ZorIQIx

Downloading...
From (original): https://drive.google.com/uc?id=1CtpfMnFoTkTtB9ZwfUQOpwn06ZorIQIx
From (redirected): https://drive.google.com/uc?id=1CtpfMnFoTkTtB9ZwfUQOpwn06ZorIQIx&confirm=t&uuid=694f8223-9f3a-42cf-b18e-0c2fd5fec857
To: /content/fake_audio_dataset.zip
100% 3.31G/3.31G [00:35<00:00, 93.7MB/s]


In [2]:
# Extract the downloaded archive; -qq keeps unzip from printing anything.
!unzip -qq '/content/fake_audio_dataset.zip'

In [3]:
import librosa

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

!pip install torchmetrics

import torch
import torchmetrics
import os
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Collecting torchmetrics
  Downloading torchmetrics-1.4.0.post0-py3-none-any.whl (868 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/868.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/868.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m868.8/868.8 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.3.post0-py3-none-any.whl (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->torchmetrics)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->t

In [4]:
class Config:
    """Constants shared by the notebook cells, grouped by purpose."""

    # --- Audio / feature extraction ---
    SR = 16_000       # sampling rate passed to librosa.load
    N_MFCC = 128      # n_mfcc passed to librosa.feature.mfcc
    N_FFT = 400       # n_fft passed to librosa.feature.mfcc

    # --- Dataset ---
    ROOT_FOLDER = './'

    # --- Training ---
    N_CLASSES = 2     # length of the one-hot label vectors
    BATCH_SIZE = 96
    N_EPOCHS = 5
    LR = 3e-4

    # --- Others ---
    SEED = 42         # random_state for the train/validation split


# Single shared instance that the rest of the notebook reads from.
CONFIG = Config()

In [None]:
# Load the labelled training metadata.
df = pd.read_csv('./train.csv')
# Hold out 20% of the rows for validation. Only the frame itself is split:
# the partition depends on the row count and the seed alone, so this yields
# the same train/val rows as also splitting df['label'], without creating
# throwaway label splits or rebinding `_` (IPython's last-output variable).
train, val = train_test_split(df, test_size=0.2, random_state=CONFIG.SEED)

In [5]:
def get_mfcc_feature(df, train_mode=True):
    """Summarize every audio file listed in ``df`` as one MFCC vector.

    For each row, the file at ``row['path']`` is loaded at ``CONFIG.SR``, its
    MFCCs are computed with ``CONFIG.N_MFCC`` coefficients and an FFT size of
    ``CONFIG.N_FFT``, standardized with ``preprocessing.scale`` (default axis),
    and then averaged into a single 1-D vector.

    Args:
        df: DataFrame with a ``path`` column and, when ``train_mode`` is true,
            a ``label`` column.
        train_mode: When true, one-hot labels are built alongside the features.

    Returns:
        ``(features, labels)`` when ``train_mode`` is true, otherwise just
        ``features``. ``features`` is a list with one 1-D array per row.
        ``labels`` is a list of float vectors of length ``CONFIG.N_CLASSES``
        with a 1 at index 0 for the label ``'fake'`` and at index 1 for any
        other label value.
    """
    features = []
    labels = []
    # iterrows() is a generator with no length, so tqdm is given the row count
    # explicitly; without it the bar cannot show a percentage or an ETA.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        # Load the audio file, resampled to the configured rate.
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)

        # Extract the MFCCs (librosa documents the result as coefficients x frames).
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC, n_fft=CONFIG.N_FFT)
        # Standardize along scale()'s default axis (0), then average over
        # axis 0 of the transpose to get one value per coefficient.
        mfcc = preprocessing.scale(mfcc)
        mfcc = np.mean(mfcc.T, axis=0)

        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            # Index 0 is 'fake'; every other label value lands on index 1.
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

In [29]:
def get_mfcc_feature_ul(file_path):
    """Extract one MFCC vector for every entry in a directory of audio files.

    Each file is processed the same way as in ``get_mfcc_feature``: loaded at
    ``CONFIG.SR``, converted to MFCCs, standardized with
    ``preprocessing.scale`` and averaged into a single 1-D vector.

    Args:
        file_path: Directory whose entries are all loadable audio files.

    Returns:
        List with one 1-D feature array per directory entry, ordered by
        sorted file name.
    """
    features = []
    # os.listdir() returns entries in arbitrary order. Sorting makes the
    # feature order reproducible, so row i can be traced back to its file.
    file_list = sorted(os.listdir(file_path))
    for file in file_list:

        full_path = os.path.join(file_path, file)
        # Load the audio file, resampled to the configured rate.
        y, sr = librosa.load(full_path, sr=CONFIG.SR)

        # Extract the MFCCs (librosa documents the result as coefficients x frames).
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC, n_fft=CONFIG.N_FFT)
        # Standardize along scale()'s default axis (0), then average over
        # axis 0 of the transpose to get one value per coefficient.
        mfcc = preprocessing.scale(mfcc)
        mfcc = np.mean(mfcc.T, axis=0)

        features.append(mfcc)
    return features

In [30]:
# Directory holding the unlabeled clips; extract one MFCC vector per file in it.
path = '/content/unlabeled_data'
ul_data = get_mfcc_feature_ul(path)

In [31]:
# Sanity check: number of feature vectors extracted from the unlabeled directory.
print(len(ul_data))

1264


In [33]:
# Cache the unlabeled features on disk so the extraction need not be repeated.
np.save('ul_data.npy', ul_data)

In [None]:
import sklearn

In [None]:
# Extract MFCC features and one-hot labels for both splits.
# Slow: the recorded run took about 17 minutes for train and 4 minutes for validation.
train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)

44350it [17:15, 42.81it/s]
11088it [03:58, 46.54it/s]


In [None]:
# Cache the training features and labels on disk.
np.save('train_mfcc.npy', train_mfcc)
np.save('train_labels.npy', train_labels)

# Cache the validation features and labels on disk.
np.save('val_mfcc.npy', val_mfcc)
np.save('val_labels.npy', val_labels)

In [None]:
# Load the test metadata and extract features only: with train_mode=False the
# 'label' column is never read and no labels are returned.
# Slow: the recorded run took about 24 minutes.
test = pd.read_csv('./test.csv')
test_mfcc = get_mfcc_feature(test, False)

50000it [24:17, 34.30it/s]


In [None]:
# Cache the test features on disk.
np.save('test_mfcc.npy', test_mfcc)