In [9]:
import os
import shutil
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

import random
import librosa
import pickle

import torch

import warnings
warnings.filterwarnings('ignore')

In [10]:
class Config:
    """Notebook-wide settings, grouped by purpose and read through ``CONFIG``."""

    # --- Audio ---
    SR: int = 32000          # sampling rate handed to librosa.load
    N_MFCC: int = 13         # number of MFCC coefficients requested from librosa

    # --- Dataset ---
    ROOT_FOLDER: str = './'

    # --- Training ---
    N_CLASSES: int = 2       # length of each one-hot label vector
    BATCH_SIZE: int = 16
    N_EPOCHS: int = 5
    LR: float = 3e-4

    # --- Others ---
    SEED: int = 42           # shared seed for the RNGs and the train/val split


# Single shared instance used by the rest of the notebook.
CONFIG = Config()

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for reproducible runs.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running kernel is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently ignored
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick a different convolution
    # algorithm from run to run, which defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Fix every RNG seed for reproducibility

In [4]:
# Load the training metadata and hold out 20% of the rows for validation.
df = pd.read_csv('./data/train.csv')
# Only the DataFrame is split: the label column travels with each row, so
# passing df['label'] as a second array just produced two throwaway outputs
# bound to `_`, which in IPython shadows the last-output variable.
# The shuffled indices depend only on the row count and random_state, so the
# resulting train/val membership is unchanged.
train, val = train_test_split(df, test_size=0.2, random_state=CONFIG.SEED)

# Get Features

In [5]:
def get_mfcc_feature(df, train_mode=True, root_dir=None):
    """Compute one time-averaged MFCC vector per audio file listed in ``df``.

    Args:
        df: DataFrame with a 'path' column and, when ``train_mode`` is true,
            a 'label' column.
        train_mode: when true, also build a one-hot label vector per row.
        root_dir: optional directory joined in front of every 'path' value.
            ``None`` (default) uses the paths exactly as stored.
            NOTE(review): the visualisation helpers resolve paths under
            'data' -- confirm whether callers here should pass the same.

    Returns:
        ``(features, labels)`` when ``train_mode`` is true, otherwise just
        ``features``. ``features`` is a list with one array per row (the mean
        of each MFCC coefficient over time); ``labels`` is a list of one-hot
        arrays of length ``CONFIG.N_CLASSES`` where index 0 marks 'fake' and
        index 1 marks every other label value.
    """
    features = []
    labels = []
    # iterrows() is a generator, so tqdm needs an explicit total to show
    # percentage and ETA.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        audio_path = row['path'] if root_dir is None else os.path.join(root_dir, row['path'])
        y, sr = librosa.load(audio_path, sr=CONFIG.SR)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        # Collapse the time axis: one mean value per coefficient.
        features.append(np.mean(mfcc.T, axis=0))

        if train_mode:
            one_hot = np.zeros(CONFIG.N_CLASSES, dtype=float)
            one_hot[0 if row['label'] == 'fake' else 1] = 1
            labels.append(one_hot)

    if train_mode:
        return features, labels
    return features

In [None]:
# Extract MFCC features together with one-hot labels for both splits.
train_mfcc_features, train_mfcc_labels = get_mfcc_feature(df=train, train_mode=True)
val_mfcc_features, val_mfcc_labels = get_mfcc_feature(df=val, train_mode=True)

In [None]:
# Sanity-check the extracted features. Only sizes and a short preview are
# printed: dumping every vector floods the cell output and bloats the notebook.
print(type(train_mfcc_features), type(train_mfcc_labels))
print(len(train_mfcc_features), len(train_mfcc_labels))
print(train_mfcc_features[:3])
print(train_mfcc_labels[:3])

In [None]:
# Cache the MFCC features and labels on disk so extraction can be skipped later.
# open(..., 'wb') does not create missing directories, so make sure the
# target folder exists first.
os.makedirs('data/features', exist_ok=True)
for pkl_path, payload in (
    ('data/features/train_mfcc_features.pkl', train_mfcc_features),
    ('data/features/train_mfcc_labels.pkl', train_mfcc_labels),
    ('data/features/val_mfcc_features.pkl', val_mfcc_features),
    ('data/features/val_mfcc_labels.pkl', val_mfcc_labels),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
def get_melspec_feature(df, train_mode=True, root_dir=None, n_mels=128, fmax=8192):
    """Compute a dB-scaled mel spectrogram for every audio file listed in ``df``.

    Args:
        df: DataFrame with a 'path' column and, when ``train_mode`` is true,
            a 'label' column.
        train_mode: when true, also build a one-hot label vector per row.
        root_dir: optional directory joined in front of every 'path' value.
            ``None`` (default) uses the paths exactly as stored.
            NOTE(review): the visualisation helpers resolve paths under
            'data' -- confirm whether callers here should pass the same.
        n_mels: number of mel filters passed to librosa.
        fmax: maximum frequency passed to librosa.

    Returns:
        ``(features, labels)`` when ``train_mode`` is true, otherwise just
        ``features``. ``features`` is a list with one spectrogram array per
        row; the arrays are appended as produced, without padding or cropping.
        ``labels`` is a list of one-hot arrays of length ``CONFIG.N_CLASSES``
        where index 0 marks 'fake' and index 1 marks every other label value.
    """
    features = []
    labels = []
    # iterrows() is a generator, so tqdm needs an explicit total to show
    # percentage and ETA.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        audio_path = row['path'] if root_dir is None else os.path.join(root_dir, row['path'])
        y, sr = librosa.load(audio_path, sr=CONFIG.SR)
        melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
        # Convert power to decibels relative to the loudest bin of this clip.
        features.append(librosa.power_to_db(melspec, ref=np.max))

        if train_mode:
            one_hot = np.zeros(CONFIG.N_CLASSES, dtype=float)
            one_hot[0 if row['label'] == 'fake' else 1] = 1
            labels.append(one_hot)

    if train_mode:
        return features, labels
    return features

In [None]:
# Extract mel-spectrogram features together with one-hot labels for both splits.
train_melspec_features, train_melspec_labels = get_melspec_feature(df=train, train_mode=True)
val_melspec_features, val_melspec_labels = get_melspec_feature(df=val, train_mode=True)

In [None]:
# Cache the mel-spectrogram features and labels on disk so extraction can be
# skipped later.
# open(..., 'wb') does not create missing directories, so make sure the
# target folder exists first.
os.makedirs('data/features', exist_ok=True)
for pkl_path, payload in (
    ('data/features/train_melspec_features.pkl', train_melspec_features),
    ('data/features/train_melspec_labels.pkl', train_melspec_labels),
    ('data/features/val_melspec_features.pkl', val_melspec_features),
    ('data/features/val_melspec_labels.pkl', val_melspec_labels),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
# Restore the cached MFCC features and labels.
# pickle.load can execute arbitrary code from the file, so only load caches
# written by this notebook.
mfcc_cache = []
for cache_path in (
    'data/features/train_mfcc_features.pkl',
    'data/features/train_mfcc_labels.pkl',
    'data/features/val_mfcc_features.pkl',
    'data/features/val_mfcc_labels.pkl',
):
    with open(cache_path, 'rb') as fh:
        mfcc_cache.append(pickle.load(fh))

(train_mfcc_features, train_mfcc_labels,
 val_mfcc_features, val_mfcc_labels) = mfcc_cache

In [None]:
# Restore the cached mel-spectrogram features and labels.
# pickle.load can execute arbitrary code from the file, so only load caches
# written by this notebook.
melspec_cache = []
for cache_path in (
    'data/features/train_melspec_features.pkl',
    'data/features/train_melspec_labels.pkl',
    'data/features/val_melspec_features.pkl',
    'data/features/val_melspec_labels.pkl',
):
    with open(cache_path, 'rb') as fh:
        melspec_cache.append(pickle.load(fh))

(train_melspec_features, train_melspec_labels,
 val_melspec_features, val_melspec_labels) = melspec_cache

# Visualize

In [None]:
def mfcc_visualize(csv_path, save_path, data_dir='data'):
    """Render the MFCC matrix of every clip listed in a CSV to a PNG file.

    WARNING: ``save_path`` is deleted (with all its contents) and recreated
    on every call.

    Args:
        csv_path: CSV file with 'id' and 'path' columns.
        save_path: output directory; one ``<id>.png`` is written per row.
        data_dir: directory that the CSV's 'path' values are joined onto.
    """
    # A bare `import librosa` does not expose the display submodule on
    # librosa releases without lazy loading, so import it explicitly.
    import librosa.display

    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    df = pd.read_csv(csv_path)

    for _, image_data in tqdm(df.iterrows(), total=len(df)):
        y, sr = librosa.load(os.path.join(data_dir, image_data['path']), sr=CONFIG.SR)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)

        fig = plt.figure(figsize=(8, 4))
        try:
            librosa.display.specshow(mfccs, x_axis='time')
            # Strip axes and padding so the saved image holds only the MFCC plot.
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(f"{save_path}/{image_data['id']}.png", dpi=300, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if drawing or saving failed,
            # so a long loop does not accumulate open figures.
            plt.close(fig)

In [None]:
# Uncomment to (re)generate the MFCC images. Each call deletes and recreates save_path.
# mfcc_visualize(csv_path="data/train_answer.csv", save_path="data/train_mfcc")
# mfcc_visualize(csv_path="data/val_answer.csv", save_path="data/val_mfcc")
# mfcc_visualize(csv_path="data/test.csv", save_path="data/test_mfcc")

In [None]:
def melspec_visualize(csv_path, save_path, data_dir='data', n_mels=128, fmax=8192):
    """Render the dB-scaled mel spectrogram of every clip in a CSV to a PNG file.

    WARNING: ``save_path`` is deleted (with all its contents) and recreated
    on every call.

    Args:
        csv_path: CSV file with 'id' and 'path' columns.
        save_path: output directory; one ``<id>.png`` is written per row.
        data_dir: directory that the CSV's 'path' values are joined onto.
        n_mels: number of mel filters.
        fmax: maximum frequency, used for both the spectrogram and the plot.
    """
    # A bare `import librosa` does not expose the display submodule on
    # librosa releases without lazy loading, so import it explicitly.
    import librosa.display

    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    df = pd.read_csv(csv_path)

    for _, image_data in tqdm(df.iterrows(), total=len(df)):
        # Use the shared sampling rate from CONFIG rather than a literal 32000.
        y, sr = librosa.load(os.path.join(data_dir, image_data['path']), sr=CONFIG.SR)
        melspecs = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
        melspecs_db = librosa.power_to_db(melspecs, ref=np.max)

        fig = plt.figure(figsize=(8, 4))
        try:
            librosa.display.specshow(melspecs_db, x_axis='time', y_axis='mel', sr=sr, fmax=fmax)
            # Strip axes and padding so the saved image holds only the spectrogram.
            plt.axis('off')
            plt.tight_layout()
            plt.savefig(f"{save_path}/{image_data['id']}.png", dpi=300, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if drawing or saving failed,
            # so a long loop does not accumulate open figures.
            plt.close(fig)

In [None]:
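# Uncomment to (re)generate the mel-spectrogram images. Each call deletes and recreates save_path.
# melspec_visualize(csv_path="data/train_answer.csv", save_path="data/train_melspec")
# melspec_visualize(csv_path="data/val_answer.csv", save_path="data/val_melspec")
# melspec_visualize(csv_path="data/test.csv", save_path="data/test_melspec")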

# Data preprocessing

In [18]:
import soundfile as sf  # only needed by this cell, to write the mixed clip

# Overlay two training clips sample by sample and inspect the result.
audio_path1 = 'data/train/RUNQPNJF.ogg'
audio_path2 = 'data/train/RDKEKEVX.ogg'

# Load both clips at the shared sampling rate from CONFIG (instead of a
# literal 32000) so the waveforms can be added element-wise.
y1, sr1 = librosa.load(audio_path1, sr=CONFIG.SR)
y2, sr2 = librosa.load(audio_path2, sr=CONFIG.SR)

# Zero-pad the shorter clip at its end so both arrays have the same length.
max_length = max(len(y1), len(y2))
y1 = np.pad(y1, (0, max_length - len(y1)), 'constant')
y2 = np.pad(y2, (0, max_length - len(y2)), 'constant')

# NOTE(review): the plain sum is neither rescaled nor clipped -- confirm
# whether peak normalisation is wanted before writing the file.
y = y1 + y2

S = librosa.feature.melspectrogram(y=y, sr=sr1, n_mels=128, fmax=8192)
S_dB = librosa.power_to_db(S, ref=np.max)

# Writing fails if the target folder is missing, so create it first.
os.makedirs('example', exist_ok=True)
sf.write("example/combine_sound.ogg", y, sr1)

In [19]:
# Show the shape of the dB-scaled mel spectrogram of the mixed clip.
print(S_dB.shape)

(128, 385)


In [24]:
# Check whether the answer CSV has a 'label' column: prints 1 if it does, 0 otherwise.
image_data = pd.read_csv('data/train_answer.csv')
print(int('label' in image_data.columns))

1
