In [21]:
import librosa
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch
import torchmetrics
import os
import warnings

warnings.filterwarnings('ignore')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Config:
    SR = 32000
    N_MFCC = 100
    # Dataset
    ROOT_FOLDER = './'
    # Training
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 50
    LR = 3e-4
    # Others
    SEED = 42
    
CONFIG = Config()

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED) # Seed 고정

df = pd.read_csv('./train.csv')
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)

def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features



class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]



In [3]:
train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)

44350it [07:06, 103.98it/s]
11088it [01:47, 103.49it/s]


In [4]:
np.save("./train_Mfcc.npy", train_mfcc)
np.save("./train_Label.npy", train_labels)
np.save("./val_Mfcc.npy", val_mfcc)
np.save("./val_Label.npy", val_labels)

In [22]:
train_mfcc = np.load("./train_Mfcc.npy")
train_labels = np.load("./train_Label.npy")
val_mfcc = np.load("./val_Mfcc.npy")
val_labels = np.load("./val_Label.npy")

In [23]:
train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [24]:
from sklearn.metrics import roc_auc_score

def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score
    
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        # Calculate AUC score
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score

class MLP(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.fc3(x)
        return torch.sigmoid(x)

# Autoencoder for pretraining
class Autoencoder(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim // 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

def get_mfcc_feature_unlabeled(file_paths):
    features = []
    for file_path in tqdm(file_paths):
        y, sr = librosa.load(file_path, sr=CONFIG.SR)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)
    return features



def pretrain_autoencoder(autoencoder, optimizer, dataloader, device):
    autoencoder.train()
    for epoch in range(CONFIG.N_EPOCHS):
        train_loss = []
        for features in tqdm(dataloader):
            features = features.float().to(device)
            
            optimizer.zero_grad()
            
            reconstructed = autoencoder(features)
            loss = criterion(reconstructed, features)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        print(f'Epoch [{epoch+1}/{CONFIG.N_EPOCHS}], Loss: {np.mean(train_loss):.5f}')
    
    return autoencoder



class PretrainedMLP(nn.Module):
    def __init__(self, autoencoder, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super(PretrainedMLP, self).__init__()
        self.encoder = autoencoder.encoder
        self.fc1 = nn.Linear(hidden_dim // 2, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.encoder(x)
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = self.fc3(x)
        return torch.sigmoid(x)

In [25]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)
            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [9]:
unlabeled_file_paths = [os.path.join(CONFIG.ROOT_FOLDER, './unlabeled_data', f) for f in os.listdir(os.path.join(CONFIG.ROOT_FOLDER, './unlabeled_data'))]
unlabeled_mfcc = get_mfcc_feature_unlabeled(unlabeled_file_paths)


100%|██████████| 1264/1264 [00:15<00:00, 80.62it/s]


In [10]:
np.save("./unlabeled_Mfcc.npy", unlabeled_mfcc)

In [26]:
unlabeled_mfcc = np.load("./unlabeled_Mfcc.npy")

In [27]:

unlabeled_dataset = CustomDataset(unlabeled_mfcc, None)
unlabeled_loader = DataLoader(
    unlabeled_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)

autoencoder = Autoencoder().to(device)
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=CONFIG.LR)
criterion = nn.MSELoss()

In [28]:
autoencoder = pretrain_autoencoder(autoencoder, optimizer, unlabeled_loader, device)

100%|██████████| 14/14 [00:00<00:00, 571.30it/s]


Epoch [1/50], Loss: 854.60808


100%|██████████| 14/14 [00:00<00:00, 608.71it/s]


Epoch [2/50], Loss: 839.15180


100%|██████████| 14/14 [00:00<00:00, 560.01it/s]


Epoch [3/50], Loss: 835.19449


100%|██████████| 14/14 [00:00<00:00, 608.71it/s]


Epoch [4/50], Loss: 849.17286


100%|██████████| 14/14 [00:00<00:00, 541.49it/s]


Epoch [5/50], Loss: 831.81432


100%|██████████| 14/14 [00:00<00:00, 560.00it/s]


Epoch [6/50], Loss: 837.06270


100%|██████████| 14/14 [00:00<00:00, 636.36it/s]


Epoch [7/50], Loss: 850.91240


100%|██████████| 14/14 [00:00<00:00, 666.68it/s]


Epoch [8/50], Loss: 838.98024


100%|██████████| 14/14 [00:00<00:00, 621.96it/s]


Epoch [9/50], Loss: 837.46975


100%|██████████| 14/14 [00:00<00:00, 666.62it/s]


Epoch [10/50], Loss: 849.82921


100%|██████████| 14/14 [00:00<00:00, 700.01it/s]


Epoch [11/50], Loss: 846.30785


100%|██████████| 14/14 [00:00<00:00, 666.65it/s]


Epoch [12/50], Loss: 837.62959


100%|██████████| 14/14 [00:00<00:00, 699.89it/s]


Epoch [13/50], Loss: 847.11458


100%|██████████| 14/14 [00:00<00:00, 717.66it/s]


Epoch [14/50], Loss: 846.41958


100%|██████████| 14/14 [00:00<00:00, 736.78it/s]


Epoch [15/50], Loss: 841.78092


100%|██████████| 14/14 [00:00<00:00, 643.12it/s]


Epoch [16/50], Loss: 847.84528


100%|██████████| 14/14 [00:00<00:00, 666.64it/s]


Epoch [17/50], Loss: 849.23141


100%|██████████| 14/14 [00:00<00:00, 699.91it/s]


Epoch [18/50], Loss: 831.72817


100%|██████████| 14/14 [00:00<00:00, 682.69it/s]


Epoch [19/50], Loss: 836.36469


100%|██████████| 14/14 [00:00<00:00, 696.27it/s]


Epoch [20/50], Loss: 833.80748


100%|██████████| 14/14 [00:00<00:00, 700.02it/s]


Epoch [21/50], Loss: 840.27902


100%|██████████| 14/14 [00:00<00:00, 635.46it/s]


Epoch [22/50], Loss: 847.69275


100%|██████████| 14/14 [00:00<00:00, 691.89it/s]

Epoch [23/50], Loss: 846.76149



100%|██████████| 14/14 [00:00<00:00, 608.66it/s]


Epoch [24/50], Loss: 835.93536


100%|██████████| 14/14 [00:00<00:00, 736.87it/s]


Epoch [25/50], Loss: 847.99867


100%|██████████| 14/14 [00:00<00:00, 666.65it/s]


Epoch [26/50], Loss: 835.26653


100%|██████████| 14/14 [00:00<00:00, 777.62it/s]

Epoch [27/50], Loss: 832.41913



100%|██████████| 14/14 [00:00<00:00, 821.35it/s]


Epoch [28/50], Loss: 840.05624


100%|██████████| 14/14 [00:00<00:00, 736.84it/s]


Epoch [29/50], Loss: 840.24528


100%|██████████| 14/14 [00:00<00:00, 700.03it/s]


Epoch [30/50], Loss: 842.46502


100%|██████████| 14/14 [00:00<00:00, 736.82it/s]

Epoch [31/50], Loss: 838.74648



100%|██████████| 14/14 [00:00<00:00, 823.35it/s]


Epoch [32/50], Loss: 830.84933


100%|██████████| 14/14 [00:00<00:00, 650.96it/s]


Epoch [33/50], Loss: 835.30353


100%|██████████| 14/14 [00:00<00:00, 777.80it/s]


Epoch [34/50], Loss: 847.84036


100%|██████████| 14/14 [00:00<00:00, 777.80it/s]


Epoch [35/50], Loss: 830.52215


100%|██████████| 14/14 [00:00<00:00, 736.85it/s]


Epoch [36/50], Loss: 837.14836


100%|██████████| 14/14 [00:00<00:00, 699.61it/s]


Epoch [37/50], Loss: 842.14608


100%|██████████| 14/14 [00:00<00:00, 682.70it/s]


Epoch [38/50], Loss: 839.75105


100%|██████████| 14/14 [00:00<00:00, 736.82it/s]


Epoch [39/50], Loss: 841.80476


100%|██████████| 14/14 [00:00<00:00, 700.03it/s]


Epoch [40/50], Loss: 854.57100


100%|██████████| 14/14 [00:00<00:00, 700.02it/s]


Epoch [41/50], Loss: 831.01729


100%|██████████| 14/14 [00:00<00:00, 665.84it/s]


Epoch [42/50], Loss: 852.06760


100%|██████████| 14/14 [00:00<00:00, 666.64it/s]


Epoch [43/50], Loss: 841.26041


100%|██████████| 14/14 [00:00<00:00, 777.81it/s]


Epoch [44/50], Loss: 836.60253


100%|██████████| 14/14 [00:00<00:00, 875.00it/s]


Epoch [45/50], Loss: 829.03330


100%|██████████| 14/14 [00:00<00:00, 777.75it/s]


Epoch [46/50], Loss: 843.65608


100%|██████████| 14/14 [00:00<00:00, 799.45it/s]


Epoch [47/50], Loss: 845.83662


100%|██████████| 14/14 [00:00<00:00, 777.81it/s]


Epoch [48/50], Loss: 841.37097


100%|██████████| 14/14 [00:00<00:00, 777.77it/s]

Epoch [49/50], Loss: 830.58067



100%|██████████| 14/14 [00:00<00:00, 823.55it/s]

Epoch [50/50], Loss: 836.95370





In [29]:
pretrained_mlp = PretrainedMLP(autoencoder).to(device)
optimizer = torch.optim.Adam(pretrained_mlp.parameters(), lr=CONFIG.LR)
infer_model = train(pretrained_mlp, optimizer, train_loader, val_loader, device)
torch.save(infer_model.state_dict(), "./pretrained_mlp_model_03.pth")

100%|██████████| 462/462 [00:01<00:00, 442.22it/s]
100%|██████████| 116/116 [00:00<00:00, 829.91it/s]


Epoch [1], Train Loss : [0.23914] Val Loss : [0.04766] Val AUC : [0.99867]


100%|██████████| 462/462 [00:00<00:00, 463.29it/s]
100%|██████████| 116/116 [00:00<00:00, 778.39it/s]


Epoch [2], Train Loss : [0.04669] Val Loss : [0.03550] Val AUC : [0.99913]


100%|██████████| 462/462 [00:01<00:00, 448.06it/s]
100%|██████████| 116/116 [00:00<00:00, 811.10it/s]


Epoch [3], Train Loss : [0.03013] Val Loss : [0.02102] Val AUC : [0.99953]


100%|██████████| 462/462 [00:01<00:00, 461.80it/s]
100%|██████████| 116/116 [00:00<00:00, 680.31it/s]


Epoch [4], Train Loss : [0.02396] Val Loss : [0.01546] Val AUC : [0.99981]


100%|██████████| 462/462 [00:00<00:00, 480.16it/s]
100%|██████████| 116/116 [00:00<00:00, 636.81it/s]


Epoch [5], Train Loss : [0.01977] Val Loss : [0.01548] Val AUC : [0.99984]


100%|██████████| 462/462 [00:00<00:00, 480.19it/s]
100%|██████████| 116/116 [00:00<00:00, 696.66it/s]


Epoch [6], Train Loss : [0.01641] Val Loss : [0.01048] Val AUC : [0.99978]


100%|██████████| 462/462 [00:00<00:00, 476.22it/s]
100%|██████████| 116/116 [00:00<00:00, 888.82it/s]


Epoch [7], Train Loss : [0.01716] Val Loss : [0.01192] Val AUC : [0.99986]


100%|██████████| 462/462 [00:00<00:00, 464.91it/s]
100%|██████████| 116/116 [00:00<00:00, 811.10it/s]


Epoch [8], Train Loss : [0.01454] Val Loss : [0.01551] Val AUC : [0.99989]


100%|██████████| 462/462 [00:00<00:00, 468.98it/s]
100%|██████████| 116/116 [00:00<00:00, 797.21it/s]


Epoch [9], Train Loss : [0.01125] Val Loss : [0.01214] Val AUC : [0.99990]


100%|██████████| 462/462 [00:01<00:00, 437.74it/s]
100%|██████████| 116/116 [00:00<00:00, 737.39it/s]


Epoch [10], Train Loss : [0.01156] Val Loss : [0.00825] Val AUC : [0.99995]


100%|██████████| 462/462 [00:01<00:00, 448.93it/s]
100%|██████████| 116/116 [00:00<00:00, 781.04it/s]


Epoch [11], Train Loss : [0.01051] Val Loss : [0.00711] Val AUC : [0.99996]


100%|██████████| 462/462 [00:01<00:00, 460.07it/s]
100%|██████████| 116/116 [00:00<00:00, 680.29it/s]


Epoch [12], Train Loss : [0.00757] Val Loss : [0.01107] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 443.11it/s]
100%|██████████| 116/116 [00:00<00:00, 688.36it/s]


Epoch [13], Train Loss : [0.00903] Val Loss : [0.00697] Val AUC : [0.99995]


100%|██████████| 462/462 [00:01<00:00, 434.97it/s]
100%|██████████| 116/116 [00:00<00:00, 768.10it/s]


Epoch [14], Train Loss : [0.00740] Val Loss : [0.00772] Val AUC : [0.99996]


100%|██████████| 462/462 [00:01<00:00, 420.28it/s]
100%|██████████| 116/116 [00:00<00:00, 856.02it/s]


Epoch [15], Train Loss : [0.00693] Val Loss : [0.00788] Val AUC : [0.99995]


100%|██████████| 462/462 [00:00<00:00, 472.85it/s]
100%|██████████| 116/116 [00:00<00:00, 703.65it/s]


Epoch [16], Train Loss : [0.00695] Val Loss : [0.00987] Val AUC : [0.99994]


100%|██████████| 462/462 [00:01<00:00, 457.66it/s]
100%|██████████| 116/116 [00:00<00:00, 868.85it/s]


Epoch [17], Train Loss : [0.00541] Val Loss : [0.00664] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 430.49it/s]
100%|██████████| 116/116 [00:00<00:00, 774.97it/s]


Epoch [18], Train Loss : [0.00582] Val Loss : [0.01090] Val AUC : [0.99994]


100%|██████████| 462/462 [00:01<00:00, 413.94it/s]
100%|██████████| 116/116 [00:00<00:00, 860.99it/s]


Epoch [19], Train Loss : [0.00585] Val Loss : [0.01642] Val AUC : [0.99987]


100%|██████████| 462/462 [00:01<00:00, 437.91it/s]
100%|██████████| 116/116 [00:00<00:00, 760.62it/s]


Epoch [20], Train Loss : [0.00460] Val Loss : [0.00599] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 442.76it/s]
100%|██████████| 116/116 [00:00<00:00, 737.06it/s]


Epoch [21], Train Loss : [0.00567] Val Loss : [0.00771] Val AUC : [0.99992]


100%|██████████| 462/462 [00:01<00:00, 438.23it/s]
100%|██████████| 116/116 [00:00<00:00, 849.75it/s]


Epoch [22], Train Loss : [0.00420] Val Loss : [0.00797] Val AUC : [0.99995]


100%|██████████| 462/462 [00:00<00:00, 469.68it/s]
100%|██████████| 116/116 [00:00<00:00, 962.58it/s]


Epoch [23], Train Loss : [0.00485] Val Loss : [0.00771] Val AUC : [0.99998]


100%|██████████| 462/462 [00:00<00:00, 471.33it/s]
100%|██████████| 116/116 [00:00<00:00, 822.58it/s]


Epoch [24], Train Loss : [0.00313] Val Loss : [0.00688] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 453.90it/s]
100%|██████████| 116/116 [00:00<00:00, 757.86it/s]


Epoch [25], Train Loss : [0.00318] Val Loss : [0.01116] Val AUC : [0.99994]


100%|██████████| 462/462 [00:01<00:00, 461.82it/s]
100%|██████████| 116/116 [00:00<00:00, 641.08it/s]


Epoch [26], Train Loss : [0.00441] Val Loss : [0.01307] Val AUC : [0.99989]


100%|██████████| 462/462 [00:01<00:00, 442.44it/s]
100%|██████████| 116/116 [00:00<00:00, 706.14it/s]


Epoch [27], Train Loss : [0.00420] Val Loss : [0.00842] Val AUC : [0.99994]


100%|██████████| 462/462 [00:01<00:00, 459.70it/s]
100%|██████████| 116/116 [00:00<00:00, 686.55it/s]


Epoch [28], Train Loss : [0.00457] Val Loss : [0.00810] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 451.78it/s]
100%|██████████| 116/116 [00:00<00:00, 871.68it/s]


Epoch [29], Train Loss : [0.00228] Val Loss : [0.00765] Val AUC : [0.99995]


100%|██████████| 462/462 [00:00<00:00, 473.62it/s]
100%|██████████| 116/116 [00:00<00:00, 888.83it/s]


Epoch [30], Train Loss : [0.00355] Val Loss : [0.00916] Val AUC : [0.99996]


100%|██████████| 462/462 [00:01<00:00, 446.43it/s]
100%|██████████| 116/116 [00:00<00:00, 713.31it/s]


Epoch [31], Train Loss : [0.00442] Val Loss : [0.00827] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 459.57it/s]
100%|██████████| 116/116 [00:00<00:00, 790.63it/s]


Epoch [32], Train Loss : [0.00295] Val Loss : [0.00662] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 452.01it/s]
100%|██████████| 116/116 [00:00<00:00, 704.84it/s]


Epoch [33], Train Loss : [0.00270] Val Loss : [0.00836] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 460.99it/s]
100%|██████████| 116/116 [00:00<00:00, 786.34it/s]


Epoch [34], Train Loss : [0.00224] Val Loss : [0.00788] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 460.55it/s]
100%|██████████| 116/116 [00:00<00:00, 846.59it/s]


Epoch [35], Train Loss : [0.00371] Val Loss : [0.00715] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 454.90it/s]
100%|██████████| 116/116 [00:00<00:00, 796.01it/s]


Epoch [36], Train Loss : [0.00224] Val Loss : [0.00543] Val AUC : [0.99998]


100%|██████████| 462/462 [00:01<00:00, 447.17it/s]
100%|██████████| 116/116 [00:00<00:00, 738.72it/s]


Epoch [37], Train Loss : [0.00250] Val Loss : [0.00521] Val AUC : [0.99999]


100%|██████████| 462/462 [00:01<00:00, 438.72it/s]
100%|██████████| 116/116 [00:00<00:00, 579.47it/s]


Epoch [38], Train Loss : [0.00212] Val Loss : [0.01720] Val AUC : [0.99984]


100%|██████████| 462/462 [00:01<00:00, 428.11it/s]
100%|██████████| 116/116 [00:00<00:00, 748.66it/s]


Epoch [39], Train Loss : [0.00239] Val Loss : [0.00856] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 385.50it/s]
100%|██████████| 116/116 [00:00<00:00, 743.76it/s]


Epoch [40], Train Loss : [0.00250] Val Loss : [0.00572] Val AUC : [0.99998]


100%|██████████| 462/462 [00:00<00:00, 484.33it/s]
100%|██████████| 116/116 [00:00<00:00, 725.21it/s]


Epoch [41], Train Loss : [0.00223] Val Loss : [0.01083] Val AUC : [0.99994]


100%|██████████| 462/462 [00:01<00:00, 449.35it/s]
100%|██████████| 116/116 [00:00<00:00, 707.85it/s]


Epoch [42], Train Loss : [0.00235] Val Loss : [0.00913] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 433.31it/s]
100%|██████████| 116/116 [00:00<00:00, 805.44it/s]


Epoch [43], Train Loss : [0.00301] Val Loss : [0.00958] Val AUC : [0.99996]


100%|██████████| 462/462 [00:00<00:00, 463.24it/s]
100%|██████████| 116/116 [00:00<00:00, 888.25it/s]


Epoch [44], Train Loss : [0.00128] Val Loss : [0.00678] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 460.51it/s]
100%|██████████| 116/116 [00:00<00:00, 703.43it/s]


Epoch [45], Train Loss : [0.00123] Val Loss : [0.00705] Val AUC : [0.99998]


100%|██████████| 462/462 [00:01<00:00, 447.93it/s]
100%|██████████| 116/116 [00:00<00:00, 805.45it/s]


Epoch [46], Train Loss : [0.00359] Val Loss : [0.00840] Val AUC : [0.99994]


100%|██████████| 462/462 [00:00<00:00, 465.90it/s]
100%|██████████| 116/116 [00:00<00:00, 791.76it/s]


Epoch [47], Train Loss : [0.00330] Val Loss : [0.00666] Val AUC : [0.99997]


100%|██████████| 462/462 [00:01<00:00, 461.00it/s]
100%|██████████| 116/116 [00:00<00:00, 705.62it/s]


Epoch [48], Train Loss : [0.00068] Val Loss : [0.00577] Val AUC : [0.99998]


100%|██████████| 462/462 [00:01<00:00, 452.48it/s]
100%|██████████| 116/116 [00:00<00:00, 720.51it/s]


Epoch [49], Train Loss : [0.00197] Val Loss : [0.00822] Val AUC : [0.99996]


100%|██████████| 462/462 [00:01<00:00, 454.14it/s]
100%|██████████| 116/116 [00:00<00:00, 791.75it/s]

Epoch [50], Train Loss : [0.00178] Val Loss : [0.00677] Val AUC : [0.99997]





In [15]:
val2 = pd.read_csv('./test.csv')
val2_mfcc = get_mfcc_feature(val2, False)
val2_dataset = CustomDataset(val2_mfcc, None)
val2_loader = DataLoader(
    val2_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

50000it [10:07, 82.27it/s]


In [16]:
np.save("./test_Mfcc.npy", val2_mfcc)

In [39]:
val2_mfcc = np.load("./train_Mfcc.npy")
val3_mfcc = np.load("./val_Mfcc.npy")
val2_mfcc = np.concatenate((val2_mfcc, val3_mfcc), axis=0)
val2_dataset = CustomDataset(val2_mfcc, None)
val2_loader = DataLoader(
    val2_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [40]:
preds = inference(infer_model, val2_loader, device)

100%|██████████| 578/578 [00:00<00:00, 1627.76it/s]


In [41]:
submit = pd.read_csv('./train_data.csv')
submit.iloc[:, 1:] = preds
submit.head()

Unnamed: 0,id,fake,real
0,RUNQPNJF,1.0,0.0
1,JFAWUOGJ,0.999878,0.000124
2,RDKEKEVX,0.0,1.0
3,QYHJDOFK,0.999999,1e-06
4,RSPQNHAO,0.0,1.0


In [36]:
submit = pd.read_csv('./sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.head()

ValueError: Length of values (44350) does not match length of index (50000)

In [42]:
submit.to_csv('train_pre.csv',index=False)