# Imports

### 모델 학습 및 추론에 사용할 라이브러리들을 불러옵니다.

In [1]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import torchmetrics
import os


In [2]:
import warnings
warnings.filterwarnings('ignore')


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Config

1. SR(Sample Rate)

    - 오디오 데이터의 샘플링 레이트를 설정합니다.
    - 오디오 데이터의 초당 샘플 수를 정의합니다.

2. N_MFCC(Number of MFCCs)

    - N_MFCC는 멜 주파수 켑스트럼 계수(MFCCs)의 개수를 의미합니다.
    - MFCC는 오디오 신호의 주파수 특성을 인간의 청각 특성에 맞게 변환한 것으로, 음성 인식 등의 분야에서 많이 사용됩니다.

3. ROOT_FOLDER

    - 데이터셋의 루트 폴더 경로를 설정합니다.

4. N_CLASSES

    - 분류할 클래스의 수를 설정합니다.
    - 모델의 출력 차원을 설정할 때 사용됩니다.

5. BATCH_SIZE

    - 배치 크기를 설정합니다.
    - 학습 시 한 번에 처리할 데이터 샘플의 수를 정의합니다
    - 큰 배치 크기는 메모리 사용량을 증가시키지만, 학습 속도를 높입니다.

6. N_EPOCHS

    - 학습 에폭 수를 설정합니다.
    - 전체 데이터셋을 학습할 횟수를 정의합니다.
    - 에폭 수가 너무 적으면 과소적합이 발생할 수 있고, 너무 많으면 과적합이 발생할 수 있습니다.

7. LR (Learning Rate)

    - 학습률을 설정합니다.
    - 모델의 가중치를 업데이트할 때 사용되는 학습 속도를 정의합니다.
    - 학습률이 너무 크면 학습이 불안정해질 수 있고, 너무 작으면 학습 속도가 느려집니다.

8. SEED

    - 재현성을 위해 SEED값을 고정하는 SEED를 설정해줍니다.

In [4]:
class Config:
    SR = 32000
    N_MFCC = 13
    # Dataset
    ROOT_FOLDER = './'
    # Training
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 5
    LR = 3e-4
    # Others
    SEED = 42
    
CONFIG = Config()


# Fixed RandomSeed

- 아래의 코드는 머신러닝이나 딥러닝 모델을 훈련할 때, 결과의 재현성을 보장하기 위해 사용되는 함수입니다.
- 이 함수는 다양한 랜덤 시드를 고정하여, 실행할 때마다 동일한 결과를 얻기 위해 사용됩니다.

In [20]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED) # Seed 고정


# Train & Validation Split
여기서는 인공지능 모델 학습에서 중요한 데이터 전처리를 수행합니다.

모델을 훈련하기 전에, 전체 데이터 세트를 두 개의 서브셋으로 나눠줍니다.
하나는 모델 학습에 사용되는 학습 데이터 세트이고, 다른 하나는 학습된 모델의 성능을 평가하기 위한 검증 데이터 세트입니다.

이렇게 데이터를 분할하는 이유는 모델이 새로운 데이터에 대해 얼마나 잘 일반화하는지 평가하기 위해서입니다.
모델이 훈련 데이터에만 과도하게 최적화되지 않도록 검증 데이터 세트를 사용하여 모델의 성능을 주기적으로 검증합니다.

In [6]:
df = pd.read_csv('./train.csv')
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)


## Data Pre-processing : MFCC

- 이 코드는 MRCC 특징을 추출하고, 이를 학습에 사용할 형식으로 변환하는 함수를 정의하는 코드입니다.
 
- librosa.load를 사용하여 row['path']에 해당하는 오디오 파일을 로드합니다.
 
- 샘플링 레이트는 CONFIG.SR로 지정됩니다.
 
- librosa.feature.mfcc를 사용하여 오디오 신호 y로부터 MFCC 특징을 추출합니다.
 
- CONFIG.N_MFCC는 추출할 MFCC 계수의 개수를 지정합니다.
 
- 추출된 MFCC는 프레임별로 계산되므로, 각 프레임의 평균값을 구하여 전체 파일에 대한 MFCC 특징을 대표하는 벡터를 얻습니다.
 
- train_mode = True인 경우, 현재 행의 레이블을 읽어와 CONFIG.N_CLASSES 길이의 벡터로 변환합니다.
 
- 레이블이 'fake'이면 첫 번째 원소를 1로, 'real'이면 두 번째 원소를 1로 설정합니다.

- 이 벡터를 labels 리스트에 추가합니다.

In [7]:
def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features


In [8]:
train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)


44350it [08:22, 88.27it/s] 
11088it [02:09, 85.31it/s] 


# Dataset

In [9]:
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]


In [10]:
train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)


In [11]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)


# Define Model

- MLP() 클래스는 PyTorch의 nn.Module을 상속받아 정의된 Multilayer Perceptron 모델입니다.

1. __init__메서드

- 모델의 각 구성요소를 초기화 합니다.
- input_dim=CONFIG.N_MFCC : MFCC의 개수를 의미합니다.
- hidden_dim : 은닉층의 차원 수입니다.
- output_dim : 분류할 클래스의 수를 의미합니다.

2. __forward__ 메서드

- forward 메서드는 입력 데이터를 순차적으로 세 개의 선형 계층과 ReLU 활성화 함수를 거쳐 최종적으로 시그모이드 함수를 적용하여 출력 확률을 계산합니다.

In [12]:
class MLP(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        x = torch.sigmoid(x)
        return x


# Train & Validation
아래의 코드는 PyTorch를 사용한 딥러닝 모델의 훈련 및 검증 과정과 다중 레이블 AUC 점수 계산을 포함합니다.

In [13]:
from sklearn.metrics import roc_auc_score

def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score
    
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        # Calculate AUC score
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score


## Run
모델을 초기화하고, 옵티마이저를 설정한 다음 모델 훈련 함수를 호출하여 실제로 훈련 과정을 실행하는 단계입니다.

In [14]:
model = MLP()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CONFIG.LR)

infer_model = train(model, optimizer, train_loader, val_loader, device)


100%|██████████| 462/462 [00:03<00:00, 141.01it/s]
100%|██████████| 116/116 [00:00<00:00, 688.25it/s]


Epoch [1], Train Loss : [0.51093] Val Loss : [0.33744] Val AUC : [0.93560]


100%|██████████| 462/462 [00:03<00:00, 148.41it/s]
100%|██████████| 116/116 [00:00<00:00, 546.48it/s]


Epoch [2], Train Loss : [0.31068] Val Loss : [0.28448] Val AUC : [0.95397]


100%|██████████| 462/462 [00:03<00:00, 121.73it/s]
100%|██████████| 116/116 [00:00<00:00, 446.78it/s]


Epoch [3], Train Loss : [0.26747] Val Loss : [0.27039] Val AUC : [0.96240]


100%|██████████| 462/462 [00:04<00:00, 113.45it/s]
100%|██████████| 116/116 [00:00<00:00, 550.63it/s]


Epoch [4], Train Loss : [0.23842] Val Loss : [0.22569] Val AUC : [0.96931]


100%|██████████| 462/462 [00:03<00:00, 116.04it/s]
100%|██████████| 116/116 [00:00<00:00, 413.54it/s]


Epoch [5], Train Loss : [0.21854] Val Loss : [0.20937] Val AUC : [0.97350]


## Inference

In [15]:
test = pd.read_csv('./test.csv')
test_mfcc = get_mfcc_feature(test, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)


50000it [13:43, 60.70it/s]


In [16]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions


In [17]:
preds = inference(infer_model, test_loader, device)


100%|██████████| 521/521 [00:00<00:00, 1032.41it/s]


## Submission

In [18]:
submit = pd.read_csv('./sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.head()


Unnamed: 0,id,fake,real
0,TEST_00000,0.275122,0.899598
1,TEST_00001,0.042682,0.979734
2,TEST_00002,0.065133,0.961473
3,TEST_00003,0.009338,0.985858
4,TEST_00004,0.175708,0.85136


In [19]:
submit.to_csv('./baseline_submit.csv', index=False)


In [1]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import torchmetrics
import os

import warnings
warnings.filterwarnings('ignore')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Config:
    SR = 32000
    N_MFCC = 13
    # Dataset
    ROOT_FOLDER = './'
    # Training
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 5
    LR = 3e-4
    # Others
    SEED = 42
    
CONFIG = Config()

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED) # Seed 고정

df = pd.read_csv('./train.csv')
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)

def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)

class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)

train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

class MLP(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        x = torch.sigmoid(x)
        return x
    

from sklearn.metrics import roc_auc_score

def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score
    
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        # Calculate AUC score
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score

model = MLP()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CONFIG.LR)

infer_model = train(model, optimizer, train_loader, val_loader, device)

test = pd.read_csv('./test.csv')
test_mfcc = get_mfcc_feature(test, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

preds = inference(infer_model, test_loader, device)

submit = pd.read_csv('./sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.head()

submit.to_csv('./test_submit.csv', index=False)


44350it [10:10, 72.68it/s] 
11088it [02:51, 64.71it/s]
100%|██████████| 462/462 [00:03<00:00, 115.79it/s]
100%|██████████| 116/116 [00:00<00:00, 343.54it/s]


Epoch [1], Train Loss : [0.51093] Val Loss : [0.33744] Val AUC : [0.93560]


100%|██████████| 462/462 [00:03<00:00, 124.68it/s]
100%|██████████| 116/116 [00:00<00:00, 339.76it/s]


Epoch [2], Train Loss : [0.31068] Val Loss : [0.28448] Val AUC : [0.95397]


100%|██████████| 462/462 [00:03<00:00, 123.21it/s]
100%|██████████| 116/116 [00:00<00:00, 441.09it/s]


Epoch [3], Train Loss : [0.26747] Val Loss : [0.27039] Val AUC : [0.96240]


100%|██████████| 462/462 [00:03<00:00, 119.36it/s]
100%|██████████| 116/116 [00:00<00:00, 467.53it/s]


Epoch [4], Train Loss : [0.23842] Val Loss : [0.22569] Val AUC : [0.96931]


100%|██████████| 462/462 [00:03<00:00, 127.63it/s]
100%|██████████| 116/116 [00:00<00:00, 515.87it/s]


Epoch [5], Train Loss : [0.21854] Val Loss : [0.20937] Val AUC : [0.97350]


50000it [17:38, 47.25it/s]
100%|██████████| 521/521 [00:01<00:00, 518.32it/s]


In [1]:
import numpy as np
import pandas as pd
import os
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import random
import warnings

warnings.filterwarnings('ignore')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Config:
    SR = 32000
    N_MFCC = 13
    N_FEATURES = 64
    ROOT_FOLDER = './'
    N_CLASSES = 2
    BATCH_SIZE = 64
    N_EPOCHS = 20  # Increase the number of epochs for better training
    LR = 1e-4  # Lower learning rate
    SEED = 42
    DROPOUT_RATE = 0.5
    HIDDEN_DIMS = [128, 256, 128]
    INITIALIZATION = 'xavier'
    ACTIVATION_FUNCTION = 'relu'
    PSEUDO_LABEL_THRESHOLD = 0.95  # Higher threshold for pseudo-labeling

CONFIG = Config()

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED)

# 데이터 로드 및 전처리
df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'train.csv'))

def label_encoder(column):
    le = LabelEncoder().fit(column)
    print(column.name, le.classes_)
    return le.transform(column)

df['class'] = label_encoder(df['label'])

# 음성 특징 추출 함수 (시간 스프레딩 및 주파수 스프레딩 추가)
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=CONFIG.SR)
    
    # 시간 스프레딩 (Time Stretching)
    stretched_y = librosa.effects.time_stretch(y, rate=0.8)
    
    mfcc = librosa.feature.mfcc(y=stretched_y, sr=sr, n_mfcc=CONFIG.N_MFCC)
    chroma = librosa.feature.chroma_stft(y=stretched_y, sr=sr)
    mel = librosa.feature.melspectrogram(y=stretched_y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=stretched_y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(stretched_y), sr=sr)
    
    # 주파수 스프레딩 (Frequency Masking)
    masker = np.random.uniform(low=0.0, high=1.0, size=chroma.shape) < 0.2
    chroma[masker] = 0.0
    
    features = np.concatenate((mfcc, chroma, mel, contrast, tonnetz), axis=0)
    return features

def get_features_and_labels(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        file_path = os.path.join(CONFIG.ROOT_FOLDER, row['path'][2:])  # './train/' 또는 './test/' 제거
        if os.path.isfile(file_path):
            features.append(extract_features(file_path))
            if train_mode:
                labels.append(row['class'])
    if train_mode:
        return np.array(features), np.array(labels)
    return np.array(features)

# Train 데이터에서 특징 추출
X, y = get_features_and_labels(df, True)

# 데이터 불균형 해결
smote = SMOTE(random_state=CONFIG.SEED)
X_resampled, y_resampled = smote.fit_resample(X.reshape(len(X), -1), y)
X_resampled = X_resampled.reshape(len(X_resampled), CONFIG.N_FEATURES, -1)
y_resampled = torch.tensor(y_resampled).long()  # 정수형으로 변환
y_resampled = torch.nn.functional.one_hot(y_resampled, num_classes=CONFIG.N_CLASSES).float()

# 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=CONFIG.SEED)

# PyTorch Dataset 클래스 정의
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        return self.features[index], self.labels[index]

train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

# CNN+RNN 모델 정의 (CNN 레이어 개선)
class CNNRNN(nn.Module):
    def __init__(self, input_dim=CONFIG.N_FEATURES, hidden_dims=CONFIG.HIDDEN_DIMS, output_dim=CONFIG.N_CLASSES):
        super(CNNRNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),  # 필터 수를 64로 증가
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(CONFIG.DROPOUT_RATE),
            
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # 필터 수를 128로 증가
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(CONFIG.DROPOUT_RATE)
        )
        
        self.rnn = nn.LSTM(input_size=128*8, hidden_size=hidden_dims[0], num_layers=3, batch_first=True, dropout=CONFIG.DROPOUT_RATE, bidirectional=True)  # LSTM 레이어를 3개로 증가
        
        self.fc = nn.Sequential(
            nn.Linear(hidden_dims[0]*2, hidden_dims[1]),
            nn.ReLU(),
            nn.Dropout(CONFIG.DROPOUT_RATE),
            nn.Linear(hidden_dims[1], hidden_dims[2]),
            nn.ReLU(),
            nn.Dropout(CONFIG.DROPOUT_RATE),
            nn.Linear(hidden_dims[2], output_dim),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        batch_size = x.size(0)
        x = x.unsqueeze(1)  # (batch_size, 1, n_features, time_steps)
        x = self.cnn(x)
        x = x.view(batch_size, 128*8, -1).permute(0, 2, 1)  # (batch_size, time_steps, 128*8)
        x, _ = self.rnn(x)
        x = x[:, -1, :]  # 마지막 타임 스텝의 출력
        x = self.fc(x)
        return x

# 모델 학습 함수 정의
from sklearn.metrics import roc_auc_score

def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score
    
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score

model = CNNRNN()
optimizer = torch.optim.Adam(params=model.parameters(), lr=CONFIG.LR)

infer_model = train(model, optimizer, train_loader, val_loader, device)

# Unlabeled 데이터에 대해 pseudo-labeling
unlabeled_path = os.path.join(CONFIG.ROOT_FOLDER, 'unlabeled_data')
unlabeled_files = [os.path.join(unlabeled_path, f) for f in os.listdir(unlabeled_path) if f.endswith('.ogg')]

unlabeled_features = get_features_and_labels(unlabeled_files, train_mode=False)
unlabeled_features = np.array(unlabeled_features)
unlabeled_dataset = CustomDataset(unlabeled_features, torch.zeros((len(unlabeled_features), CONFIG.N_CLASSES)))
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

def pseudo_labeling(model, loader, device):
    model.to(device)
    model.eval()
    pseudo_labels = []
    with torch.no_grad():
        for features, _ in tqdm(iter(loader)):
            features = features.float().to(device)
            probs = model(features)
            pseudo_labels.append(probs.cpu().detach().numpy())
    return np.concatenate(pseudo_labels, axis=0)

pseudo_labels = pseudo_labeling(infer_model, unlabeled_loader, device)
pseudo_labels = (pseudo_labels > CONFIG.PSEUDO_LABEL_THRESHOLD).astype(int)  # 임계값을 CONFIG.PSEUDO_LABEL_THRESHOLD로 변경

# Pseudo-labeled 데이터를 학습 데이터에 추가
pseudo_labeled_dataset = CustomDataset(unlabeled_features, torch.tensor(pseudo_labels).float())
train_dataset_combined = torch.utils.data.ConcatDataset([train_dataset, pseudo_labeled_dataset])
train_loader_combined = DataLoader(train_dataset_combined, batch_size=CONFIG.BATCH_SIZE, shuffle=True)

# 모델을 pseudo-labeled 데이터를 포함하여 재학습
infer_model = train(model, optimizer, train_loader_combined, val_loader, device)

# 테스트 데이터 예측
test_df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'test.csv'))
test_features = get_features_and_labels(test_df, train_mode=False)
test_features = np.array(test_features)
test_dataset = CustomDataset(test_features, torch.zeros((len(test_features), CONFIG.N_CLASSES)))
test_loader = DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features, _ in tqdm(iter(test_loader)):
            features = features.float().to(device)
            probs = model(features)
            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

preds = inference(infer_model, test_loader, device)

submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'sample_submission.csv'))
submit.iloc[:, 1:] = preds
submit.head()

submit.to_csv('./pseudo_label_submit13.csv', index=False)


label ['fake' 'real']


55438it [18:08:32,  1.18s/it]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (55438, 166) + inhomogeneous part.

In [3]:
import numpy as np
import pandas as pd
import os
import librosa
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler

class Config:
    SR = 32000
    N_MFCC = 13
    N_MFCC_LEN = 431  # 고정된 MFCC 길이
    ROOT_FOLDER = './'
    N_CLASSES = 2
    BATCH_SIZE = 64
    N_EPOCHS = 15
    LR = 2e-4
    SEED = 42
    DROPOUT_RATE = 0.2
    NUM_CHANNELS = 64
    DILATION_DEPTH = 9
    KERNEL_SIZE = 2
    LR_DECAY_STEP = 5
    LR_DECAY_GAMMA = 0.1
    EARLY_STOPPING_PATIENCE = 3

CONFIG = Config()

def seed_everything(seed):
    import random
    import torch
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED)

# Data loading and preprocessing
df = pd.read_csv('./train.csv')

def label_encoder(column):
    le = LabelEncoder().fit(column)
    print(column.name, le.classes_)
    return le.transform(column)

df['class'] = label_encoder(df['label'])

# MFCC feature extraction with augmentation
def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)
        
        # Time Stretching
        stretched_y = librosa.effects.time_stretch(y, rate=0.8)
        
        # n_fft 값을 조정하여 warning 방지
        n_fft = min(1024, len(stretched_y))
        
        # MFCC
        mfcc = librosa.feature.mfcc(y=stretched_y, sr=sr, n_mfcc=CONFIG.N_MFCC, n_fft=n_fft)
        
        # Chroma
        chroma = librosa.feature.chroma_stft(y=stretched_y, sr=sr, n_fft=n_fft)
        
        # Mel spectrogram
        mel = librosa.feature.melspectrogram(y=stretched_y, sr=sr, n_fft=n_fft)
        
        # Spectral contrast
        contrast = librosa.feature.spectral_contrast(y=stretched_y, sr=sr, n_fft=n_fft)
        
        # Tonnetz
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(stretched_y), sr=sr)
        
        # Frequency Masking on Chroma
        masker = np.random.uniform(low=0.0, high=1.0, size=chroma.shape) < 0.2
        chroma[masker] = 0.0
        
        # Fix length
        mfcc = librosa.util.fix_length(mfcc, size=CONFIG.N_MFCC_LEN, axis=1)
        chroma = librosa.util.fix_length(chroma, size=CONFIG.N_MFCC_LEN, axis=1)
        mel = librosa.util.fix_length(mel, size=CONFIG.N_MFCC_LEN, axis=1)
        contrast = librosa.util.fix_length(contrast, size=CONFIG.N_MFCC_LEN, axis=1)
        tonnetz = librosa.util.fix_length(tonnetz, size=CONFIG.N_MFCC_LEN, axis=1)
        
        # Concatenate all features
        feature = np.concatenate([mfcc, chroma, mel, contrast, tonnetz], axis=0)
        features.append(feature)

        if train_mode:
            label = row['class']
            labels.append(label)

    return features, labels

# 특징 추출 후 저장
features, labels = get_mfcc_feature(df, True)
feature_df = pd.DataFrame({'features': features, 'class': labels})
feature_df.to_csv('./features_labels.csv', index=False)
print("특징을 추출하여 저장했습니다.")


label ['fake' 'real']


  return pitch_tuning(
45750it [11:38:36,  1.09it/s]


MemoryError: Unable to allocate 585. KiB for an array with shape (1025, 146) and data type float32

In [None]:
import numpy as np
import pandas as pd
import os
import librosa
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import random
import warnings
from sklearn.metrics import roc_auc_score, mean_squared_error
from sklearn.calibration import calibration_curve

warnings.filterwarnings('ignore')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Config:
    SR = 32000
    N_MFCC = 13
    N_MFCC_LEN = 431  # 고정된 MFCC 길이
    ROOT_FOLDER = './'
    N_CLASSES = 2
    BATCH_SIZE = 64
    N_EPOCHS = 15
    LR = 2e-4
    SEED = 42
    DROPOUT_RATE = 0.2
    NUM_CHANNELS = 64
    DILATION_DEPTH = 9
    KERNEL_SIZE = 2
    LR_DECAY_STEP = 5
    LR_DECAY_GAMMA = 0.1
    EARLY_STOPPING_PATIENCE = 3

CONFIG = Config()

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED)

# 저장된 특징 불러오기
feature_df = pd.read_csv('./features_labels.csv')
X = np.array([np.fromstring(feature[1:-1], sep=',') for feature in feature_df['features']])
y = np.array(feature_df['class'].tolist())

# Handling data imbalance
ros = RandomOverSampler(random_state=CONFIG.SEED)
X_resampled, y_resampled = ros.fit_resample(X.reshape(len(X), -1), y)  # Reshape MFCC features to 2D for resampling
X_resampled = X_resampled.reshape(-1, 5 * CONFIG.N_MFCC, CONFIG.N_MFCC_LEN)  # Restore original shape with concatenated features
y_resampled = torch.tensor(y_resampled).long()  # Convert to integer type
y_resampled = torch.nn.functional.one_hot(y_resampled, num_classes=CONFIG.N_CLASSES).float()

# Data split
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=CONFIG.SEED)

# PyTorch Dataset class definition
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        return self.mfcc[index], self.label[index]

train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

# WaveNet model definition with Dropout regularization
class WaveNet(nn.Module):
    def __init__(self, num_inputs, num_channels, dilation_depth=9, kernel_size=2, dropout=0.3):
        super(WaveNet, self).__init__()
        self.dilation_depth = dilation_depth
        self.kernel_size = kernel_size
        self.num_channels = num_channels
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        
        self.dilated_convs = nn.ModuleList()
        for i in range(dilation_depth):
            if i == 0:
                self.dilated_convs.append(nn.Conv1d(num_inputs, num_channels, kernel_size, dilation=2**i, padding=2**i))
            else:
                self.dilated_convs.append(nn.Conv1d(num_channels, num_channels, kernel_size, dilation=2**i, padding=2**i))
        
        self.fc = nn.Linear(num_channels, CONFIG.N_CLASSES)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        for layer in self.dilated_convs:
            x = layer(x)
            x = self.relu(x)
            x = self.dropout(x)
        
        x = x[:, :, -1]
        x = self.fc(x)
        return self.sigmoid(x)

# Evaluation functions definition
def expected_calibration_error(y_true, y_prob, n_bins=10):
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=n_bins, strategy='uniform')
    bin_totals = np.histogram(y_prob, bins=np.linspace(0, 1, n_bins + 1), density=False)[0]
    non_empty_bins = bin_totals > 0
    bin_weights = bin_totals / len(y_prob)
    bin_weights = bin_weights[non_empty_bins]
    prob_true = prob_true[:len(bin_weights)]
    prob_pred = prob_pred[:len(bin_weights)]
    ece = np.sum(bin_weights * np.abs(prob_true - prob_pred))
    return ece

def auc_brier_ece(answer_df, submission_df):
    # Check for missing values in submission_df
    if submission_df.isnull().values.any():
        raise ValueError("The submission dataframe contains missing values.")

    # Check if the number and names of columns are the same in both dataframes
    if len(answer_df.columns) != len(submission_df.columns) or not all(answer_df.columns == submission_df.columns):
        raise ValueError("The columns of the answer and submission dataframes do not match.")
        
    submission_df = submission_df[submission_df.index.isin(answer_df.index)]
    submission_df.index = range(submission_df.shape[0])
    
    # Calculate AUC for each class
    auc_scores = []
    for column in answer_df.columns:
        y_true = answer_df[column]
        y_scores = submission_df[column]
        auc = roc_auc_score(y_true, y_scores)
        auc_scores.append(auc)

    # Calculate mean AUC
    mean_auc = np.mean(auc_scores)

    brier_scores = []
    ece_scores = []
    
    # Calculate Brier Score and ECE for each class
    for column in answer_df.columns:
        y_true = answer_df[column].values
        y_prob = submission_df[column].values
        
        # Brier Score
        brier = mean_squared_error(y_true, y_prob)
        brier_scores.append(brier)
        
        # ECE
        ece = expected_calibration_error(y_true, y_prob)
        ece_scores.append(ece)
    
    # Calculate mean Brier Score and mean ECE
    mean_brier = np.mean(brier_scores)
    mean_ece = np.mean(ece_scores)
    
    # Calculate combined score
    combined_score = 0.5 * (1 - mean_auc) + 0.25 * mean_brier + 0.25 * mean_ece
    
    return combined_score

# Model training function with learning rate scheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau

def train_with_scheduler(model, optimizer, scheduler, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    val_labels = y_val.cpu().numpy()
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score, val_outputs = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        
        combined_score = auc_brier_ece(pd.DataFrame(val_labels), pd.DataFrame(val_outputs))
        
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}] Combined Score: [{combined_score:.5f}]')
            
        scheduler.step(_val_loss)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score
    
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score, all_probs

# Model definition
model = WaveNet(num_inputs=5 * CONFIG.N_MFCC, num_channels=64, dilation_depth=9, dropout=0.3)  # Adjusted for WaveNet structure
optimizer = torch.optim.AdamW(params=model.parameters(), lr=CONFIG.LR)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, verbose=True)

# Model training with scheduler
infer_model = train_with_scheduler(model, optimizer, scheduler, train_loader, val_loader, device)

# Unlabeled data pseudo-labeling
unlabeled_path = './unlabeled_data'
unlabeled_files = [os.path.join(unlabeled_path, f) for f in os.listdir(unlabeled_path) if f.endswith('.ogg')]

def get_mfcc_feature_from_files(file_paths):
    features = []
    for file_path in tqdm(file_paths):
        y, sr = librosa.load(file_path, sr=CONFIG.SR)
        
        # Time Stretching
        stretched_y = librosa.effects.time_stretch(y, rate=0.8)
        
        # MFCC
        mfcc = librosa.feature.mfcc(y=stretched_y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        
        # Chroma
        chroma = librosa.feature.chroma_stft(y=stretched_y, sr=sr)
        
        # Mel spectrogram
        mel = librosa.feature.melspectrogram(y=stretched_y, sr=sr)
        
        # Spectral contrast
        contrast = librosa.feature.spectral_contrast(y=stretched_y, sr=sr)
        
        # Tonnetz
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(stretched_y), sr=sr)
        
        # Frequency Masking on Chroma
        masker = np.random.uniform(low=0.0, high=1.0, size=chroma.shape) < 0.2
        chroma[masker] = 0.0
        
        # Fix length
        mfcc = librosa.util.fix_length(mfcc, size=CONFIG.N_MFCC_LEN, axis=1)
        chroma = librosa.util.fix_length(chroma, size=CONFIG.N_MFCC_LEN, axis=1)
        mel = librosa.util.fix_length(mel, size=CONFIG.N_MFCC_LEN, axis=1)
        contrast = librosa.util.fix_length(contrast, size=CONFIG.N_MFCC_LEN, axis=1)
        tonnetz = librosa.util.fix_length(tonnetz, size=CONFIG.N_MFCC_LEN, axis=1)
        
        # Concatenate all features
        feature = np.concatenate([mfcc, chroma, mel, contrast, tonnetz], axis=0)
        features.append(feature)
    return features

unlabeled_features = get_mfcc_feature_from_files(unlabeled_files)
unlabeled_features = np.array(unlabeled_features)
unlabeled_dataset = CustomDataset(unlabeled_features, torch.zeros((len(unlabeled_features), CONFIG.N_CLASSES)))
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

def pseudo_labeling(model, loader, device):
    model.to(device)
    model.eval()
    pseudo_labels = []
    with torch.no_grad():
        for features, _ in tqdm(iter(loader)):
            features = features.float().to(device)
            probs = model(features)
            pseudo_labels.append(probs.cpu().detach().numpy())
    return np.concatenate(pseudo_labels, axis=0)

# Pseudo-labeling based on model predictions on unlabeled data
pseudo_labels = pseudo_labeling(infer_model, unlabeled_loader, device)
pseudo_labels = (pseudo_labels > 0.9).astype(int)  # Threshold of 0.9 for pseudo-labeling

# Create a CustomDataset for pseudo-labeled data
pseudo_labeled_dataset = CustomDataset(unlabeled_features, torch.tensor(pseudo_labels).float())

# Combine the original training dataset with pseudo-labeled data
train_dataset_combined = torch.utils.data.ConcatDataset([train_dataset, pseudo_labeled_dataset])

# Create a new DataLoader for combined training dataset
train_loader_combined = DataLoader(train_dataset_combined, batch_size=CONFIG.BATCH_SIZE, shuffle=True)

# Train the model with pseudo-labeled data included
infer_model = train_with_scheduler(model, optimizer, scheduler, train_loader_combined, val_loader, device)

# After re-training with pseudo-labeled data, continue with the test set inference
test = pd.read_csv('./test.csv')
test_features, _ = get_mfcc_feature(test, False)
test_features = np.array(test_features)
test_dataset = CustomDataset(test_features, torch.zeros((len(test_features), CONFIG.N_CLASSES)))
test_loader = DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

# Function for inference on test set
def inference(model, test_loader, device):
    model.eval()
    preds = []
    with torch.no_grad():
        for features, _ in tqdm(test_loader):
            features = features.float().to(device)
            outputs = model(features)
            preds.append(outputs.cpu().numpy())
    preds = np.concatenate(preds, axis=0)
    return preds

# Perform inference on the test set using the updated model
preds = inference(infer_model, test_loader, device)

# Prepare submission
submit = pd.read_csv('./sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.to_csv('./wavenet4.csv', index=False)

# Evaluate combined score before test submission
combined_score_before_test = auc_brier_ece(pd.DataFrame(y_val.cpu().numpy()), pd.DataFrame(preds))
print(f'Final Combined Score before test: {combined_score_before_test:.5f}')
