# Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import librosa
import librosa.display as dsp
from IPython.display import Audio

from tqdm.auto import tqdm

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
import torch
# import torchaudio

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') #GPU 할당

# Hyperparameter Setting

In [3]:
# CFG = {
#     'SR':16000,
#     'N_MFCC':32, # MFCC 벡터를 추출할 개수
#     'SEED':42
# }

# Fixed Random-Seed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(929)

In [5]:
# def seed_everything(seed):
#     random.seed(seed)
#     os.environ['PYTHONHASHSEED'] = str(seed)
#     np.random.seed(seed)

# seed_everything(CFG['SEED']) # Seed 고정

# Data Pre-Processing 1

In [6]:
train = pd.read_csv('./train_data.csv')
test = pd.read_csv('./test_data.csv')
# unlabeled_df = pd.read_csv('./unlabeled_data.csv')

In [7]:
print(train.shape, test.shape)

(3805, 6) (5732, 5)


In [8]:
data, sample_rate = librosa.load('./wav_dataset/train/00001.wav', sr = 16000)
print('sample_rate:', sample_rate, ', audio shape:', data.shape)
print('length:', data.shape[0]/float(sample_rate), 'secs')

sample_rate: 16000 , audio shape: (78720,)
length: 4.92 secs


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [9]:
def get_mfcc_feature(df, data_type, save_path):
    # Data Folder path
    root_folder = './wav_dataset'
    if os.path.exists(save_path):
        print(f'{save_path} is exist.')
        return
    features = []
    for uid in tqdm(df['id']):
        root_path = os.path.join(root_folder, data_type)
        path = os.path.join(root_path, str(uid).zfill(5)+'.wav')

        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(path, sr=CFG['SR'])
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CFG['N_MFCC'])

        y_feature = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mfcc:
            y_feature.append(np.mean(e))
        features.append(y_feature)
    
    # 기존의 자가진단 정보를 담은 데이터프레임에 추출된 오디오 Feature를 추가
    mfcc_df = pd.DataFrame(features, columns=['mfcc_'+str(x) for x in range(1,CFG['N_MFCC']+1)])
    df = pd.concat([df, mfcc_df], axis=1)
    df.to_csv(save_path, index=False)
    print('Done.')

In [10]:
get_mfcc_feature(train, 'train', './train_mfcc_data.csv')
get_mfcc_feature(test, 'test', './test_mfcc_data.csv')

./train_mfcc_data.csv is exist.
./test_mfcc_data.csv is exist.


In [11]:
# get_mfcc_feature2('unlabeled', './unlabeled_mfcc_data.csv')

In [12]:
train_mfcc_data = pd.read_csv('./train_mfcc_data.csv')
test_mfcc_data = pd.read_csv('./test_mfcc_data.csv')

train_mfcc_data

Unnamed: 0,id,age,gender,respiratory_condition,fever_or_muscle_pain,covid19,mfcc_1,mfcc_2,mfcc_3,mfcc_4,...,mfcc_23,mfcc_24,mfcc_25,mfcc_26,mfcc_27,mfcc_28,mfcc_29,mfcc_30,mfcc_31,mfcc_32
0,1,24,female,0,1,0,-276.01898,30.519340,-20.314617,-6.689037,...,-2.679408,2.454339,-1.176285,2.314315,-0.339533,2.514413,-4.784703,1.239072,-1.556883,-1.548770
1,2,51,male,0,0,0,-312.99362,54.141323,-1.748550,-9.437217,...,-7.248304,1.238725,-6.894970,-1.810402,-7.259594,0.715029,-1.372265,-1.760624,-2.735181,1.134190
2,3,22,male,0,0,0,-438.60306,46.675842,-22.771935,-3.527922,...,-0.136723,-1.707353,2.649277,1.208829,-0.033701,-1.008729,-0.687255,-0.472232,0.850565,0.353839
3,4,29,female,1,0,0,-369.26100,47.762012,-8.256503,-2.891349,...,-0.389230,4.033148,-2.658165,2.867084,1.679876,2.136411,0.289792,1.709179,-0.592465,1.754549
4,5,23,male,0,0,0,-535.68915,7.509357,-7.762263,2.567660,...,-0.279360,-0.292286,-1.559678,0.328864,-1.053423,0.844060,-0.788914,1.182740,-0.527028,1.208361
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3800,3801,53,male,0,0,0,-329.53840,69.317080,-41.498420,3.398324,...,-0.447236,1.477676,-5.139314,0.058984,1.145387,3.968560,-4.152923,2.655675,-4.001793,-2.753481
3801,3802,25,male,0,0,0,-387.18520,74.883385,-13.053332,0.423010,...,-3.050042,4.993294,1.104003,6.089378,2.936215,3.518800,-0.500065,3.678556,2.742439,5.705073
3802,3803,26,female,0,0,0,-347.74250,58.946648,12.572187,28.366724,...,-2.563241,-2.590208,-2.765020,-1.786291,-3.289306,-1.355590,-0.098087,-1.539166,-1.679146,-1.346460
3803,3804,27,female,0,0,0,-180.65393,72.516770,-16.372229,-8.363718,...,-8.350143,6.216520,-2.514634,2.063279,-1.506954,0.092602,-4.883226,0.641473,-5.265770,-0.327298


In [13]:
# wav 파일의 MFCC Feature와 상태정보를 합친 학습데이터를 불러옵니다.
train_df = pd.read_csv('./train_mfcc_data.csv')

# 학습데이터를 모델의 input으로 들어갈 x와 label로 사용할 y로 분할
train_x = train_mfcc_data.drop(columns=['id', 'covid19'])
train_y = train_mfcc_data['covid19']

In [14]:
train_y.value_counts()

0    3499
1     306
Name: covid19, dtype: int64

In [15]:
def onehot_encoding(ohe, x):
    # 학습데이터로 부터 fit된 one-hot encoder (ohe)를 받아 transform 시켜주는 함수
    encoded = ohe.transform(x['gender'].values.reshape(-1,1))
    encoded_df = pd.DataFrame(encoded, columns=ohe.categories_[0])
    x = pd.concat([x.drop(columns=['gender']), encoded_df], axis=1)
    return x

In [16]:
# 'gender' column의 경우 추가 전처리가 필요 -> OneHotEncoder 적용
ohe = OneHotEncoder(sparse=False)
ohe.fit(train_x['gender'].values.reshape(-1,1))
train_x = onehot_encoding(ohe, train_x)
train_x

Unnamed: 0,age,respiratory_condition,fever_or_muscle_pain,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,...,mfcc_26,mfcc_27,mfcc_28,mfcc_29,mfcc_30,mfcc_31,mfcc_32,female,male,other
0,24,0,1,-276.01898,30.519340,-20.314617,-6.689037,-10.224930,-7.443150,-33.740433,...,2.314315,-0.339533,2.514413,-4.784703,1.239072,-1.556883,-1.548770,1.0,0.0,0.0
1,51,0,0,-312.99362,54.141323,-1.748550,-9.437217,4.317682,-0.148136,-17.331125,...,-1.810402,-7.259594,0.715029,-1.372265,-1.760624,-2.735181,1.134190,0.0,1.0,0.0
2,22,0,0,-438.60306,46.675842,-22.771935,-3.527922,-13.949551,0.344213,-9.082897,...,1.208829,-0.033701,-1.008729,-0.687255,-0.472232,0.850565,0.353839,0.0,1.0,0.0
3,29,1,0,-369.26100,47.762012,-8.256503,-2.891349,-21.302510,-8.495335,-11.653670,...,2.867084,1.679876,2.136411,0.289792,1.709179,-0.592465,1.754549,1.0,0.0,0.0
4,23,0,0,-535.68915,7.509357,-7.762263,2.567660,-5.632455,-0.318077,-6.422602,...,0.328864,-1.053423,0.844060,-0.788914,1.182740,-0.527028,1.208361,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3800,53,0,0,-329.53840,69.317080,-41.498420,3.398324,-0.727913,-29.284441,-8.326742,...,0.058984,1.145387,3.968560,-4.152923,2.655675,-4.001793,-2.753481,0.0,1.0,0.0
3801,25,0,0,-387.18520,74.883385,-13.053332,0.423010,-17.047607,-14.365114,-20.488728,...,6.089378,2.936215,3.518800,-0.500065,3.678556,2.742439,5.705073,0.0,1.0,0.0
3802,26,0,0,-347.74250,58.946648,12.572187,28.366724,20.244375,10.567720,6.461148,...,-1.786291,-3.289306,-1.355590,-0.098087,-1.539166,-1.679146,-1.346460,1.0,0.0,0.0
3803,27,0,0,-180.65393,72.516770,-16.372229,-8.363718,-27.403595,3.756918,-12.728548,...,2.063279,-1.506954,0.092602,-4.883226,0.641473,-5.265770,-0.327298,1.0,0.0,0.0


In [17]:
# 위의 학습데이터를 전처리한 과정과 동일하게 test data에도 적용
test_x = pd.read_csv('./test_mfcc_data.csv')
test_x = test_x.drop(columns=['id'])
# Data Leakage에 유의하여 train data로만 학습된 ohe를 사용
test_x = onehot_encoding(ohe, test_x)
test_x

Unnamed: 0,age,respiratory_condition,fever_or_muscle_pain,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,...,mfcc_26,mfcc_27,mfcc_28,mfcc_29,mfcc_30,mfcc_31,mfcc_32,female,male,other
0,48,1,0,-620.30580,9.424123,0.727018,2.465860,-0.261858,-1.100758,-3.199885,...,0.039260,-0.395975,-0.261139,-0.092119,-0.004060,-0.128369,-0.162123,1.0,0.0,0.0
1,24,0,0,-494.12106,8.865809,-3.128296,-2.435534,-3.131949,2.349788,-5.630660,...,-0.009176,-1.300374,0.349196,-0.747507,0.488091,-0.995132,0.394098,1.0,0.0,0.0
2,29,0,0,-1059.75440,-1.519202,-8.192215,0.108348,-6.175036,2.347849,-8.913836,...,0.130874,0.261509,1.180880,-2.915745,-3.374779,-0.757438,-3.469919,0.0,1.0,0.0
3,39,0,0,-362.80643,27.674362,-9.778632,10.285870,-1.541319,-3.504866,-14.483954,...,0.397847,-2.791587,0.033708,0.615491,2.018101,1.088379,0.863595,1.0,0.0,0.0
4,34,0,0,-388.05432,45.460460,-14.742424,-2.684571,-10.204907,10.337976,-1.117903,...,-5.373309,1.705544,3.001329,-5.252439,-0.027374,2.200471,-2.151908,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5727,43,0,0,-363.87415,48.446053,6.146007,12.088921,-8.301061,-1.040586,-11.340342,...,-0.560810,1.022309,-1.620644,-5.837281,-1.337036,-1.708651,-0.605050,0.0,1.0,0.0
5728,48,0,1,-650.64930,3.089389,1.821568,5.398409,-4.807905,-3.338209,-7.359063,...,0.428167,-0.812079,1.184415,-1.119913,0.296456,0.125727,-0.273791,1.0,0.0,0.0
5729,44,0,0,-335.87747,54.853344,-16.275513,14.433867,-9.924277,-5.135542,-28.151585,...,-1.240127,-2.499818,-1.289216,-4.292271,-0.031026,-1.064259,0.318412,1.0,0.0,0.0
5730,25,0,0,-535.33655,17.660162,-9.821951,5.716812,-8.438224,0.311916,-11.437690,...,-1.572410,-1.555100,-1.462370,-1.572655,-0.863414,-0.713437,0.072218,1.0,0.0,0.0


In [18]:
print(train_x.shape, train_y.shape, test_x.shape)

(3805, 38) (3805,) (5732, 38)


In [19]:
train_x = np.array(train_x)
test_x = np.array(test_x)

In [23]:
train_x

array([[24.,  0.,  1., ...,  1.,  0.,  0.],
       [51.,  0.,  0., ...,  0.,  1.,  0.],
       [22.,  0.,  0., ...,  0.,  1.,  0.],
       ...,
       [26.,  0.,  0., ...,  1.,  0.,  0.],
       [27.,  0.,  0., ...,  1.,  0.,  0.],
       [49.,  1.,  1., ...,  1.,  0.,  0.]])

In [24]:
# 음성의 길이 중 가장 작은 길이를 구합니다.

def get_mini(data):

    mini = 9999999
    for i in data:
        if len(i) < mini:
            mini = len(i)

    return mini

train_mini = get_mini(train_x)
test_mini = get_mini(test_x)

#음성들의 길이를 맞춰줍니다.

mini = np.min([train_mini, test_mini])

In [25]:
print('가장 작은 길이 :', mini)

가장 작은 길이 : 38


In [26]:
def set_length(data, d_mini):

    result = []
    for i in data:
        result.append(i[:d_mini])
    result = np.array(result)

    return result

train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

In [27]:
print('train :', train_x.shape)
print('test :', test_x.shape)

train : (3805, 38)
test : (5732, 38)


# Data Pre-Processing 2

In [28]:
extracted_features = librosa.feature.mfcc(y=train_x[0], sr=16000, n_mfcc=40)
extracted_features.shape

(40, 1)

In [33]:
extracted_features

array([[ 4.47631288e+02],
       [-1.31035432e+00],
       [ 9.05853981e+00],
       [ 5.41942175e+00],
       [ 1.09960780e+00],
       [-6.29049996e+00],
       [ 4.02097359e+00],
       [ 3.43274397e+00],
       [-7.38362587e-01],
       [-2.16334821e+00],
       [ 1.94315294e-01],
       [ 6.50638485e-01],
       [ 2.10119048e+00],
       [-2.30962041e+00],
       [ 9.72118145e-01],
       [-5.78225534e-01],
       [ 1.95791915e-01],
       [ 7.47172675e-01],
       [-7.98419565e-01],
       [-7.97831217e-03],
       [ 3.19757063e-01],
       [-2.08582702e-01],
       [ 5.12937712e-01],
       [-8.30460121e-01],
       [ 5.65980340e-01],
       [-6.66905805e-02],
       [-6.93358031e-02],
       [-1.06610303e-01],
       [ 3.24338471e-01],
       [-4.56710285e-01],
       [ 4.65941971e-01],
       [-3.19231084e-01],
       [ 1.70306970e-01],
       [-1.40852413e-01],
       [ 1.75763497e-01],
       [-1.89269690e-01],
       [ 2.21098630e-01],
       [-2.63266678e-01],
       [ 2.9

In [29]:
def preprocess_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=40)
        mfccs.append(extracted_features)
            
    return mfccs

In [31]:
train_mfccs = preprocess_dataset(train_x)
train_mfccs = np.array(train_mfccs)
train_mfccs = train_mfccs.reshape(-1, train_mfccs.shape[1], train_mfccs.shape[2], 1)
#test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

In [32]:
np.array(train_mfccs).shape

(3805, 40, 1, 1)

In [34]:
import torchvision.datasets as datasets # 데이터셋 집합체
import torchvision.transforms as transforms # 변환 툴

from torch.utils.data import DataLoader # 학습 및 배치로 모델에 넣어주기 위한 툴
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, X, y, train_mode=True, transforms=None): #필요한 변수들을 선언
        self.X = X
        self.y = y
        self.train_mode = train_mode
        self.transforms = transforms

    def __getitem__(self, index): #index번째 data를 return
        X = self.X[index]
        
        if self.transforms is not None:
            X = self.transforms(X)

        if self.train_mode:
            y = self.y[index]
            return X, y
        else:
            return X
    
    def __len__(self): #길이 return
        return len(self.X)

ModuleNotFoundError: No module named 'torchvision'

In [None]:
def spec_to_image(spec, eps=1e-6):
  mean = spec.mean()
  std = spec.std()
  spec_norm = (spec - mean) / (std + eps)
  spec_min, spec_max = spec_norm.min(), spec_norm.max()
  spec_scaled = (spec_norm - spec_min) / (spec_max - spec_min)
  return spec_scaled

In [None]:
audio_mfcc_train = []
audio_mfcc_test = []
audio_mels_train = []
audio_mels_test = []

for data in train_x:
  extracted_features = librosa.feature.mfcc(y=data, sr=16000, n_mfcc=32)
  audio_mfcc_train.append(spec_to_image(extracted_features))

  extracted_features2 = librosa.feature.melspectrogram(y=data, sr=16000, n_mels=32)
  extracted_features2 = librosa.power_to_db(extracted_features2, ref=np.max)
  audio_mels_train.append(spec_to_image(extracted_features2))

# for data in test_x:
#   extracted_features = librosa.feature.mfcc(y=data, sr=16000, n_mfcc=32)
#   audio_mfcc_test.append(spec_to_image(extracted_features))

#   extracted_features2 = librosa.feature.melspectrogram(y=data, sr=16000, n_mels=32)
#   extracted_features2 = librosa.power_to_db(extracted_features2, ref=np.max)
#   audio_mels_test.append(spec_to_image(extracted_features2))

In [None]:
audio_mels_train

# Modeling

In [None]:
# import tensorflow as tf

In [None]:
# model = tf.keras.models.Sequential([
#   tf.keras.layers.Flatten(input_shape=(28, 28)),
#   tf.keras.layers.Dense(128, activation='relu'),
#   tf.keras.layers.Dropout(0.2),
#   tf.keras.layers.Dense(10, activation='softmax')
# ])

# model.compile(optimizer='adam',
#               loss='binary_crossentropy',
#               metrics=['accuracy'])

In [None]:
import torch.nn as nn

class CNNclassification(torch.nn.Module):
    def __init__(self):
        super(CNNclassification, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(3, 3), padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(3, 3), padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=(3, 3), padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer4 = torch.nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=(3, 3), padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(2048, 10) #fully connected layer(ouput layer)
        )    
        
    def forward(self, x):
        
        x = self.layer1(x) #1층
        
        x = self.layer2(x) #2층
         
        x = self.layer3(x) #3층
        
        x = self.layer4(x) #4층
        
        x = torch.flatten(x, 1) # N차원 배열 -> 1차원 배열
        
        x = self.fc_layer(x)
        return x

model = CNNclassification()

In [None]:
pip install torchinfo

In [None]:
from torchinfo import summary

print(summary(model, input_size = (1,1,32,32)))

# Train

In [None]:
'''
mlp = MLPClassifier(random_state=CFG['SEED']) # Sklearn에서 제공하는 Multi-layer Perceptron classifier 사용
mlp.fit(train_x, train_y) # Model Train
'''

In [None]:
from tqdm.auto import tqdm

def train(model, optimizer, train_loader, scheduler, device): 
    model.to(device)
    n = len(train_loader)
    best_acc = 0
    
    for epoch in range(1,num_epochs): #에포크 설정
        model.train() #모델 학습
        running_loss = 0.0
        
        for wav, label in tqdm(iter(train_loader), disable=True):
            
            wav, label = wav.type(torch.FloatTensor), label.type(torch.long)

            wav, label = wav.to(device), label.to(device) #배치 데이터
            optimizer.zero_grad() #배치마다 optimizer 초기화
        
            # Data -> Model -> Output
            logit = model(wav) #예측값 산출
            loss = criterion(logit, label) #손실함수 계산
            
            # 역전파
            loss.backward() #손실함수 기준 역전파 
            optimizer.step() #가중치 최적화
            running_loss += loss.item()
        
        if scheduler is not None:
            scheduler.step()
            
            
        #Validation set 평가
        model.eval() #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
        vali_loss = 0.0
        correct = 0
       
        with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
            for wav, label in tqdm(iter(vali_loader), disable=True):

                wav, label = wav.type(torch.FloatTensor), label.type(torch.long)
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                vali_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
                correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
        vali_acc = 100 * correct / len(vali_loader.dataset)
        #print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ( {:.0f}%)\n'.format(vali_loss / len(vali_loader), correct, len(vali_loader.dataset), 100 * correct / len(vali_loader.dataset)))
        
        #베스트 모델 저장
        if best_acc < vali_acc:
            best_acc = vali_acc
            torch.save(model.state_dict(), 'best_model.pth') #이 디렉토리에 best_model.pth을 저장

def predict_valid(model, vali_loader, device):
    model.eval()
    vali_loss = 0.0
    correct = 0
    with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
        for wav, label in tqdm(iter(vali_loader), disable=True):
            
            wav, label = wav.type(torch.FloatTensor), label.type(torch.long)

            wav, label = wav.to(device), label.to(device) #배치 데이터

            logit = model(wav)
            vali_loss += criterion(logit, label)

            pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
            correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
    vali_acc = 100 * correct / len(vali_loader.dataset)
    return vali_acc    

def predict(model, test_loader, device):
    model.eval()
    model_pred = []
    with torch.no_grad():
        for wav in tqdm(iter(test_loader), disable=True):
            wav = wav.type(torch.FloatTensor)

            wav = wav.to(device)
            
            pred_logit = model(wav)

            model_pred.extend(pred_logit.tolist())
    return model_pred

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

label_encoder = LabelEncoder()

num_epochs = 70
batch_size = 32

skf = StratifiedKFold(n_splits = 10, random_state = 42, shuffle = True) #총 5번의 fold 진행

n = 0

pred_list = []

for train_index, valid_index in skf.split(audio_mfcc_train, label_encoder.fit_transform(train_y)):
  n += 1

  print("===== %d fold =====" %(n))

 
  ## mfcc
  X_train, X_valid = audio_mfcc_train[train_index], audio_mfcc_train[valid_index]
  y_train, y_valid = train_y[train_index], train_y[valid_index]
  '''
  train_dataset = CustomDataset(X=X_train, y=y_train)
  train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

  vali_dataset = CustomDataset(X=X_valid, y=y_valid)
  vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

  model = CNNclassification2().to(device)
  criterion = torch.nn.CrossEntropyLoss().to(device)
  optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.001)
  scheduler = None

  train(model, optimizer, train_loader, scheduler, device)

  test_dataset = CustomDataset(X=audio_mfcc_test, y= None, train_mode=False)
  test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

  # Validation Accuracy가 가장 뛰어난 모델을 불러옵니다.
  checkpoint = torch.load('best_model.pth')
  model = CNNclassification2().to(device)
  model.load_state_dict(checkpoint)

  print("%d fold mfcc score : %d%%" %(n, predict_valid(model, vali_loader, device)))

  mfcc_preds = predict(model, test_loader, device)
  '''

  ## melspectogram
  X_train, X_valid = audio_mels_train[train_index], audio_mels_train[valid_index]

  train_dataset = CustomDataset(X=X_train, y=y_train)
  train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

  vali_dataset = CustomDataset(X=X_valid, y=y_valid)
  vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

  model = CNNclassification().to(device)
  criterion = torch.nn.CrossEntropyLoss().to(device)
  optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.001)
  scheduler = None

  train(model, optimizer, train_loader, scheduler, device)

  test_dataset = CustomDataset(X=audio_mels_test, y= None, train_mode=False)
  test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

  # Validation Accuracy가 가장 뛰어난 모델을 불러옵니다.
  checkpoint = torch.load('best_model.pth')
  model = CNNclassification().to(device)
  model.load_state_dict(checkpoint)

  print("%d fold mels score : %d%%" %(n, predict_valid(model, vali_loader, device)))

  mels_preds = predict(model, test_loader, device)

  #pred_list.append(np.array(mfcc_preds) + np.array(mels_preds))
  pred_list.append(np.array(mels_preds))

# Inference

In [None]:
'''
# Model 추론
preds = model.predict(test_x)
'''

In [None]:
model.evaluate(test_x, batch_size=28)

In [None]:
# 정확도

print("훈련 세트 정확도 : {:.3f}".format(mlp.score(train_x, train_y)))
print("테스트 세트 정확도 : {:.3f}".format(mlp.score(test_x, preds)))

# Submission

In [None]:
submission = pd.read_csv('./sample_submission.csv')
submission['covid19'] = preds
submission.to_csv('./submit.csv', index=False)