## Import

In [None]:
import random
import pandas as pd
import numpy as np
import os
import librosa

from tqdm.auto import tqdm

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

import warnings
warnings.filterwarnings(action='ignore') 

## Load Data

In [None]:
def _make_dense_onehot_encoder():
    """Return a OneHotEncoder that emits dense arrays on old and new scikit-learn."""
    try:
        # scikit-learn >= 1.2 names the option ``sparse_output``; the old
        # ``sparse`` keyword was removed in 1.4 and raises TypeError there.
        return OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older releases only accept the original ``sparse`` keyword.
        return OneHotEncoder(sparse=False)


def _encode_gender(ohe, x):
    """Replace the 'gender' column of ``x`` with its one-hot encoded columns.

    ``ohe`` must already be fitted on the training data's 'gender' values;
    this helper only calls ``transform``.
    """
    encoded = ohe.transform(x['gender'].values.reshape(-1, 1))
    # Reuse x's index so the axis=1 concat lines rows up positionally even
    # when x does not carry a default RangeIndex (otherwise NaNs appear).
    encoded_df = pd.DataFrame(encoded, columns=ohe.categories_[0], index=x.index)
    return pd.concat([x.drop(columns=['gender']), encoded_df], axis=1)


def preprocessing(file_train) -> None:
    """Fit an MLP on a feature CSV and print its scores on that same data.

    The CSV is read with ``pd.read_csv``. The 'id' and 'covid19' columns are
    dropped from the features, 'covid19' is used as the target, and the
    'gender' column is one-hot encoded before fitting
    ``MLPClassifier(random_state=41)``.

    Prints, in order: the accuracy score, the confusion matrix and the F1
    score. All three are computed on the training rows themselves (no
    held-out split is made), so they describe fit quality, not generalization.

    Args:
        file_train: Path (or anything ``pd.read_csv`` accepts) of the CSV
            holding the 'id', 'covid19', 'gender' and feature columns.

    Returns:
        None. Results are only printed.
    """
    train_df = pd.read_csv(file_train)

    train_x = train_df.drop(columns=['id', 'covid19'])
    train_y = train_df['covid19']

    # 'gender' is categorical and needs extra preprocessing: one-hot encode it.
    ohe = _make_dense_onehot_encoder()
    ohe.fit(train_x['gender'].values.reshape(-1, 1))
    train_x = _encode_gender(ohe, train_x)

    model = MLPClassifier(random_state=41)
    model.fit(train_x, train_y)

    # Predictions are made on the data the model was just trained on.
    pred = model.predict(train_x)
    print(f"accuracy score: {accuracy_score(train_y, pred)}")
    print(confusion_matrix(train_y, pred))
    print(f1_score(train_y, pred))
    

### MFCC 32

In [None]:
# Fit the MLP on ./train_mfcc_data.csv and print its training-set scores
# (presumably the 32-coefficient MFCC features, per the section heading).
preprocessing('./train_mfcc_data.csv')



accuracy score: 0.9038107752956636
[[3348  151]
 [ 215   91]]
0.3321167883211679


### MFCC 16

In [None]:
# Fit the MLP on ./train_mfcc_data(16).csv and print its training-set scores
# (presumably the 16-coefficient MFCC features, per the section heading).
preprocessing('./train_mfcc_data(16).csv')

accuracy score: 0.9195795006570302
[[3496    3]
 [ 303    3]]
0.019230769230769232


### MFCC 64

In [None]:
# Fit the MLP on ./train_mfcc_data(64).csv and print its training-set scores
# (presumably the 64-coefficient MFCC features, per the section heading).
preprocessing('./train_mfcc_data(64).csv')

accuracy score: 0.9256241787122208
[[3491    8]
 [ 275   31]]
0.17971014492753623
