# [음성 분류 경진대회](https://dacon.io/competitions/official/235905/overview/description)

In [1]:
import librosa
import librosa.display as dsp
from IPython.display import Audio

In [2]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [3]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(929)

## 데이터 불러오기

In [4]:
import pandas as pd
train = pd.read_csv('data/train.csv')
train.head()

Unnamed: 0,file_name,label
0,001.wav,9
1,002.wav,0
2,004.wav,1
3,005.wav,8
4,006.wav,0


In [5]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  400 non-null    object
 1   label      400 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ KB


## 데이터 전처리

In [6]:
def train_dataset():
    folder = "data/train/"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000)
            class_label = int(train[train.file_name == file].label)
            dataset.append([data,class_label])

    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [7]:
train_wav = train_dataset()

100%|[32m██████████[0m| 400/400 [00:09<00:00, 41.96it/s]

Dataset 생성 완료





In [8]:
train_wav.head()

Unnamed: 0,data,label
0,"[-9.958636e-05, -0.00018014791, -0.0001873678,...",4
1,"[4.732452e-05, 0.00010305498, 8.2738516e-05, 7...",0
2,"[0.00015033052, 0.00025637593, 0.00026738388, ...",7
3,"[-0.0005239519, -0.00088986446, -0.00075263606...",6
4,"[-0.00015332195, -0.00029302362, -8.321165e-05...",3


## 음성 데이터 특징 추출

In [9]:
def preprocess_train_dataset(data):
    mfccs = []
    for i in data:
        extracted_features = librosa.feature.mfcc(y=i,
                                                sr=16000,
                                                n_mfcc=40)
        extracted_features = np.mean(extracted_features.T,axis=0)
        mfccs.append(extracted_features)
            
    return mfccs

mfccs = preprocess_train_dataset(train_wav.data)
mfccs = np.array(mfccs)