In [1]:
# Standard library
import os
import pickle  # model persistence
import sys
import wave

# Third-party
import joblib  # model persistence
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
#from scipy.stats import skew

#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
#from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve  # learning-curve plots
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
#from sklearn import svm, metrics
from sklearn.svm import SVC  # model training
from sklearn.utils import shuffle  # model training

In [None]:
# Control file listing the training recordings. Defaults to the same file
# train_dataset reads, so the cells below work without a command-line argument.
file_path = 'fmcc_train.ctl'
if __name__ == '__main__':
    # Only honour argv[1] when it names an existing file: when this code runs
    # inside a notebook kernel, argv typically holds the kernel's own launch
    # options rather than a user-supplied path.
    if len(sys.argv) > 1 and os.path.isfile(sys.argv[1]):
        print(sys.argv[0]) # train.py
        print(sys.argv[1]) # a
        file_path = sys.argv[1]

# 파일형식 변환

In [None]:
# train_ctl 파일 읽어온 뒤 리스트에 담기
def raw_to_wav(file_path):
    with open(file_path) as f:
        lines = f.readlines()

    # 파일명을 담은 리스트를 기준으로 raw -> wav파일로 변환
    train_files_names = [i.strip("\n") for i in lines] # \n값 제거
    for i in train_files_names:
        with open("raw16k/train/{0}.raw".format(i), "rb") as inp_f:
            data = inp_f.read()
            with wave.open("raw16k/train/{0}.wav".format(i), "wb") as out_f:
                out_f.setnchannels(1) # 바이트 순서 Little Endian, 채널: 1 모노
                out_f.setsampwidth(2) # number of bytes (16bit = 2byte)
                out_f.setframerate(16000)
                out_f.writeframesraw(data)
                
def test_raw_to_wav(file_path):
    with open(file_path) as f:
        lines = f.readlines()

    # 파일명을 담은 리스트를 기준으로 raw -> wav파일로 변환
    test_files_names = [i.strip("\n") for i in lines] # \n값 제거
    for i in test_files_names:
        with open("raw16k/test/{0}.raw".format(i), "rb") as inp_f:
            data = inp_f.read()
            with wave.open("raw16k/test/{0}.wav".format(i), "wb") as out_f:
                out_f.setnchannels(1) # 바이트 순서 Little Endian, 채널: 1 모노
                out_f.setsampwidth(2) # number of bytes (16bit = 2byte)
                out_f.setframerate(16000)
                out_f.writeframesraw(data)
# Convert the training .raw recordings listed in the control file to .wav.
# NOTE(review): file_path is only assigned inside the `__name__` guard of the
# earlier cell -- confirm it is defined before this call runs.
raw_to_wav(file_path)

# 01 데이터 전처리

In [2]:
def train_dataset(file_path='fmcc_train.ctl'):
    """Load the training recordings named in a control file.

    Parameters
    ----------
    file_path : str
        Control file with one recording name per line (no extension); each
        name is resolved to ``raw16k/train/<name>.wav``. Defaults to the
        file the original implementation hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``fname`` (wav path), ``data`` (waveform returned by
        ``librosa.load`` at 16 kHz) and ``label`` (0 = male, 1 = female).
    """
    with open(file_path) as f:
        # Skip blank lines: an empty name has no first character to label by.
        train_files_names = [line.strip() for line in f if line.strip()]

    dataset = []
    for train_file in train_files_names:
        # The first character of the name encodes the speaker's gender.
        if train_file.startswith("M"):
            label = 0
        elif train_file.startswith("F"):
            label = 1
        else:
            # Names with any other prefix are left out, as before; checking
            # first avoids decoding audio that would be discarded.
            continue
        file_name = 'raw16k/train/' + train_file + ".wav"
        audio, sr = librosa.load(file_name, sr=16000)
        dataset.append([file_name, audio, label])

    print("TrainDataset 생성 완료")
    return pd.DataFrame(dataset, columns=['fname', 'data', 'label'])


def test_dataset(test_file_path):
    file_path = 'fmcc_test900_ref.txt'
    dataset = []
    with open(file_path) as f:
        lines = f.readlines()
    test_files_names = [i.strip("\n") for i in lines] # \n값 제거
    for test_file in test_files_names:
        test_file = test_file.split(" ")
        #fname = 'raw16k/test/' + test_file[0]
        fname = test_file[0]
        audio, sr = librosa.load('raw16k/test/' + fname + ".wav", sr=16000)
        if test_file[1] == "feml":
            #dataset.append([fname+".raw", audio, "feml"])
            dataset.append(['/home/wikim/exp/raw16k/test/'+fname+".raw", audio])
        elif test_file[1] == "male":
            #dataset.append([fname+".raw", audio, "male"])
            dataset.append(['/home/wikim/exp/raw16k/test/'+fname+".raw", audio])
    
    print("TestDataset 생성 완료")
    return pd.DataFrame(dataset, columns=['fname','data','label'])


# 음성의 길이 중 가장 긴 길이를 구합니다.

def get_max(data):
    """Return the length of the longest item in ``data``.

    An empty ``data`` yields the sentinel value -999.
    """
    return max((len(item) for item in data), default=-999)


def zero_pad(data, max_length):
    """Force every sequence in ``data`` to exactly ``max_length`` items.

    Shorter sequences get zeros appended at the end; sequences that are
    already long enough are cut down to their first ``max_length`` items.
    The results are stacked with ``np.array``.
    """
    def _fit(sequence):
        shortfall = max_length - len(sequence)
        if shortfall > 0:
            return np.pad(sequence, (0, shortfall), mode='constant')
        return sequence[:max_length]

    return np.array([_fit(sequence) for sequence in data])

In [3]:
# train_dataset takes the control-file path as its argument; pass it
# explicitly so the call does not depend on a default being present.
train_wav = train_dataset('fmcc_train.ctl')
#test_wav = test_dataset()

TrainDataset 생성 완료
TestDataset 생성 완료


In [4]:
# Waveforms from the 'data' column that train_dataset filled via librosa.load.
train_x = np.array(train_wav.data)
#test_x = np.array(test_wav.data)

# Length of the longest training waveform.
train_max = get_max(train_x)
#test_max = get_max(test_x)

# Only the training maximum is considered; the test-set counterpart is commented out above.
max_data = np.max(train_max)
print('가장 긴 길이 :', max_data)

가장 긴 길이 : 39040


In [5]:
# Pad (or truncate) every training waveform to max_data samples so all rows share one length.
train_x = zero_pad(train_x, max_data)
#test_x = zero_pad(test_x, max_data)
#print(train_x[0])
#print('train :', train_x.shape) #(number of samples, audio length)
#print('test :', test_x.shape)

[ 0.         -0.00064087 -0.00036621 ...  0.          0.
  0.        ]
train : (10000, 39040)
test : (900, 39040)


# 02 특징 추출

In [6]:
def preprocess_dataset(data):
    """Turn each waveform in ``data`` into one row of flattened MFCC values.

    Every waveform is passed to ``librosa.feature.mfcc`` with fixed settings
    and the returned matrix is flattened into a single feature vector.

    Returns a ``pandas.DataFrame`` with one row per input waveform.
    """
    def _flat_mfcc(signal):
        coefficients = librosa.feature.mfcc(
            y=signal,
            sr=16000,
            n_mfcc=40,        # number of MFCCs returned; raise it to extract more features
            n_fft=400,        # frame length in samples (25 ms at 16 kHz)
            hop_length=160,   # step between frames in samples (10 ms at 16 kHz)
        )
        return coefficients.flatten()

    return pd.DataFrame([_flat_mfcc(signal) for signal in data])

In [7]:
# Extract the flattened MFCC features for every padded training waveform.
train_mfccs = preprocess_dataset(train_x)
#train_mfccs = np.array(train_mfccs)

#test_mfccs = preprocess_dataset(test_x)
#test_mfccs = np.array(test_mfccs)

#train_mfccs.head()

In [8]:
# Rebuild the dataset: file name first, then the MFCC feature columns, then the label.
train_set = pd.concat([train_wav[['fname']], train_mfccs], axis=1)
train_set['label'] = train_wav['label']

#test_set = pd.DataFrame()
#test_set['fname'] = test_wav['fname']
#test_set = pd.concat([test_set,test_mfccs],axis=1)
#test_set['label'] = test_wav['label']

In [9]:
# Shuffle the datasets (train, test); random_state is fixed so the order is repeatable.
shuffle_train=shuffle(train_set, random_state = 20)
#shuffle_test=shuffle(test_set, random_state = 20)
#shuffle_test_fname = shuffle_test.fname.values  # keep fname so the results txt file can be built later
shuffle_train

Unnamed: 0,fname,0,1,2,3,4,5,6,7,8,...,12241,12242,12243,12244,12245,12246,12247,12248,12249,label
9957,raw16k/train/MLWS0/MLWS0_pbw1158.wav,-340.279999,-319.469788,-321.478333,-323.552002,-325.117920,-340.695007,-360.244751,-385.219635,-384.449554,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1687,raw16k/train/FJYJ1/FJYJ1_pbw1088.wav,-243.454285,-253.382156,-255.794922,-251.328857,-268.862457,-270.967194,-261.371918,-260.865417,-250.122711,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2116,raw16k/train/FKEJ0/FKEJ0_pbw1117.wav,-452.835297,-443.617828,-454.529083,-458.200867,-487.246490,-481.331848,-496.856873,-506.302948,-504.795349,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
231,raw16k/train/FGBS0/FGBS0_pbw1032.wav,-430.351044,-396.328156,-397.207031,-387.942505,-402.910583,-400.541595,-394.241272,-405.073914,-417.138000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2780,raw16k/train/FLGH0/FLGH0_pbw1181.wav,-268.635345,-249.424744,-228.871628,-225.276352,-214.906647,-206.776642,-198.894623,-210.790924,-215.549133,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3915,raw16k/train/FLSH1/FLSH1_pbw1116.wav,-331.727173,-311.308472,-314.836609,-309.803864,-316.015656,-314.702698,-307.348328,-322.848846,-309.810944,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
9620,raw16k/train/MLWJ1/MLWJ1_pbw1021.wav,-286.912506,-257.790741,-249.507294,-249.536011,-210.922867,-197.607346,-178.165115,-199.741699,-180.210815,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
7068,raw16k/train/MJSG0/MJSG0_pbw1069.wav,-438.635284,-421.745850,-409.365875,-385.705475,-391.160980,-362.075348,-365.659912,-354.657684,-347.104218,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
7391,raw16k/train/MJST0/MJST0_pbw1192.wav,-252.588943,-208.159775,-208.322144,-216.950012,-224.177795,-221.013977,-228.688919,-238.187302,-228.476761,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [10]:
#print(train_set.shape)
#print(test_set.shape)

(10000, 12252)
(900, 12252)


In [11]:
# Split the shuffled frame into the feature matrix X and the label vector y.
feature_names = [col for col in shuffle_train.columns if col not in ('label', 'fname')]  # feature column ids

X = shuffle_train.drop(columns=['label', 'fname']).to_numpy()  # all feature-vector values

y = shuffle_train['label'].to_numpy()
#y_test=shuffle_test.label.values
#print(y)

[0 1 1 ... 0 0 1]


In [12]:
# test 데이터 label, fname 컬럼 삭제 (특징 데이터만 호출하기 위함)
#X_test = shuffle_test.drop(['label', 'fname'], axis=1)
#X_test = X_test.values

In [13]:
#print(X.shape)
#print(X_test.shape)

(10000, 12250)
(900, 12250)


In [14]:
# Scaling (normalisation): fit a MinMaxScaler on the training features and rescale them.
# NOTE(review): the scaler is fitted on all of X before the train/validation
# split in the next cell, so validation rows influence the scaling -- confirm
# this is acceptable.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
#X_test_scaled = scaler.transform(X_test)

X_scaled.shape

(10000, 12250)

# SVM 모델 학습 및 모델 저장

In [None]:
# Split the existing training set to test accuracy on a held-out 20%.
# Fit an SVM model
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size = 0.2, random_state = 30, shuffle = True)

clf = SVC(kernel = 'rbf', C = 10, gamma = 0.01, probability=True)
clf.fit(X_train, y_train)

# accuracy_score is documented as (y_true, y_pred); the value is the same
# either way, but the documented order keeps the call unambiguous.
print(accuracy_score(y_val, clf.predict(X_val)))

In [None]:
#clf = SVC(kernel = 'rbf', C = 10, gamma = 0.01, probability=True)  # 0.9477777777777778
#clf.fit(X_scaled, y)
#print(accuracy_score(clf.predict(X_test_scaled), y_test)) # 정확도 출력

In [None]:
# pickle.dumps returns the serialised bytes and its second positional argument
# is the protocol, not a file name, so the bytes are written to disk explicitly.
saved_model = pickle.dumps(clf)
with open('svm.pkl', 'wb') as model_file:
    model_file.write(saved_model)

In [15]:
# 예측결과 txt 파일로 변환 및 저장
def to_txt_test(y_pred, fnames=None, out_path='강력한컴공_test_results.txt'):
    """Write the predictions to a space-separated text file, sorted by file name.

    Parameters
    ----------
    y_pred : array-like of int
        Predicted class ids; 0 is written as ``male`` and 1 as ``feml``.
    fnames : array-like of str, optional
        File names in the same order as ``y_pred``. When omitted, the
        module-level ``shuffle_test_fname`` is used, as before.
    out_path : str, optional
        Destination file; defaults to the original hard-coded name.

    Returns
    -------
    pandas.DataFrame
        The sorted ``fname`` / ``y_pred`` table that was written.
    """
    if fnames is None:
        # NOTE(review): the only visible assignment of shuffle_test_fname is
        # commented out -- make sure it exists when relying on this default.
        fnames = shuffle_test_fname

    str_labels = pd.Series(y_pred).map({0: 'male', 1: 'feml'})

    # Plain arrays pair names and labels by position, never by index label.
    test_predict_df = pd.DataFrame({
        'fname': np.asarray(fnames),
        'y_pred': str_labels.to_numpy(),
    })
    df_sorted = test_predict_df.sort_values(["fname"])

    df_sorted.to_csv(out_path, sep = " ", index=False, header=False, lineterminator='\n')
    print('{0} 생성완료'.format(out_path))
    return df_sorted
    

In [None]:
# Check the size of the saved model file
file_size = os.path.getsize('svm.pkl')
print("Model file size (bytes):", file_size)

In [None]:
# pickle.loads expects serialised bytes, not a file name, so the model is read
# back with pickle.load on the opened file. svm.pkl is expected to be the model
# file saved earlier in this notebook; never unpickle files from untrusted sources.
with open('svm.pkl', 'rb') as model_file:
    clf_from_pickle = pickle.load(model_file)

# Predict with the reloaded model so the round trip through disk is actually
# exercised (its prediction used to be computed and thrown away).
# NOTE(review): X_test_scaled is only assigned in a commented-out line of the
# scaling cell -- confirm it is defined before running this cell.
y_pred = clf_from_pickle.predict(X_test_scaled)
predict_df = to_txt_test(y_pred) # write the predictions to the txt file

# 학습률 곡선 그리기

In [None]:
def plot_learning_curve(estimator, X, y):
    """Plot training and validation scores against the training-set size.

    ``learning_curve`` is run with 100 evenly spaced size fractions between
    0.1 and 1.0 and ``cv=5``. The per-size mean is drawn as a line and the
    band of one standard deviation around it is shaded.
    """
    sizes, fit_scores, val_scores = learning_curve(
        estimator, X, y, train_sizes=np.linspace(0.1, 1.0, 100), cv=5)

    # (mean, std, colour, legend label) for each curve, training first.
    curves = [
        (np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1), 'r', 'Training Score'),
        (np.mean(val_scores, axis=1), np.std(val_scores, axis=1), 'g', 'Validation Score'),
    ]

    plt.figure(figsize=(8, 6))
    for centre, _, colour, label in curves:
        plt.plot(sizes, centre, 'o-', color=colour, label=label)
    for centre, spread, colour, _ in curves:
        plt.fill_between(sizes, centre - spread, centre + spread, alpha=0.1, color=colour)
    plt.xlabel('Training Set Size')
    plt.ylabel('Score')
    plt.title('Learning Curve')
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()
    
def plot_loss_curve(estimator, X, y, train_sizes, cv):
    """Plot training and cross-validation loss against the training-set size.

    ``learning_curve`` is scored with ``neg_mean_squared_error``; the sign of
    the mean is flipped so the plotted loss is positive. The band of one
    standard deviation around each curve is shaded.
    """
    train_sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes, cv=cv, scoring='neg_mean_squared_error')

    def _loss_stats(neg_scores):
        # Scores are negated errors: flip the sign of the mean, keep the spread.
        return -np.mean(neg_scores, axis=1), np.std(neg_scores, axis=1)

    fit_loss, fit_spread = _loss_stats(fit_scores)
    cv_loss, cv_spread = _loss_stats(cv_scores)

    plt.figure()
    plt.title('Loss Curve')
    plt.xlabel('Training Examples')
    plt.ylabel('Loss')

    plt.grid()

    for centre, spread, colour in ((fit_loss, fit_spread, 'r'), (cv_loss, cv_spread, 'g')):
        plt.fill_between(train_sizes, centre - spread, centre + spread, alpha=0.1, color=colour)
    for centre, colour, label in ((fit_loss, 'r', 'Training Loss'),
                                  (cv_loss, 'g', 'Cross-validation Loss')):
        plt.plot(train_sizes, centre, 'o-', color=colour, label=label)

    plt.legend(loc='best')
    plt.show()


In [None]:
# Model / estimator used for the learning curve. SVC is imported directly at
# the top of the notebook; the `svm` module itself is never imported.
estimator = SVC(probability=True)
X = X_scaled  # training data
# y (the labels) is reused unchanged from the earlier cell.

In [None]:
# Draw the learning curve for the estimator on the scaled training data.
# plot_learning_curve evaluates 100 training sizes with cv=5, i.e. many model fits.
plot_learning_curve(estimator, X, y)