In [1]:
%%time
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torch.nn.init as init
import torchvision.transforms as transforms
import librosa
import librosa.display
import IPython.display

MFCC_X = 20
MFCC_Y = 44
batch_size     =   10
learning_rate  =   0.0002
num_epoch      =   10

Wall time: 9.16 s


In [2]:
def _path(loc, directory, filename, redundant, format_):
    delimiter = '\\'
    path = ( loc + delimiter + directory + delimiter +
           filename + redundant + format_)
    return path

In [3]:
def _normalize(mfcc):
    return (mfcc - np.min(mfcc)) / (np.max(mfcc) - np.min(mfcc))

In [4]:
def _fit_size(arr, size):
    zeros = np.zeros( (len(arr), size - len(arr[0]) ) )
    arr = np.append(arr, zeros, axis = 1)
    return arr

In [5]:
def _mfcc(path):
    y, sr = librosa.load(path)
    mfcc = librosa.feature.mfcc(y = y, sr = sr)
    mfcc = _fit_size(mfcc, MFCC_Y)
    return  _normalize(mfcc)

In [6]:
class WordDataset(Dataset):
    def __init__(self, train = True):
        label_list = [0, 1, 2, 3]
        self.label_word = ['yes', 'no', 'on', 'off']
        self.train = train
        num_data   =  2100
        num_train  =  2000
        num_test   =  100

        if self.train == True: 
            self.train_data   =  []
            self.train_label  =  []
            
            print("\n\n==== Train Data:")
            for item in label_list:
                print("===", end = "")
                for i in range(1, num_train + 1):
                    path = _path(loc = '..\\테스트\\dataset', 
                              directory = self.label_word[item],filename = self.label_word[item],
                                 redundant = ' (' + str(i) + ')', format_ = '.wav')
                    mfcc = _mfcc(path)
                    self.train_data.append(mfcc)
                    self.train_label.append(item)
                    
            self.train_label = np.array(self.train_label)
            self.train_data = np.concatenate(self.train_data)
            self.train_data = self.train_data.reshape(num_train * 4, 1, MFCC_X, MFCC_Y)
            print("=== Dataset Download Complete !!")
            
        else:
            self.test_data   =  []
            self.test_label  =  []
            
            print("\n\n=== Test Data:")
            for item in label_list:
                print("===", end = "")
                for i in range(num_train + 1, num_data + 1):              
                    path = _path(loc = '..\\테스트\\dataset', 
                              directory = self.label_word[item],filename = self.label_word[item],
                                 redundant = ' (' + str(i) + ')', format_ = '.wav')
                    mfcc = _mfcc(path)
                    self.test_data.append(mfcc)
                    self.test_label.append(item)
                    
            self.test_label = np.array(self.test_label)
            self.test_data = np.concatenate(self.test_data)
            self.test_data = self.test_data.reshape(num_test * 4, 1, MFCC_X, MFCC_Y)
            print("=== Dataset Download Complete !!")
        
    def __getitem__(self, index):
        if self.train:
            return self.train_data[index], self.train_label[index]
        else:
            return self.test_data[index], self.test_label[index]
    
    def __len__(self):
        if self.train:
            return len(self.train_data)
        else:
            return len(self.test_data)

In [7]:
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr
        
    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]

In [8]:
class MatMul:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        W, = self.params
        out = np.dot(x, W)
        self.x = x
        return out

    def backward(self, dout):
        W, = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        self.grads[0][...] = dW
        return dx

In [9]:
class Sigmoid:
    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx

In [10]:
class SoftmaxWithLoss:
    def __init__(self):
        self.params, self.grads = [], []
        self.y = None  # softmax의 출력
        self.t = None  # 정답 레이블

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)

        # 정답 레이블이 원핫 벡터일 경우 정답의 인덱스로 변환
        if self.t.size == self.y.size:
            self.t = self.t.argmax(axis=1)

        loss = cross_entropy_error(self.y, self.t)
        return loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]

        dx = self.y.copy()
        dx[np.arange(batch_size), self.t] -= 1
        dx *= dout
        dx = dx / batch_size

        return dx

In [11]:
class Affine:
    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.x = None

    def forward(self, x):
        W, b = self.params
        out = np.dot(x, W) + b
        self.x = x
        return out

    def backward(self, dout):
        W, b = self.params
        dx = np.dot(dout, W.T)
        dW = np.dot(self.x.T, dout)
        db = np.sum(dout, axis=0)

        self.grads[0][...] = dW
        self.grads[1][...] = db
        return dx

In [12]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # 가중치와 편향 초기화
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        # 계층 생성
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]
        self.loss_layer = SoftmaxWithLoss()

        # 모든 가중치와 기울기를 리스트에 모은다.
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout

In [13]:
%%time
# 데이터 읽기, 모델과 옵티마이저 생성
test_dataset = WordDataset(train = False)
train_dataset = WordDataset(train = True)
train_loader = DataLoader(train_dataset, batch_size = 10, shuffle = True, num_workers = 0, drop_last = True)
test_loader = DataLoader(test_dataset, batch_size = 10, shuffle = True, num_workers = 0, drop_last = True)



=== Test Data:


==== Train Data:


NameError: name 'hidden_size' is not defined

In [14]:
hidden_size = 10
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

In [16]:
for epoch in range(num_epoch):
    for [audio, label] in train_loader:
        batch_x = audio
        batch_t = label
        # 기울기를 구해 매개변수 갱신
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(mowdel.params, model.grads)

        total_loss += loss
        loss_count += 1

        # 정기적으로 학습 경과 출력
        if (iters+1) % 10 == 0:
            avg_loss = total_loss / loss_count
            print('| Epoch %d |  Iter %d / %d | Loss %.2f'
                  % (epoch + 1, iters + 1, max_iters, avg_loss))
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0


# 학습 결과 플롯
plt.plot(np.arange(len(loss_list)), loss_list, label='train')
plt.xlabel('반복 (x10)')
plt.ylabel('손실')
plt.show()

# 경계 영역 플롯
h = 0.001
x_min, x_max = x[:, 0].min() - .1, x[:, 0].max() + .1
y_min, y_max = x[:, 1].min() - .1, x[:, 1].max() + .1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
X = np.c_[xx.ravel(), yy.ravel()]
score = model.predict(X)
predict_cls = np.argmax(score, axis=1)
Z = predict_cls.reshape(xx.shape)
plt.contourf(xx, yy, Z)
plt.axis('off')

# 데이터점 플롯
x, t = spiral.load_data()
N = 100
CLS_NUM = 3
markers = ['o', 'x', '^']
for i in range(CLS_NUM):
    plt.scatter(x[i*N:(i+1)*N, 0], x[i*N:(i+1)*N, 1], s=40, marker=markers[i])
plt.show()


ValueError: shapes (10,1,20,44) and (2,10) not aligned: 44 (dim 3) != 2 (dim 0)