In [7]:
import numpy as np
import librosa
import librosa.display
import os
import matplotlib.pyplot as plt
import pyaudio
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
%matplotlib inline

In [18]:
class melCNN(object):
    def __init__(self, sec=3, state=None, label=None):
        self.FORMAT = pyaudio.paFloat32
        self.SEC = sec
        self.STATE = state
        
        self.LABEL = label                
        self.labels = {0 : "doorbell",
                       1 : "fire_alarm",
                       2 : "hair_dry"}
        
        self.data_dir = os.path.dirname("data/")
        os.makedirs(self.data_dir, exist_ok=True)
        self.train_dir = os.path.join(self.data_dir, "train")
        os.makedirs(self.train_dir, exist_ok=True)        
        self.test_dir = os.path.join(self.data_dir, "test")
        os.makedirs(self.test_dir, exist_ok=True)
        self.model_dir = os.path.dirname("model/")
        os.makedirs(self.model_dir, exist_ok=True)
        
        if self.LABEL != None:
            self.label_dir = os.path.join(self.train_dir, self.labels[self.LABEL])
            os.makedirs(self.label_dir, exist_ok=True)
        
        self.CHANNELS = 1
        self.RATE = 44100
        self.n_fft = 1024
        self.hop_length = 1024
        self.n_mels = 128
        self.f_min = 20
        self.f_max = 8000
        
        self.count = 0
        self.data = None
        self.mel = None
        self.total_len = self.RATE * self.SEC
        self.total_data = np.zeros(self.total_len)
        
        self.pa = pyaudio.PyAudio()
            
        self.stream = self.pa.open(format=self.FORMAT,
                                   channels=self.CHANNELS,
                                   rate=self.RATE,
                                   input=True,
                                   output=False,
                                   frames_per_buffer=self.RATE)
        
        if self.STATE == None:
            pass
        else:
            self.loop()
        
    def loop(self):
        try:            
            while True:
                start = time.time()
                
                self.audioinput()
                self.pltmel()                
                
                if self.STATE == "test":
                    pred, acc = self.test()
                    print(f"{pred}\tAcc : {acc}")
                end = time.time()
                print(str(round(end-start, 3))+"\tsec")
                self.count += 1
        except KeyboardInterrupt:
            self.pa.close()            
        
    def audioinput(self):
        for i in range(self.SEC):
            self.data = self.stream.read(self.RATE, exception_on_overflow=False)
            self.data = np.fromstring(self.data, np.float32)
            self.total_data[:-self.RATE] = self.total_data[self.RATE:]
            self.total_data[-self.RATE:] = self.data
            
    def pltmel(self):
        self.mel = librosa.feature.melspectrogram(y=self.total_data,
                                                  sr=self.RATE,
                                                  n_fft=self.n_fft,
                                                  hop_length=self.hop_length,
                                                  n_mels=self.n_mels,
                                                  power=1.0,
                                                  fmin=self.f_min,
                                                  fmax=self.f_max)
        
        plt.rcParams["figure.figsize"] = (2.24, 2.24)
        plt.axis("off")
        plt.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])
        #plt.imshow(librosa.power_to_db(self.mel, ref=np.max))        
            
        librosa.display.specshow(librosa.power_to_db(self.mel, ref=np.max), y_axis="mel", x_axis="time")
        
        if self.STATE == "save_data":
            plt.savefig(str(os.path.join(self.label_dir, "{:03}.jpg".format(self.count))), bbox_inches=None, pad_inches=0, dpi=100)
        elif self.STATE == "test":
            plt.savefig(str(os.path.join(self.test_dir, "{:03}.jpg".format(self.count))), bbox_inches=None, pad_inches=0, dpi=100)
        plt.clf()
        
    def train(self, useCNN=True, epochs=10, hidden=256, learning_rate=0.01):
        n_classes = len(self.labels)
        data = []
        label = []
        for i in os.listdir(self.train_dir):
            print(i)
            for k,v in mel.labels.items():                
                if i == v:                    
                    label_dir = os.path.join(self.train_dir, i)
                    images = os.listdir(label_dir)
                    for img in images:                
                        data.append(plt.imread(os.path.join(label_dir, img)))
                        label.append(k)
        data = np.array(data)
        label = np.array(label)
        data, label = shuffle(data, label)
        
        data = data / 255.0
        data = data.astype("float32")
        label = label.astype("float32")
        
        self.train_x, self.test_x, self.train_y, self.test_y = train_test_split(data, label, test_size=0.3, random_state=0)

        if useCNN:
            self.model = keras.Sequential([    
                keras.layers.Conv2D(filters=30, kernel_size=(3,3), padding="valid", input_shape=(224, 224, 3)),
                keras.layers.Activation("relu"),
                keras.layers.MaxPooling2D(pool_size=(3,3)),
                keras.layers.Dropout(0.5),
                keras.layers.Conv2D(filters=30, kernel_size=(3, 3), padding="valid"),
                keras.layers.Activation("relu"),
                keras.layers.MaxPooling2D(pool_size=(3,3)),
                keras.layers.Flatten(),
                keras.layers.Dense(128, activation=tf.nn.relu),
                keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])
        else:
            self.model = keras.Sequential([
                keras.layers.Flatten(input_shape=(224, 224, 3)),
                keras.layers.Dense(hidden, activation=tf.nn.relu),
                keras.layers.Dense(n_classes, activation=tf.nn.softmax)
            ])    
        self.model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        self.train_history = self.model.fit(self.train_x, self.train_y,
                                            epochs=epochs,
                                            validation_data=(self.test_x, self.test_y))
                        
        self.model.save(os.path.join(self.model_dir, "model_1.h5"))
        print("Saved model to disk")
        
    def test(self):
        model = load_model(os.path.join(self.model_dir, "model_1.h5"))
        img = plt.imread(os.path.join(self.test_dir, "{:03}.jpg".format(self.count)))
        img = (np.expand_dims(img,0))
        prediction = model.predict(img)
        result = np.argmax(prediction[0])
        return self.labels[result], prediction[0][result]

In [12]:
'''
label 종류
1 : doorbell (3음)
2 : fire_alarm
3 : hair_dry
'''
# mel = melCNN(state="save_data", label=3)
mel = melCNN()

In [13]:
mel.train()

doorbell
fire_alarm
hair_dry
Train on 630 samples, validate on 270 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Saved model to disk


In [15]:
mel.train_history.history['val_acc']

[0.55925924,
 0.84814817,
 0.9259259,
 0.9851852,
 0.9962963,
 0.9962963,
 0.9962963,
 0.9962963,
 0.9962963,
 0.9962963]

In [19]:
mel_test = melCNN(state="test")



fire_alarm	Acc : 1.0
4.852	sec




hair_dry	Acc : 1.0
3.724	sec




doorbell	Acc : 1.0
3.851	sec




doorbell	Acc : 1.0
3.766	sec




doorbell	Acc : 1.0
3.92	sec




doorbell	Acc : 1.0
4.027	sec




doorbell	Acc : 1.0
4.953	sec




doorbell	Acc : 1.0
4.573	sec




doorbell	Acc : 1.0
4.555	sec




doorbell	Acc : 1.0
4.721	sec




doorbell	Acc : 1.0
4.752	sec




doorbell	Acc : 1.0
4.912	sec




doorbell	Acc : 1.0
5.04	sec




doorbell	Acc : 1.0
5.12	sec




doorbell	Acc : 1.0
5.414	sec




doorbell	Acc : 1.0
5.381	sec




doorbell	Acc : 0.9917622804641724
5.588	sec




doorbell	Acc : 1.0
5.768	sec




hair_dry	Acc : 1.0
5.799	sec




hair_dry	Acc : 1.0
6.038	sec




doorbell	Acc : 1.0
6.268	sec




doorbell	Acc : 1.0
6.336	sec




doorbell	Acc : 1.0
6.612	sec




doorbell	Acc : 1.0
6.954	sec




doorbell	Acc : 1.0
6.732	sec




doorbell	Acc : 1.0
6.817	sec




doorbell	Acc : 0.9999992847442627
6.946	sec




doorbell	Acc : 1.0
7.153	sec




doorbell	Acc : 1.0
7.439	sec




doorbell	Acc : 1.0
7.36	sec




doorbell	Acc : 1.0
7.682	sec




doorbell	Acc : 1.0
7.831	sec




TypeError: close() missing 1 required positional argument: 'stream'

<Figure size 161.28x161.28 with 0 Axes>