In [None]:
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import IPython.display
import librosa
import librosa.display

In [None]:
# Load the first 7 seconds of the test recording.
# `y` receives the samples and `sr` the sampling rate returned by librosa.load;
# both names are reused by the cells below.
audio_path = "./testset/fujitou_normal_test/fujitou_normal_002.wav"
y, sr = librosa.load(audio_path,offset=0.0,duration=7.0)

In [None]:
# Mel-scaled power spectrogram of the clip (128 mel bands, 2048-sample FFT).
# The audio is passed by keyword: librosa >= 0.10 rejects positional audio
# arguments, and the keyword form also works on older releases.
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, n_fft=2048)

In [None]:
# Dump the raw samples, the sampling rate and the mel spectrogram for inspection.
print(y)
print(sr)
print(S)

In [None]:
# Plot the raw mel spectrogram `S` on a time / mel-frequency grid.
plt.figure(figsize=(12, 4))
librosa.display.specshow(S, sr=sr, x_axis='time', y_axis='mel')
plt.title('mel power spectrogram')
# `S` holds linear power at this point (the dB conversion happens in a later
# cell), so the colour bar must not be labelled in dB.
plt.colorbar(format='%.1e')
plt.tight_layout()

In [None]:
# `S` is a power spectrogram, so it is converted with power_to_db
# (10*log10). amplitude_to_db squares its input first and would double
# every dB value. The loudest bin is used as the 0 dB reference.
log_S = librosa.power_to_db(S, ref=np.max)

In [None]:
import skimage
# Plot the dB-scaled mel spectrogram.
plt.figure(figsize=(12, 4))
# The mel filter bank was built with librosa's default upper edge (sr / 2),
# so the axis must use the same limit; fmax=sr stretched the tick labels.
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel', fmax=sr / 2)
plt.title('mel power spectrogram')
#plt.colorbar(format='%02.0f dB')
plt.tight_layout()

In [None]:
# Show the dB-scaled spectrogram values.
print(log_S)

In [None]:
# Length of the first axis of log_S (one entry per mel band).
print(len(log_S))

In [None]:
# Length of every row of log_S (number of time frames in each mel band).
print([len(v) for v in log_S])

In [None]:
import librosa, librosa.display

from scipy.io.wavfile import read
import matplotlib.pyplot as plt

# Load the first 7 seconds of the recording through the documented loader
# (`librosa.audio` is not a public module path).
data, fs = librosa.load("./testset/fujitou_normal_test/fujitou_normal_002.wav",
                        offset=0.0, duration=7.0)
# Mel spectrogram (power), not the mel-frequency cepstrum.
melspecs = librosa.feature.melspectrogram(y=data, sr=fs,
                                          n_fft=2048, n_mels=128)
# Convert power to dB relative to the loudest bin.
mels = librosa.power_to_db(melspecs, ref=np.max)
# Visualise; the mel axis ends at the Nyquist frequency used by the filter bank.
librosa.display.specshow(mels, sr=fs, x_axis='time', y_axis='mel', fmax=fs / 2)

In [None]:
class MUSIC_NET():
    """Classifier with three convolution layers and two linear layers (7 outputs).

    NOTE(review): `L`, `F` and `math` are not imported anywhere in the visible
    notebook. They look like `chainer.links`, `chainer.functions` and the
    standard `math` module -- confirm and import them before running this cell.

    NOTE(review): the class declares no base class, yet `__init__` forwards the
    layers to `super().__init__` as keyword arguments, which plain `object`
    does not accept. Presumably this should derive from a Chainer chain class;
    verify against the original project.
    """

    def __init__(self, ):
        # Register the five layers under the names used by `forward`.
        super(MUSIC_NET, self).__init__(
            conv1=L.Convolution2D(in_channels=1, out_channels=16,
                                  ksize=(16, 9), stride=4, pad=0,
                                  wscale=0.02 * math.sqrt(16 * 9)),
            conv2=L.Convolution2D(in_channels=16, out_channels=32,
                                  ksize=(5, 3), stride=2, pad=0,
                                  wscale=0.02 * math.sqrt(16 * 5 * 3)),
            conv3=L.Convolution2D(in_channels=32, out_channels=64,
                                  ksize=(3, 3), stride=2, pad=0,
                                  wscale=0.02 * math.sqrt(32 * 3 * 3)),
            # The flattened conv3 output is hard-coded as 64 * 14 * 19 features,
            # so the network only accepts inputs that produce that exact size.
            fc4=L.Linear(in_size=64 * 14 * 19, out_size=4096,
                         wscale=0.02 * math.sqrt(64 * 14 * 19)),
            fc5=L.Linear(in_size=4096, out_size=7, wscale=0.02 * math.sqrt(4096)),
        )

    def __call__(self, x, t):
        """Run the network on `x` and return `(loss, accuracy)` against labels `t`."""
        y = self.forward(x)
        loss = F.softmax_cross_entropy(y, t)
        accuracy = F.accuracy(y, t)
        return loss, accuracy

    def forward(self, x):
        """Return the raw (pre-softmax) output of the last linear layer for `x`."""
        conv1 = F.relu(self.conv1(x))
        conv2 = F.relu(self.conv2(conv1))
        conv3 = F.relu(self.conv3(conv2))
        # Flatten to (-1, 64 * 14 * 19) and apply dropout before the linear layers.
        reshape3 = F.dropout(F.reshape(conv3, (-1, 64 * 14 * 19)), ratio=0.5)
        fc4 = F.dropout(F.relu(self.fc4(reshape3)), ratio=0.5)
        fc5 = self.fc5(fc4)
        return fc5

    def predict(self, x):
        """Return the softmax of the network output for `x`."""
        y = self.forward(x)
        return F.softmax(y)

In [None]:
import numpy as np
import librosa
from scipy.io.wavfile import read


class READ_DATASET(object):
    """Serve augmented mel-spectrogram blocks cut from a single WAV file.

    The WAV samples are rescaled as ``(x - 128) / 128``. A noise clip is loaded
    from ``noise/noise.npy`` and divided by 128. Each block is
    ``chunk * length`` samples long and starts at one of the offsets stored in
    ``self.indexes``.

    NOTE(review): the rescaling looks like it expects unsigned 8-bit audio, and
    the noise clip is divided by 128 without the same offset -- confirm both
    against the data files.
    """

    def __init__(self, wavfile, chunk, length, expected_fs=None):
        """Read `wavfile` and prepare the block start offsets.

        Args:
            wavfile: path of the WAV file, read with scipy.io.wavfile.read.
            chunk: number of samples per chunk.
            length: number of chunks per block.
            expected_fs: if given, the sampling rate the file must have.

        Raises:
            AssertionError: if `expected_fs` is given and differs from the file's rate.
        """
        fs, all_data = read(wavfile)
        if expected_fs is not None and expected_fs != fs:
            raise AssertionError(
                "It has difference between expected_fs and fs "
                "(expected {}, got {})".format(expected_fs, fs))

        all_data = all_data.astype('float64') - 128.0
        all_data /= 128.0

        self.all_data = all_data
        self.sampling_rate = fs
        self.CHUNK = chunk
        self.length = length

        # Load the noise clip used for augmentation.
        self.noise = np.load('noise/noise.npy')  # 8bit 16000Hz
        self.noise = self.noise.astype('float32') / 128.0

        # Block start offsets, in ascending order at construction time:
        # evenly spaced over the range where a full block still fits.
        n_blocks_all = len(self.all_data) - self.CHUNK * self.length - 1
        self.indexes = np.linspace(0, n_blocks_all, int(n_blocks_all / 5.0)).astype(np.int64)
        self.n_blocks = len(self.indexes)

        print("sampling rate is {}".format(fs))

    def shuffle_indexes(self):
        """Shuffle the order of the block start offsets.

        The offsets themselves are permuted. The previous code replaced them
        with a permutation of ``0..n-1``, which threw the real offsets away and
        confined every block to the very beginning of the file.
        """
        self.indexes = np.random.permutation(self.indexes)

    def _add_noise(self, data, scale=None):
        """Return `data` plus a random slice of the noise clip multiplied by `scale`.

        `scale` defaults to a uniform draw from [0.001, 3.0).

        Raises:
            ValueError: if the noise clip is shorter than `data`.
        """
        if scale is None:
            scale = np.random.uniform(low=0.001, high=3.0)
        last_start = len(self.noise) - len(data)
        if last_start < 0:
            raise ValueError("noise clip is shorter than the requested data block")
        # randint's upper bound is exclusive; +1 makes the last valid start
        # reachable and lets a noise clip of exactly len(data) samples work.
        start_i = np.random.randint(low=0, high=last_start + 1)
        noise = self.noise[start_i:(start_i + len(data))]
        return data + noise * scale

    def _change_volume(self, data, volume=None):
        """Return `data` multiplied by `volume` (default: uniform draw from [0.1, 1.0))."""
        if volume is None:
            volume = np.random.uniform(low=0.1, high=1.0)
        return data * volume

    def get_one_melspec(self, index):
        """Return the mel spectrogram (256 bands) of the augmented block at position `index`."""
        start_i = self.indexes[index]
        data = self.all_data[start_i:(start_i + self.CHUNK * self.length)].copy()

        # Augmentation: add noise, then change the volume.
        data = self._add_noise(data)
        data = self._change_volume(data)

        melspecs = librosa.feature.melspectrogram(y=data, sr=self.sampling_rate,
                                                  n_fft=2048, n_mels=256)
        return melspecs

    def get_batch_melspec(self, indexes):
        """Return the mel spectrograms for `indexes` stacked into one array.

        Each spectrogram gains a leading axis of size 1 before stacking.
        """
        melspecs_dataset = [self.get_one_melspec(index)[np.newaxis, :]
                            for index in indexes]
        return np.array(melspecs_dataset)

def main():
    """Build the dataset, shuffle it and fetch the first ten mel spectrograms."""
    dataset = READ_DATASET(
        wavfile='8bit-16000Hz.wav',
        chunk=1024,
        length=160,
        expected_fs=16000,
    )
    dataset.shuffle_indexes()

    first_ten = np.arange(10)
    # The batch is computed but not used further, exactly as before.
    batch = dataset.get_batch_melspec(first_ten)


if __name__ == "__main__":
    main()

In [None]:
import pandas as pd  # おなじみpandas
import glob  # ファイル名を取得するライブラリ
import librosa  # 今回の主役librosa

In [None]:
list_ceps = []  # per-file MFCC feature vectors, filled by the cells below
list_label = []  # matching labels: normal (0), anomalous (1)

In [None]:
# Process the anomalous recordings.
filelist = glob.glob('dataset/1/*.wav')  # every .wav file under dataset/1/

for filename in filelist:
    # sr=None is passed to the loader; `sr` receives the rate it returns.
    y, sr = librosa.core.load(filename,sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
       
    # mfcc holds one 20-coefficient column per analysis window; average over the windows.
    ceps = mfcc.mean(axis=1)    
    
    # Append to the shared lists.
    list_ceps.append(ceps)  # averaged 20-dimensional MFCC vector
    list_label.append(1)  # anomalous label (1)

In [None]:
# Process the normal recordings.
filelist = glob.glob('dataset/0/*.wav')  # every .wav file under dataset/0/

for filename in filelist:
    # sr=None is passed to the loader; `sr` receives the rate it returns.
    y, sr = librosa.core.load(filename,sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    
    # mfcc holds one 20-coefficient column per analysis window; average over the windows.
    ceps = mfcc.mean(axis=1)
    
    # Append to the shared lists.
    list_ceps.append(ceps)  # averaged 20-dimensional MFCC vector
    list_label.append(0)  # normal label (0)


In [None]:
# Build the feature table.

# One row per recording, one column per MFCC coefficient.
df_ceps = pd.DataFrame(list_ceps)

# Name the columns "dct<index>". The count is taken from the frame itself so
# the cell keeps working if n_mfcc changes or no file was collected.
columns_name = ['dct{0}'.format(i) for i in range(df_ceps.shape[1])]
df_ceps.columns = columns_name

# Label column (normal 0, anomalous 1).
df_label = pd.DataFrame(list_label, columns=['label'])

# Concatenate side by side: label first, then the features.
df = pd.concat([df_label, df_ceps], axis=1)

# ぱわー！！！

In [None]:
import librosa
import librosa.display
from PIL import Image
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
import skimage
from skimage import io
def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale array `X` so its smallest value maps to `min` and its largest to `max`.

    A constant array has no range to stretch; every element is then mapped to
    `min` instead of dividing by zero (which previously produced NaN).
    The parameter names shadow the builtins but are kept for existing callers.
    """
    lowest = X.min()
    span = X.max() - lowest
    if span == 0:
        return np.full_like(X, min, dtype=float)
    X_std = (X - lowest) / span
    return X_std * (max - min) + min
def save_png(filename,soundpath,savepath):
    """Save the dB mel spectrogram of one audio file as an 8-bit greyscale PNG.

    Args:
        filename: name of the audio file inside `soundpath`.
        soundpath: directory containing the audio file.
        savepath: directory that receives ``<filename>.png``.
    """
    # Read the first 7 seconds through the documented loader.
    music, fs = librosa.load(os.path.join(soundpath, filename), offset=0.0, duration=7.0)
    # Mel spectrogram (power), then dB relative to the loudest bin.
    mfccs = librosa.feature.melspectrogram(y=music, sr=fs, n_fft=2048, n_mels=128)
    mfccspw = librosa.power_to_db(mfccs, ref=np.max)

    # The former specshow call drew onto the current figure for every file but
    # the figure was never saved, so it has been dropped.

    # Min-max scale to fit inside the 8-bit range.
    img = scale_minmax(mfccspw, 0, 255).astype(np.uint8)

    img = np.flip(img, axis=0)  # put low frequencies at the bottom of the image

    # Save as PNG.
    skimage.io.imsave(os.path.join(savepath, filename + '.png'), img)
# Convert every entry of dataset/0 into an image.
# NOTE(review): os.listdir returns every directory entry, not only .wav files.
soundpath = './dataset/0/'
savepath = './save_wav_image/0/'
cnt = 0
for filename in os.listdir(soundpath):
    cnt += 1
    # Progress message every 10 files (the text reads "<n> items processed").
    if((cnt % 10) == 0):
        print(cnt,'件を処理しました')
    save_png(filename,soundpath,savepath)

# 分割ぱわー！！！！！

In [None]:
import librosa
import librosa.display
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
import skimage
from skimage import io


# def scale_minmax(X, min=0.0, max=1.0):
#     X_std = (X - X.min()) / (X.max() - X.min())
#     X_scaled = X_std * (max - min) + min
#     return X_scaled
def save_png(filename,soundpath,savepath):
    """Save ten consecutive 0.2 s mel-spectrogram tiles of one audio file as PNGs.

    Tile ``k`` covers the audio from ``0.2 * k`` seconds and is written to
    ``<savepath>/<filename>(k).png``. Tiles that end up empty are skipped, so
    their number is simply missing from the output.

    Args:
        filename: name of the audio file inside `soundpath`.
        soundpath: directory containing the audio file.
        savepath: directory that receives the PNG tiles.
    """
    wav_path = os.path.join(soundpath, filename)
    for x, offset in enumerate(np.arange(0, 2, 0.2)):
        # Read one 0.2 s slice through the documented loader.
        music, fs = librosa.load(wav_path, offset=offset, duration=0.2)
        if len(music) == 0:
            continue  # nothing was read for this slice
        # Mel spectrogram (power), then dB relative to the loudest bin.
        mfccs = librosa.feature.melspectrogram(y=music, sr=fs, n_mels=128)
        mfccspw = librosa.power_to_db(mfccs, ref=np.max)
        # Silence removal: keep only frames in which every band is above -80 dB.
        # NOTE(review): this drops a frame as soon as a single band reaches the
        # floor -- confirm that is the intended definition of "silent".
        mfccspw = mfccspw[:, np.all(mfccspw > -80, axis=0)]
        if mfccspw.shape[1] == 0:
            continue  # every frame was filtered out; an empty image cannot be saved

        # Save as PNG (imsave applies its colour map to the dB values).
        tile_name = filename + "(" + str(x) + ")" + '.png'
        matplotlib.image.imsave(os.path.join(savepath, tile_name), mfccspw)
# Convert every entry of dataset/1 into split images.
# NOTE(review): os.listdir returns every directory entry, not only .wav files.
soundpath = './dataset/1/'
savepath = './save_split_image/1/'
cnt = 0
for filename in os.listdir(soundpath):
    cnt += 1
    # Progress message every 10 files (the text reads "<n> items processed").
    if((cnt % 10) == 0):
        print(cnt,'件を処理しました')
    save_png(filename,soundpath,savepath)

# numpy配列のスライスで分割

In [None]:
import librosa
import librosa.display
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
#import skimage
#from skimage import io


# def scale_minmax(X, min=0.0, max=1.0):
#     X_std = (X - X.min()) / (X.max() - X.min())
#     X_scaled = X_std * (max - min) + min
#     return X_scaled
def save_png(filename,soundpath,savepath):
    """Cut the dB mel spectrogram of one audio file into 10-frame tiles saved as PNGs.

    Tile ``k`` is written to ``<savepath>/<filename>(k).png``. A trailing
    remainder shorter than 10 frames is discarded.

    Args:
        filename: name of the audio file inside `soundpath`.
        soundpath: directory containing the audio file.
        savepath: directory that receives the PNG tiles.
    """
    tile_width = 10
    # Load the audio through the documented loader.
    music, fs = librosa.load(os.path.join(soundpath, filename))
    # Mel spectrogram (power), then dB relative to the loudest bin.
    mfccs = librosa.feature.melspectrogram(y=music, sr=fs, n_mels=128)
    mfccspw = librosa.power_to_db(mfccs, ref=np.max)
    # Silence removal: keep only frames in which every band is above -80 dB.
    # NOTE(review): this drops a frame as soon as a single band reaches the
    # floor -- confirm that is the intended definition of "silent".
    mfccspw = mfccspw[:, np.all(mfccspw > -80, axis=0)]

    n_frames = mfccspw.shape[1]
    # "+ 1" keeps the last full tile when n_frames is a multiple of the width.
    for x, start in enumerate(range(0, n_frames - tile_width + 1, tile_width)):
        split = mfccspw[:, start:start + tile_width]

        # The values are negative dB in (-80, 0]; casting them straight to uint8
        # wraps around. Map the range onto 0..255 first.
        img = np.clip((split + 80.0) * (255.0 / 80.0), 0, 255).astype(np.uint8)
        # Fixed vmin/vmax so every tile uses the same intensity scale.
        tile_name = filename + "(" + str(x) + ")" + '.png'
        matplotlib.image.imsave(os.path.join(savepath, tile_name), img, vmin=0, vmax=255)
# Convert every entry of dataset/2 into 10-frame tiles.
# NOTE(review): os.listdir returns every directory entry, not only .wav files.
soundpath = './dataset/2/'
savepath = './save_split10_image/2/'
cnt = 0
for filename in os.listdir(soundpath):
    cnt += 1
    # Progress message every 10 files (the text reads "<n> items processed").
    if((cnt % 10) == 0):
        print(cnt,'件を処理しました')
    save_png(filename,soundpath,savepath)

In [None]:
# guitar

In [7]:
import librosa
import librosa.display
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np


def save_png(filename,soundpath,savepath):
    """Cut the dB STFT spectrogram of one audio file into 1000-frame tiles saved as PNGs.

    The first 1.6 seconds of the file are skipped. Tile ``k`` is written to
    ``<savepath>/<filename>(k).png``. A trailing remainder shorter than 1000
    frames is discarded.

    Args:
        filename: name of the audio file inside `soundpath`.
        soundpath: directory containing the audio file.
        savepath: directory that receives the PNG tiles.
    """
    tile_width = 1000
    # Load the audio through the documented loader.
    music, fs = librosa.load(os.path.join(soundpath, filename), offset=1.6)
    # Short-time Fourier transform.
    D = librosa.stft(music)
    # Magnitude in dB relative to the loudest bin.
    log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    n_frames = log_power.shape[1]
    # "+ 1" keeps the last full tile when n_frames is a multiple of the width.
    for x, start in enumerate(range(0, n_frames - tile_width + 1, tile_width)):
        split = log_power[:, start:start + tile_width]

        # With ref=np.max and the default top_db the values are negative dB in
        # [-80, 0]; casting them straight to uint8 wraps around. Map the range
        # onto 0..255 first.
        img = np.clip((split + 80.0) * (255.0 / 80.0), 0, 255).astype(np.uint8)
        # Fixed vmin/vmax so every tile uses the same intensity scale.
        tile_name = filename + "(" + str(x) + ")" + '.png'
        matplotlib.image.imsave(os.path.join(savepath, tile_name), img, vmin=0, vmax=255)
# Convert every entry of guiter_wav/6 into spectrogram tiles.
# NOTE(review): os.listdir returns every directory entry, not only .wav files.
soundpath = './guiter_wav/6/'
savepath = './guiter_wav_image/6/'
cnt = 0
for filename in os.listdir(soundpath):
    cnt += 1
    # Progress message every 10 files (the text reads "<n> items processed").
    if((cnt % 10) == 0):
        print(cnt,'件を処理しました')
    save_png(filename,soundpath,savepath)

In [None]:
# 1: MFCCで分割 (split the MFCC matrix into tiles)

In [None]:
import librosa
import librosa.display
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
#import skimage
#from skimage import io


# def scale_minmax(X, min=0.0, max=1.0):
#     X_std = (X - X.min()) / (X.max() - X.min())
#     X_scaled = X_std * (max - min) + min
#     return X_scaled
def save_png(filename,soundpath,savepath):
    """Cut the MFCC matrix of one audio file into 10-frame tiles saved as PNGs.

    Tile ``k`` is written to ``<savepath>/<filename>(k).png``. A trailing
    remainder shorter than 10 frames is discarded.

    Args:
        filename: name of the audio file inside `soundpath`.
        soundpath: directory containing the audio file.
        savepath: directory that receives the PNG tiles.
    """
    tile_width = 10
    # Load the audio through the documented loader.
    music, fs = librosa.load(os.path.join(soundpath, filename))
    # MFCC computed on a 128-band mel spectrogram (audio passed by keyword).
    mfccs = librosa.feature.mfcc(y=music, sr=fs, n_mels=128)
    # NOTE(review): power_to_db is applied to MFCCs here, which are not a power
    # spectrogram; the step is kept as written -- confirm it is intended.
    mfccspw = librosa.power_to_db(mfccs, ref=np.max)
    # Keep only frames in which every coefficient is above -80 dB.
    mfccspw = mfccspw[:, np.all(mfccspw > -80, axis=0)]

    n_frames = mfccspw.shape[1]
    # "+ 1" keeps the last full tile when n_frames is a multiple of the width.
    for x, start in enumerate(range(0, n_frames - tile_width + 1, tile_width)):
        split = mfccspw[:, start:start + tile_width]

        # The values are negative dB in (-80, 0]; casting them straight to uint8
        # wraps around. Map the range onto 0..255 first.
        img = np.clip((split + 80.0) * (255.0 / 80.0), 0, 255).astype(np.uint8)
        # Fixed vmin/vmax so every tile uses the same intensity scale.
        tile_name = filename + "(" + str(x) + ")" + '.png'
        matplotlib.image.imsave(os.path.join(savepath, tile_name), img, vmin=0, vmax=255)
# Convert every entry of dataset/1 into MFCC tiles.
# NOTE(review): savepath ends in '//' -- a class sub-directory may be missing; confirm.
soundpath = './dataset/1/'
savepath = './save_mfcc_image//'
cnt = 0
for filename in os.listdir(soundpath):
    cnt += 1
    # Progress message every 10 files (the text reads "<n> items processed").
    if((cnt % 10) == 0):
        print(cnt,'件を処理しました')
    save_png(filename,soundpath,savepath)

In [None]:
import librosa
import librosa.display
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
import skimage
from skimage import io
# Load the audio file through the documented loader.
music, fs = librosa.load("./testset/fujitou_normal_test/fujitou_normal_002.wav")
# Mel spectrogram (power); the audio is passed by keyword.
mfccs = librosa.feature.melspectrogram(y=music, sr=fs)
# Convert to dB with a 100 dB dynamic range.
mfccspw = librosa.power_to_db(mfccs, ref=np.max, top_db=100)
# Silence removal: keep only frames in which every band is above -80 dB.
# (The original cell had this statement pasted twice with broken indentation,
# which made the whole cell a syntax error.)
mfccspw = mfccspw[:, np.all(mfccspw > -80, axis=0)]

# Walk over the 10-frame tiles; `split` ends up holding the last one.
for i in np.arange(0, len(mfccspw[1])-10, 10):
    split = mfccspw[:, i:i+10]

# Plot the raw power spectrogram; the mel axis ends at the Nyquist frequency.
librosa.display.specshow(mfccs, sr=fs, x_axis='time', y_axis='mel', fmax=fs / 2)


# plt.imshow(mfccs,cmap=plt.cm.gray)
# plt.yticks([]) # y軸を消す                                                  
# plt.xticks([]) # x軸を消す 
# plt.show()

In [None]:
# Plot the silence-filtered dB spectrogram. The mel axis must end at the
# Nyquist frequency (fs / 2), which is the upper edge of the default mel filter bank.
librosa.display.specshow(mfccspw, sr=fs, x_axis='time', y_axis='mel', fmax=fs / 2)

In [None]:
# Show the filtered dB values.
print(mfccspw)

## パワーしなかった場合

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import numpy as np
def save_png(filename,soundpath,savepath):
    """Render the MFCC matrix of one audio file with specshow and save the figure as PNG.

    Seven seconds are read, starting one second into the file. The image is
    written to ``<savepath>/<filename>.png`` at 200 dpi.

    Args:
        filename: name of the audio file inside `soundpath`.
        soundpath: directory containing the audio file.
        savepath: directory that receives the PNG.
    """
    # Load the audio through the documented loader.
    music, fs = librosa.load(os.path.join(soundpath, filename), offset=1.0, duration=7.0)
    # MFCC on a 128-band mel spectrogram (no dB conversion in this variant).
    mfccs = librosa.feature.mfcc(y=music, sr=fs, n_fft=2048, n_mels=128)

    # Use a fresh figure per file and close it afterwards. Drawing on the
    # implicit current figure stacked every file's image on top of the last one.
    fig = plt.figure()
    try:
        librosa.display.specshow(mfccs, sr=fs, cmap="gray", fmax=fs)
        fig.savefig(os.path.join(savepath, filename + '.png'), dpi=200)
    finally:
        plt.close(fig)
    

# Convert every entry of dataset/2 into an MFCC figure.
# NOTE(review): os.listdir returns every directory entry, not only .wav files.
soundpath = './dataset/2/'
savepath = './dataset_back/no_power_image/2/'
cnt = 0
for filename in os.listdir(soundpath):
    cnt += 1
    # Progress message every 10 files (the text reads "<n> items processed").
    if((cnt % 10) == 0):
        print(cnt,'件を処理しました')
    # The call belongs inside the loop; it was dedented, so only the last
    # directory entry was ever converted.
    save_png(filename,soundpath,savepath)

In [None]:
import librosa, librosa.display
import IPython

from scipy.io.wavfile import read
import matplotlib.pyplot as plt

# Load the recording through the documented loader.
data, fs = librosa.load("./testset/fujitou_normal_test/fujitou_normal_002.wav")
# Mel spectrogram (power), not the mel-frequency cepstrum.
melspecs = librosa.feature.melspectrogram(y=data, sr=fs,
                                          n_fft=2048, n_mels=128)
mels = librosa.power_to_db(melspecs, ref=np.max)
# Keep only frames in which every band is above -80 dB.
melss = (mels[:, np.all(mels > -80, axis=0)])

# Number of rows (mel bands) in the first 10-frame tile.
print(len(melss[:,:10]))

# display(IPython.display.Audio(data, rate=fs))


In [None]:
    # Load the audio file through the documented loader.
    # NOTE(review): `soundpath` and `filename` come from whichever earlier cell
    # ran last -- confirm which file this is meant to read.
    music, fs = librosa.load(soundpath + filename)
    # Mel spectrogram (power); the audio is passed by keyword.
    mfccs = librosa.feature.melspectrogram(y=music, sr=fs, n_mels=128)
    # Convert to dB relative to the loudest bin.
    mfccspw = librosa.power_to_db(mfccs, ref=np.max)
    # Silence removal: keep only frames in which every band is above -80 dB.
    mfccspw = (mfccspw[:, np.all(mfccspw > -80, axis=0)])


In [None]:
# Plot the silence-filtered dB spectrogram. The mel axis must end at the
# Nyquist frequency (fs / 2), which is the upper edge of the default mel filter bank.
librosa.display.specshow(mfccspw, sr=fs, x_axis='time', y_axis='mel', fmax=fs / 2)

In [None]:
# Plot the silence-filtered dB spectrogram. The mel axis must end at the
# Nyquist frequency (fs / 2), which is the upper edge of the default mel filter bank.
librosa.display.specshow(melss, sr=fs, x_axis='time', y_axis='mel', fmax=fs / 2)

In [None]:
import librosa, librosa.display

from scipy.io.wavfile import read
import matplotlib.pyplot as plt

# Load the first 7 seconds through the documented loader.
data, fs = librosa.load("./dataset_back/akane_normal/akane_normal_097.wav",
                        offset=0.0, duration=7.0)
# Split practice: find the intervals within 1 dB of the loudest frame.
non_silent_interval = librosa.effects.split(y=data, top_db=1.0, ref=np.max,hop_length=1000)
# Visualise the first interval, if any was found.
if len(non_silent_interval) > 0:
    plt.plot(data[non_silent_interval[0][0]:non_silent_interval[0][1]])
    plt.show()
else:
    print("no non-silent interval found")
# librosa.display.specshow(split,x_axis='time', y_axis='mel', fmax=fs)

# メル周波数ケプストラムを取得
# melspecs = librosa.feature.melspectrogram(y=data, sr=fs,n_fft=2048, n_mels=128,
#                                          hop_length=2048,win_length=2048)
# mels = librosa.power_to_db(melspecs, ref=np.max)
# # 可視化
# librosa.display.specshow(mels,x_axis='time', y_axis='mel', fmax=fs)

In [None]:
# Convert `sound` into a float32 numpy array of its samples.
# NOTE(review): `sound` is not defined anywhere in the visible notebook; it
# presumably is an audio-segment object loaded in a cell that no longer exists.
sound = np.array(sound.get_array_of_samples()).astype('f')

In [None]:
# Load 3.99 s starting at 3.0 s through the documented loader.
data, fs = librosa.load("./dataset_back/uemura_normal/uemura_normal_001.wav",
                        offset=3.0, duration=3.99)
# Short-time Fourier transform.
D = librosa.stft(data)
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)

# Magnitude in dB relative to the loudest bin.
log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)
print(log_power)

# Drop frames in which every bin sits on the -80 dB floor. The values are
# clipped at exactly -80, so the previous strict "< -80" never matched.
log_powern = (log_power[:, ~np.all(log_power <= -80, axis=0)])
print(log_powern)
# An STFT has linearly spaced frequency bins, so a linear axis is used
# (a mel axis would mislabel it).
librosa.display.specshow(log_powern, sr=fs, x_axis='time', y_axis='linear')
plt.colorbar()
# mel = librosa.feature.melspectrogram(y=data, sr=fs,
#                                           n_fft=2048, n_mels=128)
# #np.array(data)
# data
# print(type(mel))
# print(mel.shape)
# pil_img_gray = Image.fromarray(mel)
# print(pil_img_gray.mode)
# pil_img_gray.save('teeest.jpeg')

In [None]:
# Load 1.99 s starting at 1.6 s through the documented loader.
data, fs = librosa.load("./guiter_wav/0/am.wav", offset=1.6, duration=1.99)
# Short-time Fourier transform; `D` is reused by the following cells.
D = librosa.stft(data)
#fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)

# Magnitude in dB relative to the loudest bin.
log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)
# An STFT has linearly spaced frequency bins, so a linear axis is used
# (a mel axis would mislabel it).
librosa.display.specshow(log_power, sr=fs, x_axis='time', y_axis='linear')
plt.colorbar()

In [None]:
# Same dB spectrogram drawn on a logarithmic frequency axis.
# Relies on `D` computed in an earlier cell.
log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)
librosa.display.specshow(log_power, x_axis="time", y_axis="log")
plt.colorbar()

In [None]:
# Same dB spectrogram drawn on a linear frequency axis in a 12x4 figure.
# Relies on `D` computed in an earlier cell.
fig = plt.figure(1, figsize=(12,4)); ax = fig.add_subplot(1,1,1)

log_power = librosa.amplitude_to_db(np.abs(D), ref=np.max)
librosa.display.specshow(log_power, x_axis="time", y_axis="linear")
plt.colorbar()

In [None]:
# Non-silent intervals of `y`. The line was pasted from the function
# signature, where the reference appeared as `<function amax>` (a syntax
# error); np.max is that function.
librosa.effects.split(y, top_db=60, ref=np.max, frame_length=2048, hop_length=512)