**Creating Images**

In [51]:
import cv2
from keras.models import load_model
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.display

def create_spectrogram_images(filename):
    plt.interactive(False)
    clip,sample_rate = librosa.load(filename,sr=None)
    fig = plt.figure(figsize=[0.74,0.74])
    
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    
    s = librosa.feature.melspectrogram(y=clip,sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(s,ref=np.max))
    
    plt.savefig(filename[:-3]+"jpg",dpi=400,bbox_inches='tight',pad_inches=0)
    
audio_file= "E:\\DeepLearning\\Custom_data\\Wavs\\good\\Good_Audio (1).wav"
create_spectrogram_images(audio_file)

In [47]:
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
import numpy as np
import tensorflow.keras

Note:- Class 0 is bad , Class 1 is good

**Loading Images**

In [52]:
b_img_path = "E:\\DeepLearning\\Custom_data\\Wavs\\Bad\\bad (1).jpg"
b_img = image.load_img(b_img_path, target_size=(224,224))
b_img = image.img_to_array(b_img)
b_img = np.expand_dims(b_img, axis=0)


g_img_path = "E:\\DeepLearning\\Custom_data\\Wavs\\good\\Good_Audio (1).jpg"
g_img = image.load_img(g_img_path, target_size=(224,224))
g_img = image.img_to_array(g_img)
g_img = np.expand_dims(g_img, axis=0)


**Vgg-16**

In [53]:
model = tensorflow.keras.models.load_model(r'E:\DeepLearning\Models\vgg16.h5')

In [54]:
from keras.applications.vgg16 import preprocess_input
b_img = preprocess_input(b_img)
g_img = preprocess_input(g_img)

In [55]:
print(model.predict(b_img))

[[0.9999496  0.00005036]]


In [56]:
print(model.predict(g_img))

[[0.99989676 0.00010324]]


**Vgg-19**

In [57]:
model = tensorflow.keras.models.load_model(r'E:\DeepLearning\Models\vgg19.h5')

In [58]:
from keras.applications.vgg19 import preprocess_input
b_img = preprocess_input(b_img)
g_img = preprocess_input(g_img)

In [59]:
print(model.predict(b_img))

[[0.05446598 0.94553405]]


In [65]:
print(model.predict(g_img))

[[0.12661274 0.8733873 ]]


**Resnet50**

In [66]:
model = tensorflow.keras.models.load_model(r'E:\DeepLearning\Models\res50.h5')

In [67]:
from keras.applications.resnet50 import preprocess_input
b_img = preprocess_input(b_img)
g_img = preprocess_input(g_img)

In [68]:
print(model.predict(b_img))

[[0.5357092  0.46429077]]


In [69]:
print(model.predict(g_img))

[[0.4642119  0.53578806]]


**Custom -TM(image)**

In [70]:
import tensorflow.keras
from PIL import Image, ImageOps
import numpy as np

In [71]:
model = tensorflow.keras.models.load_model(r'E:\DeepLearning\Models\tm_custom_model(image-pairs).h5')



In [72]:
np.set_printoptions(suppress=True)
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [73]:
data = np.ndarray(shape=(1, 224, 224, 3), dtype=np.float32)

In [74]:
g_img = Image.open("E:\\DeepLearning\\Custom_data\\Wavs\\good\\Good_Audio (1).jpg")

In [75]:
size = (224, 224)
g_img = ImageOps.fit(g_img, size, Image.ANTIALIAS)

In [76]:
image_array = np.asarray(g_img)
normalized_image_array = (image_array.astype(np.float32) / 127.0) - 1
# Load the image into array
data[0] = normalized_image_array
prediction = model.predict(data)
prediction



array([[0.9502941 , 0.04970594]], dtype=float32)

In [77]:
b_img = Image.open("E:\\DeepLearning\\Custom_data\\Wavs\\Bad\\bad (1).jpg")
size = (224, 224)
b_img = ImageOps.fit(b_img, size, Image.ANTIALIAS)

In [78]:
image_array = np.asarray(b_img)
normalized_image_array = (image_array.astype(np.float32) / 127.0) - 1
# Load the image into array
data[0] = normalized_image_array
prediction = model.predict(data)
prediction

array([[0.44903803, 0.550962  ]], dtype=float32)

**Naive-Model**

In [79]:
model = tensorflow.keras.models.load_model(r'E:\DeepLearning\Models\naive.h5')

In [80]:
b_img = image.load_img("E:\\DeepLearning\\Custom_data\\Wavs\\Bad\\bad (1).jpg", target_size=(64, 64))

g_img = image.load_img("E:\\DeepLearning\\Custom_data\\Wavs\\good\\Good_Audio (1).jpg", target_size=(64, 64))

In [81]:
b_img = image.img_to_array(b_img)
b_img = np.expand_dims(b_img, axis=0)
prob_score = model.predict(b_img)
prob_score



array([[0.48213777, 0.51786226]], dtype=float32)

In [82]:
g_img = image.img_to_array(g_img)
g_img = np.expand_dims(g_img, axis=0)
prob_score = model.predict(g_img)
prob_score

array([[0.4724474, 0.5275526]], dtype=float32)