In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Pre-Processing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tqdm import tqdm
import cv2
import random

In [2]:
data = tf.keras.utils.image_dataset_from_directory('/content/drive/MyDrive/stft_spectograms')

Found 2080 files belonging to 2 classes.


In [3]:
data_iterator=data.as_numpy_iterator()
batch = data_iterator.next()

In [4]:
batch[0].shape

(32, 256, 256, 3)

In [5]:
data = data.map(lambda x,y:(x/255,y))

In [6]:
data.as_numpy_iterator().next()

(array([[[[0.32941177, 0.07450981, 0.49019608],
          [0.32941177, 0.07450981, 0.49019608],
          [0.32941177, 0.07450981, 0.49019608],
          ...,
          [0.37254903, 0.09411765, 0.49803922],
          [0.37254903, 0.09411765, 0.49803922],
          [0.37254903, 0.09411765, 0.49803922]],
 
         [[0.44231004, 0.12069546, 0.5030254 ],
          [0.44231004, 0.12069546, 0.5030254 ],
          [0.44231004, 0.12069546, 0.5030254 ],
          ...,
          [0.38024664, 0.09668352, 0.5006051 ],
          [0.38024664, 0.09668352, 0.5006051 ],
          [0.38024664, 0.09668352, 0.5006051 ]],
 
         [[0.39060202, 0.10853247, 0.48154873],
          [0.39060202, 0.10853247, 0.48154873],
          [0.39060202, 0.10853247, 0.48154873],
          ...,
          [0.4025965 , 0.1046875 , 0.50362283],
          [0.4025965 , 0.1046875 , 0.50362283],
          [0.4025965 , 0.1046875 , 0.50362283]],
 
         ...,
 
         [[0.59454656, 0.17540595, 0.4997013 ],
          [0.59454

In [7]:
scaled_iterator = data.as_numpy_iterator()

In [8]:
scaled_iterator.next()[0].max()

0.99607843

In [9]:
len(data)

65

In [10]:
train_size=int(len(data)*.70)
val_size=int(len(data)*.15)
test_size=int(len(data)*.15)

In [11]:
train=data.take(train_size)
val=data.skip(train_size).take(val_size)
test=data.skip(train_size+val_size).take(test_size)

### Model

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

In [13]:
model=Sequential()

In [14]:
model.add(Conv2D(16,(3,3),1, activation='relu',input_shape=(256,256,3)))
model.add(MaxPooling2D())

model.add(Conv2D(32,(3,3),1, activation='relu'))
model.add(MaxPooling2D())

model.add(Conv2D(32,(3,3),1, activation='relu'))
model.add(MaxPooling2D())

model.add(Conv2D(16,(3,3),1, activation='relu'))
model.add(MaxPooling2D())

model.add(Flatten())

model.add(Dense(256,activation='relu'))
model.add(Dense(1,activation='sigmoid'))

In [15]:
model.compile('adam',loss=tf.losses.BinaryCrossentropy(), metrics=['accuracy'])

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2  (None, 127, 127, 16)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 32)      4640      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 62, 62, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 32)        9248      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 30, 30, 32)        0

In [17]:
logdir='/content/drive/MyDrive/logs'

In [18]:
tensorboard_callback=tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [19]:
hist=model.fit(train,epochs=25,validation_data=val,callbacks=[tensorboard_callback])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [20]:
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy, FalseNegatives, FalsePositives, TrueNegatives, TruePositives

In [21]:
pre=Precision()
re=Recall()
acc=BinaryAccuracy()

In [22]:
for batch in test.as_numpy_iterator():
    X,y=batch
    yhat=model.predict(X)
    pre.update_state(y,yhat)
    re.update_state(y,yhat)
    acc.update_state(y,yhat)



In [23]:
print(pre.result().numpy())
print(re.result().numpy())
print(acc.result().numpy())

0.9303483
0.96391755
0.9270833


In [24]:
fn = FalseNegatives()
fp = FalsePositives()
tn = TrueNegatives()
tp = TruePositives()

In [25]:
for batch in test.as_numpy_iterator():
    X,y=batch
    yhat=model.predict(X)
    fn.update_state(y,yhat)
    fp.update_state(y,yhat)
    tn.update_state(y,yhat)
    tp.update_state(y,yhat)



In [26]:
print(f"True Positive={tp.result().numpy()}, False Positive={fp.result().numpy()}, True Negative={tn.result().numpy()}, False Negative={fn.result().numpy()}")

True Positive=194.0, False Positive=15.0, True Negative=73.0, False Negative=6.0


### Manual Test

In [27]:
import librosa as lb

In [28]:
signal, sample_rate = lb.load('/content/Recording_16.wav')

In [29]:
def generate_spectogram(audio_folder, image_dim, output_folder = "user"):

  output_dir = os.path.join(audio_folder, output_folder)
  os.makedirs(output_dir, exist_ok=True)

  n_frames=10
  hop_length = len(signal)//n_frames
  n_fft = 2048

  for i in range(n_frames):
    start_sample = i*hop_length
    end_sample =start_sample + hop_length
    if end_sample <= len(signal):
      frame = signal[start_sample:end_sample]
      D_frame = lb.stft(frame, n_fft = n_fft, hop_length = 512)
      lb.display.specshow(lb.amplitude_to_db(D_frame), sr=sample_rate, hop_length=512)
      plt.axis('off')
      plt.savefig(os.path.join(output_dir, f'sample_frame_{i}.png'), bbox_inches='tight', pad_inches=0)
      plt.close()

In [30]:
audio_folder = '/content'
generate_spectogram(audio_folder, image_dim=(256,256), output_folder="specgram")

  lb.display.specshow(lb.amplitude_to_db(D_frame), sr=sample_rate, hop_length=512)


In [31]:
import cv2

In [32]:
for i in range(0,10):
  img=cv2.imread(f'/content/specgram/sample_frame_{i}.png')
  resize=tf.image.resize(img,(256,256))
  yhat=model.predict(np.expand_dims(resize/255,0))
  if yhat>0.5:
    print(f'pathology')
  else:
    print(f'healthy')

healthy
healthy
healthy
healthy
pathology
pathology
pathology
healthy
healthy
healthy
