# Importing Libraries

In [1]:
import os
import shutil

import pandas as pd
import numpy as np

import matplotlib.pylab as plt
import seaborn as sns

import librosa
import librosa.display
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

# !sudo apt-get install p7zip-full

In [2]:
def label_cough_frames(time_frames, amp, sf):
  y = np.zeros(amp.shape[0])
  #print('len', amp.shape[0])
  for x in time_frames:
    start = int(x[0]*sf)
    end = int(x[1]*sf)
    y[start:end+1] = 1
    #print('start', f'{x[0]} sec ~ pos ', start, 'end', f'{x[1]} sec ~ pos ', end )
  return y

def get_audio_files_and_cough_timeframe_from_csv(df, audio_file_path, filename_index='uuid', start_end_time_prefix='IN_OUT_', max_start_end_count=16):
  """
  Get file names based on spreadsheet (Dataset workflow.csv) and start end time of cough

    Parameters
      df (DataFrame): Contains of filenames and start end time values of cough sounds
      filenames (list): List of filenames
      filename_index (str): Column name of the df that holds values of file names
      start_end_time_prefix (str): Optional prefix text in the column names for start and end time of cough sounds
      max_start_end_count (int): Maximum number of column names for start and end time of cough sounds
  """
  # get column names for start and end time
  col_names = [f'{start_end_time_prefix}{x}' for x in range(1,17)]

  extract_uuid = lambda file_path: os.path.splitext(os.path.basename(file_path))[0]

  amplitude_and_sampling_freq = lambda file_path: librosa.load(file_path)

  dic = {"amp":[], "sf":[], "shape":[], "cough_start_end":[], "label":[]}
  files = []
  for file_name in os.listdir(audio_file_path):
    file_path = os.path.join(audio_file_path,file_name)
    uuid = extract_uuid(file_path)
    times = df[ df[filename_index] == uuid ][col_names].dropna(axis=1).values
    frame = []
    if len(times) > 0:
      frame.extend([list(map(float, x.strip().split(','))) for x in times[0] if len(x.strip().split(',')) == 2])

    # print(frame)
    # break

    files.append(uuid)
    amp, sf = amplitude_and_sampling_freq(file_path)
    dic["amp"].append(amp)
    dic["sf"].append(sf)
    dic["shape"].append(amp.shape)

    if len(frame) > 0:
      #print(uuid, times)
      dic["cough_start_end"].append(frame)
      dic["label"].append(label_cough_frames(frame, amp, sf))
    else:
      dic["cough_start_end"].append(np.nan)
      dic["label"].append(np.nan)


  return pd.DataFrame(dic, index=files)

def plot_amp_vs_label(title, visual_set, audio_df2, percent=0):
  for i, v in visual_set.items():
    if percent > 0:
      argmax = np.argmax(audio_df2.loc[v]["amp"])
      three_percent = len(audio_df2.loc[v]["amp"])*percent
      start_zoom = int(argmax - three_percent)
      end_zoom = int(argmax + three_percent)
      print(f'Max value {np.max(audio_df2.loc[v]["amp"])} pos {argmax} {start_zoom},{end_zoom}')
      pd.Series( audio_df2.loc[v]["amp"][start_zoom:end_zoom]).plot(figsize=(16, 4),lw=1, title=f'{i} {title} {v}')
      pd.Series( audio_df2.loc[v]["label"][start_zoom:end_zoom]).plot(figsize=(16, 4),lw=2)
    else:
      pd.Series( audio_df2.loc[v]["amp"]).plot(figsize=(16, 4),lw=1, title=f'{i} {title} {v}')
      pd.Series( audio_df2.loc[v]["label"]).plot(figsize=(16, 4),lw=2)

    plt.show()

In [3]:
df = pd.read_csv("../dataset/Dataset workflow.csv")
audio_file_path = "../dataset/audio_files/AWS Audio Files"
# get_audio_files_and_cough_timeframe_from_csv(df, audio_file_path, filename_index='uuid', start_end_time_prefix='IN_OUT_', max_start_end_count=16)

FileNotFoundError: [Errno 2] No such file or directory: '../dataset/Dataset workflow.csv'

In [4]:
audio_df = get_audio_files_and_cough_timeframe_from_csv(df, audio_file_path, filename_index='uuid', start_end_time_prefix='IN_OUT_', max_start_end_count=16)

NameError: name 'df' is not defined

In [None]:
audio_df

NameError: name 'audio_df' is not defined

In [None]:
audio_df.info()

NameError: name 'audio_df' is not defined

In [None]:
amplitude_and_sampling_freq = librosa.load("../dataset/audio_files/AWS Audio Files/005b8518-03ba-4bf5-86d2-005541442357.wav")
amplitude_and_sampling_freq

(array([ 0.        ,  0.        ,  0.        , ..., -0.02018352,
        -0.01895304, -0.01745526], dtype=float32),
 22050)

In [None]:
audio_df2 = audio_df.dropna()
audio_df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 150 entries, 0029d048-898a-4c70-89c7-0815cdcf7391 to ff1234d7-7837-4ba7-842f-99fdc916baa9
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   amp              150 non-null    object
 1   sf               150 non-null    int64 
 2   shape            150 non-null    object
 3   cough_start_end  150 non-null    object
 4   label            150 non-null    object
dtypes: int64(1), object(4)
memory usage: 7.0+ KB


In [None]:
def apply_framing(audio_df):
  def create_overlapping_frames(key, amp, label, sf, frame_size, hop_length):
    # Calculate the total number of frames
    if len(amp) == len(label):
      total_frames = 1 + int((len(amp) - frame_size) / hop_length)
      dic = {"key":[], "sf":[], "start":[], "end":[], "max_amp":[], "frame_index":[], "amp":[], "label":[]}

      # Create overlapping frames
      for i in range(total_frames):
        dic["key"].append(key)
        dic["sf"].append(sf)
        dic["start"].append(i * hop_length)
        dic["end"].append(i * hop_length + frame_size)
        dic["max_amp"].append(np.max(amp[i * hop_length: i * hop_length + frame_size]))
        dic["frame_index"].append(i)
        dic["amp"].append(amp[i * hop_length: i * hop_length + frame_size])
        frames_label_raw = label[i * hop_length: i * hop_length + frame_size]

        f_label = 0
        if np.sum(frames_label_raw==1) > (len(frames_label_raw) / 2):
          f_label = 1
        dic["label"].append(f_label)

      return dic
    else:
      print('Error: non matching amp and labels', key, len(amp), len(label))

  def create_and_label_frames(audio_df):
    return create_overlapping_frames(audio_df.name, audio_df["amp"], audio_df["label"],audio_df["sf"],frame_size, hop_length)

  all_frames = {}
  for frame_size in [256, 512, 1024]:
    hop_length = frame_size // 2

    frame_df = pd.DataFrame()
    #xx = audio_df.head(1).apply(create_and_label_frames, axis=1)
    xx = audio_df.apply(create_and_label_frames, axis=1)
    for x in xx:
      if len(frame_df) > 0:
        frame_df = pd.concat([frame_df, pd.DataFrame(x)], ignore_index=True)
      else:
        frame_df = pd.DataFrame(x)

    all_frames[frame_size] = frame_df
    print(f'Frame size {frame_size}, hop_length {hop_length}, count {len(frame_df)}')
  return all_frames


# all_frames = apply_framing(audio_df=audio_df2)
# framed_df = all_frames[1024]
# framed_df

In [None]:
# df_to_save = audio_df2.copy()
# df_to_save.reset_index(inplace=True)
# df_to_save.to_feather("../dataset/audio_data_before_framing.feather")

In [None]:
df_from_save = pd.read_feather("../dataset/audio_data_before_framing.feather")
df_from_save['key'] = df_from_save['index']
df_from_save.drop(columns=['index'], inplace=True)
df_from_save.set_index('key', inplace=True)
df_from_save

Unnamed: 0_level_0,amp,sf,shape,cough_start_end,label
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0029d048-898a-4c70-89c7-0815cdcf7391,"[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",22050,[216972],"[[0.720196, 1.122022], [1.551667, 1.996767], [...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
008ba489-31ad-44d8-856b-fcf72369dc46,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,[215649],"[[4.019377, 4.52724], [4.551424, 4.880326], [4...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
008c1c9e-aeef-40c5-846c-24f1b964f884,"[-1.1011437e-12, -6.4785234e-12, -4.3936547e-1...",22050,[219618],"[[2.297893, 2.742448], [2.795401, 3.106958], [...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
00bf9f83-2e8f-47cf-a4f2-97f2beceebc1,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,[60858],"[[1.451179, 1.887393]]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
01567151-7bb2-45ee-9aa8-a1332b5941ea,"[-1.702421e-12, -6.2909625e-12, -2.9731185e-12...",22050,[219618],"[[2.222774, 2.679644], [2.71043, 3.130356], [3...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...,...
fb0971e2-8ebf-459c-972d-b09d28ae0ca6,"[1.6668692e-11, 2.2999237e-12, -2.6049822e-11,...",22050,[216972],"[[1.890686, 2.819863], [3.777154, 4.561543], [...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
fd7d172d-4106-427a-870b-0384c88f147f,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,[72765],"[[0.631714, 1.039029], [1.076743, 1.4256], [1....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
fd849b72-f4bf-4852-9bc2-fd9becc9571e,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,[100548],"[[1.759509, 2.532103], [2.58552, 2.751634]]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
fed255ec-4829-4f4a-b22d-9bb23f2dd89f,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,[218295],"[[2.6532, 3.0888], [3.173657, 3.580971], [3.58...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [None]:
all_frames_from_save = apply_framing(audio_df=df_from_save)

Frame size 256, hop_length 128, count 226033
Frame size 512, hop_length 256, count 112895
Frame size 1024, hop_length 512, count 56337


In [None]:
df_frame_from_save = all_frames_from_save[256]
df_frame_from_save

Unnamed: 0,key,sf,start,end,max_amp,frame_index,amp,label
0,0029d048-898a-4c70-89c7-0815cdcf7391,22050,0,256,0.000041,0,"[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",0
1,0029d048-898a-4c70-89c7-0815cdcf7391,22050,128,384,0.000031,1,"[1.811356e-06, -1.3867169e-06, 1.9455676e-06, ...",0
2,0029d048-898a-4c70-89c7-0815cdcf7391,22050,256,512,0.000064,2,"[-2.082721e-06, 4.0137297e-06, -1.9538369e-05,...",0
3,0029d048-898a-4c70-89c7-0815cdcf7391,22050,384,640,0.001784,3,"[-2.2820053e-05, 8.440122e-07, 6.248148e-07, -...",0
4,0029d048-898a-4c70-89c7-0815cdcf7391,22050,512,768,0.077962,4,"[6.083851e-05, 6.2495135e-05, 4.0901323e-05, 2...",0
...,...,...,...,...,...,...,...,...
226028,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216192,216448,0.000000,1689,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
226029,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216320,216576,0.000000,1690,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
226030,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216448,216704,0.000000,1691,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
226031,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216576,216832,0.000000,1692,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0


In [None]:
df_frame_from_save["amp"].values[0].shape
df_frame_from_save["amp"].values[1].shape

(256,)

In [None]:
mfccs = librosa.feature.mfcc(y=df_frame_from_save["amp"].values[0], sr=sr, n_mfcc=13)
mfccs

array([[-929.2108   ],
       [  15.370566 ],
       [ -13.771263 ],
       [   7.956869 ],
       [ -13.139961 ],
       [  15.547903 ],
       [  -5.4487944],
       [   2.0890393],
       [  -5.931996 ],
       [   4.332015 ],
       [  -9.814028 ],
       [  -1.732458 ],
       [   4.981378 ]], dtype=float32)

In [None]:
D = librosa.stft(y=df_frame_from_save["amp"].values[0])
magnitude = np.abs(D)
db = librosa.amplitude_to_db(magnitude, ref=np.max)
db.shape



(1025, 1)

In [None]:
def extract_features(amplitude):

    D = librosa.stft(y=amplitude)
    magnitude = np.abs(D)
    db = librosa.amplitude_to_db(magnitude, ref=np.max)

    return db

df_frame_from_save["stft"] = df_frame_from_save["amp"].apply(lambda x: extract_features(x))
df_frame_from_save

Unnamed: 0,key,sf,start,end,max_amp,frame_index,amp,label,stft
0,0029d048-898a-4c70-89c7-0815cdcf7391,22050,0,256,0.000041,0,"[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",0,"[[-37.2752], [-22.70089], [-17.135803], [-14.3..."
1,0029d048-898a-4c70-89c7-0815cdcf7391,22050,128,384,0.000031,1,"[1.811356e-06, -1.3867169e-06, 1.9455676e-06, ...",0,"[[-5.2825394], [-5.409523], [-5.7495193], [-6...."
2,0029d048-898a-4c70-89c7-0815cdcf7391,22050,256,512,0.000064,2,"[-2.082721e-06, 4.0137297e-06, -1.9538369e-05,...",0,"[[-8.421017], [-8.349125], [-8.115986], [-7.67..."
3,0029d048-898a-4c70-89c7-0815cdcf7391,22050,384,640,0.001784,3,"[-2.2820053e-05, 8.440122e-07, 6.248148e-07, -...",0,"[[0.0], [-0.01589775], [-0.06294823], [-0.1397..."
4,0029d048-898a-4c70-89c7-0815cdcf7391,22050,512,768,0.077962,4,"[6.083851e-05, 6.2495135e-05, 4.0901323e-05, 2...",0,"[[-32.595497], [-16.71085], [-10.883673], [-7...."
...,...,...,...,...,...,...,...,...,...
226028,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216192,216448,0.000000,1689,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0,"[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...."
226029,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216320,216576,0.000000,1690,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0,"[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...."
226030,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216448,216704,0.000000,1691,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0,"[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...."
226031,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,216576,216832,0.000000,1692,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0,"[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...."


In [None]:
df_frame_from_save["stft"][0].flatten().shape

(1025,)

In [None]:
data = []
for i in range(len(df_frame_from_save)):
    data.append(df_frame_from_save["stft"][0].flatten())
X = np.array(data)

In [None]:
X.shape

(226033, 1025)

# Model Building

In [None]:
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

ModuleNotFoundError: No module named 'xgboost'

In [None]:
# X = df_frame_from_save["mfccs"]
y = df_frame_from_save["label"]

In [None]:
y

0         0
1         0
2         0
3         0
4         0
         ..
226028    0
226029    0
226030    0
226031    0
226032    0
Name: label, Length: 226033, dtype: int64

In [None]:
y.value_counts() / y.shape[0] * 100

label
0    78.654444
1    21.345556
Name: count, dtype: float64

In [None]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
X_train[0].shape

(1025,)

# Logistic Regression

In [None]:
lr = LogisticRegression(penalty="elasticnet",verbose=1,l1_ratio=0.4,solver="saga")

In [None]:
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)


max_iter reached after 548 seconds




In [None]:
accuracy_score(y_test,y_pred)

0.7876213860685292

In [None]:
accuracy_score(y_test,y_pred)

0.9164288716349238

In [None]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.79      1.00      0.88     35606
           1       0.00      0.00      0.00      9601

    accuracy                           0.79     45207
   macro avg       0.39      0.50      0.44     45207
weighted avg       0.62      0.79      0.69     45207



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# XGBoost

In [None]:
xgb = XGBClassifier()
xgb.fit(X_train,y_train)
y_pred = xgb.predict(X_test)


In [None]:
accuracy_score(y_test,y_pred)

0.9383944964275444

In [None]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96     35606
           1       0.87      0.84      0.85      9601

    accuracy                           0.94     45207
   macro avg       0.91      0.90      0.91     45207
weighted avg       0.94      0.94      0.94     45207



In [None]:

!pip3 install gammatone


In [None]:

import numpy as np
import matplotlib.pyplot as plt
from gammatone.gtgram import gtgram
import pandas as pd

# Load your data (replace this with your actual data loading mechanism)
# Example: data = pd.read_csv('your_data.csv')

# Assuming your DataFrame has a column 'audio_data' with the audio samples
# Replace 'audio_data_column' with the actual column name
# audio_data = data['audio_data_column'].values[0]  # Example: first row audio data

# Sample rate and window length
sample_rate = 22050  # or the sample rate of your audio data
window_length = 0.025  # 25ms window length
hop_length = 0.010  # 10ms hop length
channels = 64  # Number of filters in the filterbank

# Example audio data (you should replace this with your actual audio data)
# audio_data = np.random.randn(sample_rate)  # 1 second of white noise

# Calculate the gammatone spectrogram
spec = gtgram(audio_data, sample_rate, window_length, hop_length, channels)

# Plot the gammatone spectrogram
plt.figure(figsize=(20, 5))
plt.imshow(10 * np.log10(spec), aspect='auto', origin='lower', cmap='jet')
plt.colorbar(format='%+2.0f dB')
plt.title('Gammatone Spectrogram')
plt.ylabel('Frequency Channel')
plt.xlabel('Time (s)')
plt.show()
