# Importing Libraries

In [13]:
import os
import shutil

import pandas as pd
import numpy as np

import matplotlib.pylab as plt
import seaborn as sns

import librosa
import librosa.display
from sklearn.metrics import accuracy_score,classification_report

# !sudo apt-get install p7zip-full

# Loading Dataset

In [3]:
# sample_rate, data = wavfile.read('../dataset/audio_files/0a03da19-eb19-4f51-9860-78ad95fa8cb5.wav')

# Exploring Dataset

In [4]:
# sample_rate

In [5]:
# data

In [6]:
# np.unique(data)

In [146]:
def label_cough_frames(time_frames, amp, sf):
  y = np.zeros(amp.shape[0])
  #print('len', amp.shape[0])
  for x in time_frames:
    start = int(x[0]*sf)
    end = int(x[1]*sf)
    y[start:end+1] = 1
    #print('start', f'{x[0]} sec ~ pos ', start, 'end', f'{x[1]} sec ~ pos ', end )
  return y

def get_audio_files_and_cough_timeframe_from_csv(df, audio_file_path, filename_index='uuid', start_end_time_prefix='IN_OUT_', max_start_end_count=16):
  """
  Get file names based on spreadsheet (Dataset workflow.csv) and start end time of cough

    Parameters
      df (DataFrame): Contains of filenames and start end time values of cough sounds
      filenames (list): List of filenames
      filename_index (str): Column name of the df that holds values of file names
      start_end_time_prefix (str): Optional prefix text in the column names for start and end time of cough sounds
      max_start_end_count (int): Maximum number of column names for start and end time of cough sounds
  """
  # get column names for start and end time
  col_names = [f'{start_end_time_prefix}{x}' for x in range(1,17)]

  extract_uuid = lambda file_path: os.path.splitext(os.path.basename(file_path))[0]

  amplitude_and_sampling_freq = lambda file_path: librosa.load(file_path)
  #amplitude_and_sampling_freq = lambda file_path: (np.zeros(1),0)

  dic = {"amp":[], "amp_original":[], "sf":[], "sf_original":[], "shape":[], "shape_original":[], "cough_start_end":[], "label":[]}
  files = []
  for file_name in os.listdir(audio_file_path):
    file_path = os.path.join(audio_file_path,file_name)
    uuid = extract_uuid(file_path)
 
    times = df[ df[filename_index] == uuid ][col_names].dropna(axis=1).values
    frame = []
    if len(times) > 0:
      frame.extend([list(map(float, x.strip().split(','))) for x in times[0] if len(x.strip().split(',')) == 2])

    # if len(times) > 0:
    #   for x in times[0]:
    #     se = x.strip().split(',')
    #     if len(se) == 2:
    #       frame.append([float(se[0]),float(se[1])])

    #print(uuid, times, frame)
    files.append(uuid)
    amp, sf = amplitude_and_sampling_freq(file_path)

    #resample audio file to 16Khz based on Supervisor Request
    sf_resampled = 16000
    amp_resampled = librosa.resample(amp, orig_sr=sf, target_sr=sf_resampled)

    dic["amp"].append(amp_resampled)
    dic["amp_original"].append(amp)
    dic["sf"].append(sf_resampled)
    dic["sf_original"].append(sf)
    dic["shape"].append(amp_resampled.shape)
    dic["shape_original"].append(amp.shape)

    if len(frame) > 0:
      #print(uuid, times)
      dic["cough_start_end"].append(frame)
      dic["label"].append(label_cough_frames(frame, amp_resampled, sf_resampled))
    else:
      dic["cough_start_end"].append(np.nan)
      #30-May-24
      dic["label"].append(label_cough_frames(frame, amp_resampled, sf_resampled))


  return pd.DataFrame(dic, index=files)

def plot_amp_vs_label(title, visual_set, audio_df2, percent=0):
  for i, v in visual_set.items():
    if percent > 0:
      argmax = np.argmax(audio_df2.loc[v]["amp"])
      three_percent = len(audio_df2.loc[v]["amp"])*percent
      start_zoom = int(argmax - three_percent)
      end_zoom = int(argmax + three_percent)
      print(f'Max value {np.max(audio_df2.loc[v]["amp"])} pos {argmax} {start_zoom},{end_zoom}')
      pd.Series( audio_df2.loc[v]["amp"][start_zoom:end_zoom]).plot(figsize=(16, 4),lw=1, title=f'{i} {title} {v}')
      pd.Series( audio_df2.loc[v]["label"][start_zoom:end_zoom]).plot(figsize=(16, 4),lw=2)
    else:
      pd.Series( audio_df2.loc[v]["amp"]).plot(figsize=(16, 4),lw=1, title=f'{i} {title} {v}')
      pd.Series( audio_df2.loc[v]["label"]).plot(figsize=(16, 4),lw=2)

    plt.show()

In [147]:
df = pd.read_csv("../dataset/Dataset workflow.csv")
df1 = df[df["Training"] == 1]
df1

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,uuid,age,gender,status,Cough (Yes (1)/No (0)),Training,IN_OUT_1,IN_OUT_2,...,IN_OUT_7,IN_OUT_8,IN_OUT_9,IN_OUT_10,IN_OUT_11,IN_OUT_12,IN_OUT_13,IN_OUT_14,IN_OUT_15,IN_OUT_16
0,1,Warren,0f8d80f9-700e-4069-924d-e15f29d7c19a,16.0,female,COVID-19,1.0,1,"0.676663,1.172248","1.964468,2.321861",...,,,,,,,,,,
1,2,Warren,3278e364-afdd-4c39-9126-b8b4a4caf1c5,39.0,male,healthy,1.0,1,"4.059530,4.841149","4.935187,5.410265",...,,,,,,,,,,
2,3,Warren,6462d540-48bc-410b-b08e-cdf0b45ee118,45.0,male,COVID-19,1.0,1,"0.776306,1.765430","2.094332,2.619125",...,,,,,,,,,,
3,4,Warren,7876c549-066a-4ea1-a82e-45772114f964,44.0,male,symptomatic,1.0,1,"1.014883,1.671931","1.671931,2.227612",...,,,,,,,,,,
4,5,Warren,ccdb32af-58f4-4718-9f6e-eec7e0020382,31.0,male,COVID-19,1.0,1,"1.931091,2.374866","2.374866,2.817433",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,346,,c98b76a0-089e-4552-8649-2804e2d95403,,female,COVID-19,0.0,1,,,...,,,,,,,,,,
346,347,,ce5ae235-fa9f-4ded-ba7b-623b78cf76a3,,,,0.0,1,,,...,,,,,,,,,,
347,348,,d43e0849-42db-4442-94bf-ace2056fcffc,23.0,male,healthy,0.0,1,,,...,,,,,,,,,,
348,349,,01424527-9c3b-4b6e-96f1-9eea3150819b,39.0,female,COVID-19,0.0,1,,,...,,,,,,,,,,


In [148]:
df = pd.read_csv("../dataset/Dataset workflow.csv")
audio_file_path = "../dataset/audio_files/AWS Audio Files"
audio_df = get_audio_files_and_cough_timeframe_from_csv(df1, audio_file_path, filename_index='uuid', start_end_time_prefix='IN_OUT_', max_start_end_count=16)

In [156]:
onest = os.listdir(audio_file_path)
onest[0].split
count = 0
for i in os.listdir(audio_file_path):
    curr = i.split(".")[0]
    if curr in df1["uuid"].values:
        count += 1

count

300

In [149]:
audio_df

Unnamed: 0,amp,amp_original,sf,sf_original,shape,shape_original,cough_start_end,label
0029d048-898a-4c70-89c7-0815cdcf7391,"[9.562798e-07, -1.2558273e-06, 1.4575851e-06, ...","[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",16000,22050,"(157440,)","(216972,)","[[0.720196, 1.122022], [1.551667, 1.996767], [...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [129]:
audio_df = get_audio_files_and_cough_timeframe_from_csv(df, audio_file_path, filename_index='uuid', start_end_time_prefix='IN_OUT_', max_start_end_count=16)

  amplitude_and_sampling_freq = lambda file_path: librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


PermissionError: [Errno 13] Permission denied: '.'

In [None]:
from cough_segmentation.utils.framing import Framing


In [123]:
audio_df

Unnamed: 0,amp,sf,shape,cough_start_end,label
0029d048-898a-4c70-89c7-0815cdcf7391,"[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",22050,"(216972,)","[[0.720196, 1.122022], [1.551667, 1.996767], [...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
005b8518-03ba-4bf5-86d2-005541442357,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,"(142884,)",,
006d8d1c-2bf6-46a6-8ef2-1823898a4733,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,"(219618,)",,
008ba489-31ad-44d8-856b-fcf72369dc46,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,"(215649,)","[[4.019377, 4.52724], [4.551424, 4.880326], [4...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
008c1c9e-aeef-40c5-846c-24f1b964f884,"[-1.1011437e-12, -6.4785234e-12, -4.3936547e-1...",22050,"(219618,)","[[2.297893, 2.742448], [2.795401, 3.106958], [...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...,...
fd7d172d-4106-427a-870b-0384c88f147f,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,"(72765,)","[[0.631714, 1.039029], [1.076743, 1.4256], [1....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
fd849b72-f4bf-4852-9bc2-fd9becc9571e,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,"(100548,)","[[1.759509, 2.532103], [2.58552, 2.751634]]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
fed255ec-4829-4f4a-b22d-9bb23f2dd89f,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",22050,"(218295,)","[[2.6532, 3.0888], [3.173657, 3.580971], [3.58...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
ff1234d7-7837-4ba7-842f-99fdc916baa9,"[2.2152083e-12, -2.9893685e-11, 4.2287982e-11,...",22050,"(216972,)","[[3.166311, 3.60722], [3.650956, 4.039854], [6...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [122]:
audio_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 400 entries, 0029d048-898a-4c70-89c7-0815cdcf7391 to ffc99580-678c-46c3-8e3f-c2a19916657d
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   amp              400 non-null    object
 1   sf               400 non-null    int64 
 2   shape            400 non-null    object
 3   cough_start_end  150 non-null    object
 4   label            150 non-null    object
dtypes: int64(1), object(4)
memory usage: 34.9+ KB


In [8]:
amplitude_and_sampling_freq = librosa.load("../dataset/audio_files/AWS Audio Files/005b8518-03ba-4bf5-86d2-005541442357.wav")
amplitude_and_sampling_freq

(array([ 0.        ,  0.        ,  0.        , ..., -0.02018352,
        -0.01895304, -0.01745526], dtype=float32),
 22050)

In [9]:
# audio_df2 = audio_df.dropna()
# audio_df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 150 entries, 0029d048-898a-4c70-89c7-0815cdcf7391 to ff1234d7-7837-4ba7-842f-99fdc916baa9
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   amp              150 non-null    object
 1   sf               150 non-null    int64 
 2   shape            150 non-null    object
 3   cough_start_end  150 non-null    object
 4   label            150 non-null    object
dtypes: int64(1), object(4)
memory usage: 7.0+ KB


In [166]:
def apply_framing(audio_df):
  def create_overlapping_frames(key, amp, label, sf, frame_size, hop_length):
    # Calculate the total number of frames
    if len(amp) == len(label):
      total_frames = 1 + int((len(amp) - frame_size) / hop_length)
      dic = {"key":[], "sf":[], "start":[], "end":[], "max_amp":[], "frame_index":[], "amp":[], "label":[]}

      # Create overlapping frames
      for i in range(total_frames):
        dic["key"].append(key)
        dic["sf"].append(sf)
        dic["start"].append(i * hop_length)
        dic["end"].append(i * hop_length + frame_size)
        dic["max_amp"].append(np.max(amp[i * hop_length: i * hop_length + frame_size]))
        dic["frame_index"].append(i)
        dic["amp"].append(amp[i * hop_length: i * hop_length + frame_size])
        frames_label_raw = label[i * hop_length: i * hop_length + frame_size]

        f_label = 0
        if np.sum(frames_label_raw==1) > (len(frames_label_raw) / 2):
          f_label = 1
        dic["label"].append(f_label)

      return dic
    else:
      print('Error: non matching amp and labels', key, len(amp), len(label))

  def create_and_label_frames(audio_df):
    return create_overlapping_frames(audio_df.name, audio_df["amp"], audio_df["label"],audio_df["sf"],frame_size, hop_length)

  all_frames = {}
  for frame_size in [256, 512, 1024]:
    hop_length = frame_size // 2

    frame_df = pd.DataFrame()
    #xx = audio_df.head(1).apply(create_and_label_frames, axis=1)
    xx = audio_df.apply(create_and_label_frames, axis=1)
    for x in xx:
      if len(frame_df) > 0:
        frame_df = pd.concat([frame_df, pd.DataFrame(x)], ignore_index=True)
      else:
        frame_df = pd.DataFrame(x)

    all_frames[frame_size] = frame_df
    print(f'Frame size {frame_size}, hop_length {hop_length}, count {len(frame_df)}')
  return all_frames


all_frames = apply_framing(audio_df=audio_df2)
framed_df = all_frames[1024]
framed_df

Frame size 256, hop_length 128, count 226033
Frame size 512, hop_length 256, count 112895
Frame size 1024, hop_length 512, count 56337


Unnamed: 0,key,sf,start,end,max_amp,frame_index,amp,label
0,0029d048-898a-4c70-89c7-0815cdcf7391,22050,0,1024,0.365307,0,"[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",0
1,0029d048-898a-4c70-89c7-0815cdcf7391,22050,512,1536,0.365307,1,"[6.083851e-05, 6.2495135e-05, 4.0901323e-05, 2...",0
2,0029d048-898a-4c70-89c7-0815cdcf7391,22050,1024,2048,0.062898,2,"[-0.21390674, -0.21399471, -0.21432504, -0.213...",0
3,0029d048-898a-4c70-89c7-0815cdcf7391,22050,1536,2560,0.003421,3,"[0.0034214016, 0.002930639, 0.0025067274, 0.00...",0
4,0029d048-898a-4c70-89c7-0815cdcf7391,22050,2048,3072,0.002464,4,"[0.00055328425, 0.00037316832, 0.00033341238, ...",0
...,...,...,...,...,...,...,...,...
56332,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,213504,214528,0.000000,417,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
56333,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,214016,215040,0.000000,418,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
56334,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,214528,215552,0.000000,419,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
56335,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,215040,216064,0.000000,420,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0


In [171]:
framed_df["amp"][3].shape

(1024,)

In [10]:
df_to_save = audio_df.copy()
df_to_save.reset_index(inplace=True)
df_to_save.to_feather("../dataset/audio_data_before_framing.feather")

In [None]:
df_from_save = pd.read_feather("../dataset/audio_data_before_framing.feather")
df_from_save['key'] = df_from_save['index']
df_from_save.drop(columns=['index'], inplace=True)
df_from_save.set_index('key', inplace=True)
df_from_save

In [21]:
all_frames_from_save = apply_framing(audio_df=audio_df2)

Frame size 256, hop_length 128, count 226033
Frame size 512, hop_length 256, count 112895
Frame size 1024, hop_length 512, count 56337


In [None]:
df_frame_from_save = all_frames_from_save[1024]
df_frame_from_save

In [23]:
df_frame_from_save["amp"].values[0].shape
df_frame_from_save["amp"].values[1].shape

(1024,)

In [164]:
framed_df["amp"].values[0].shape

(1024,)

In [165]:
sr = 22050
# Step 2: Compute the Mel spectrogram
# S = librosa.feature.melspectrogram(y=df_frame_from_save["amp"].values[0], sr=sr, n_mels=256)

# Step 3: Convert to Decibel scale (log scale)
mfccs = librosa.feature.mfcc(y=framed_df["amp"].values[0], sr=sr, n_mfcc=64)
# mfccs.shape
mfccs_second_derivative = librosa.feature.delta(mfccs, order=2, mode="nearest",width=9)

mfccs_second_derivative.shape
# mfccs



(64, 3)

In [25]:
mfccs_second_derivative

array([[ 6.24215078e+00, -3.88640952e+00, -1.28490477e+01],
       [ 1.33424312e-01, -6.85519278e-02, -2.49962583e-01],
       [ 7.17767656e-01, -1.13600217e-01, -9.10888016e-01],
       [ 1.21046215e-01, -1.08464561e-01, -3.05435956e-01],
       [ 2.97245950e-01, -1.29995137e-01, -5.18237710e-01],
       [ 2.07655579e-01, -8.19296017e-02, -3.46935898e-01],
       [ 1.30293891e-01, -1.20247245e-01, -3.34714204e-01],
       [ 3.45802724e-01, -1.30102232e-01, -5.66976488e-01],
       [-1.51654318e-01, -1.88008007e-02,  1.19692959e-01],
       [-9.96573642e-02, -3.76331657e-02,  3.56809832e-02],
       [-3.96112770e-01, -5.89364534e-03,  3.86093557e-01],
       [-2.59210974e-01, -5.43074384e-02,  1.66888326e-01],
       [-3.86992060e-02, -2.66789775e-02, -6.65505603e-03],
       [ 2.45813772e-01, -1.91094559e-02, -2.78299838e-01],
       [ 2.68773466e-01, -7.01025277e-02, -3.87947738e-01],
       [-1.31204739e-01, -6.95563108e-02,  1.29590202e-02],
       [-8.04125667e-02, -7.59424195e-02

In [26]:
mfccs.shape

(128, 3)

In [27]:
mfccs = librosa.feature.mfcc(y=df_frame_from_save["amp"].values[0], sr=sr, n_mfcc=13)
mfccs

array([[-3.86121643e+02, -1.77154587e+02, -1.47739655e+02],
       [ 1.01281769e+02,  1.05258896e+02,  1.06068924e+02],
       [-1.26125889e+01,  1.79669589e-01,  7.72359800e+00],
       [ 2.65064583e+01,  3.16746216e+01,  3.18317223e+01],
       [ 1.04343185e+01,  1.85284691e+01,  2.06168442e+01],
       [ 7.84082699e+00,  1.31958504e+01,  1.47657261e+01],
       [ 8.10751724e+00,  1.37883873e+01,  1.39138346e+01],
       [-2.71352673e+00,  5.99053764e+00,  8.68387890e+00],
       [ 1.44336033e+01,  1.31738148e+01,  1.10454292e+01],
       [ 9.95032120e+00,  9.97469711e+00,  8.26042080e+00],
       [ 1.97509727e+01,  1.50036116e+01,  9.98396397e+00],
       [ 1.41571884e+01,  1.27514486e+01,  8.83670521e+00],
       [ 4.23081398e+00,  4.64703894e+00,  3.83069515e+00]], dtype=float32)

In [29]:
mfccs.flatten()

array([-3.86121643e+02, -1.77154587e+02, -1.47739655e+02,  1.01281769e+02,
        1.05258896e+02,  1.06068924e+02, -1.26125889e+01,  1.79669589e-01,
        7.72359800e+00,  2.65064583e+01,  3.16746216e+01,  3.18317223e+01,
        1.04343185e+01,  1.85284691e+01,  2.06168442e+01,  7.84082699e+00,
        1.31958504e+01,  1.47657261e+01,  8.10751724e+00,  1.37883873e+01,
        1.39138346e+01, -2.71352673e+00,  5.99053764e+00,  8.68387890e+00,
        1.44336033e+01,  1.31738148e+01,  1.10454292e+01,  9.95032120e+00,
        9.97469711e+00,  8.26042080e+00,  1.97509727e+01,  1.50036116e+01,
        9.98396397e+00,  1.41571884e+01,  1.27514486e+01,  8.83670521e+00,
        4.23081398e+00,  4.64703894e+00,  3.83069515e+00], dtype=float32)

In [172]:
framed_df

Unnamed: 0,key,sf,start,end,max_amp,frame_index,amp,label
0,0029d048-898a-4c70-89c7-0815cdcf7391,22050,0,1024,0.365307,0,"[-4.574141e-08, 4.00641e-08, -4.5631047e-08, 3...",0
1,0029d048-898a-4c70-89c7-0815cdcf7391,22050,512,1536,0.365307,1,"[6.083851e-05, 6.2495135e-05, 4.0901323e-05, 2...",0
2,0029d048-898a-4c70-89c7-0815cdcf7391,22050,1024,2048,0.062898,2,"[-0.21390674, -0.21399471, -0.21432504, -0.213...",0
3,0029d048-898a-4c70-89c7-0815cdcf7391,22050,1536,2560,0.003421,3,"[0.0034214016, 0.002930639, 0.0025067274, 0.00...",0
4,0029d048-898a-4c70-89c7-0815cdcf7391,22050,2048,3072,0.002464,4,"[0.00055328425, 0.00037316832, 0.00033341238, ...",0
...,...,...,...,...,...,...,...,...
56332,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,213504,214528,0.000000,417,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
56333,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,214016,215040,0.000000,418,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
56334,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,214528,215552,0.000000,419,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
56335,ff1234d7-7837-4ba7-842f-99fdc916baa9,22050,215040,216064,0.000000,420,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0


In [157]:
# y, sr = librosa.load(file_path, sr=22050)  # sr=22050 is the default sampling rate

def extract_features(amplitude):
    sr = 22050
    # Step 2: Compute the Mel spectrogram
    # S = librosa.feature.melspectrogram(y=amplitude, sr=sr, n_mels=256)

    # Step 3: Convert to Decibel scale (log scale)
    mfccs = librosa.feature.mfcc(y=amplitude, sr=sr, n_mfcc=64,n_fft=64)
    
    mfccs_second_derivative = librosa.feature.delta(mfccs, order=3, mode="interp",width=9)
    
    return mfccs_second_derivative

framed_df["mfccs"] = framed_df["amp"].apply(lambda x: extract_features(x))
framed_df

ParameterError: when mode='interp', width=9 cannot exceed data.shape[axis]=3

In [33]:
# df_frame_from_save["mfccs"].to_csv("test.csv",index=False)

In [62]:
framed_df["mfccs"][0].shape

(128, 3)

In [15]:
framed_df["mfccs"][0].reshape(-1).shape

(192,)

In [16]:
# preparing data for ml
data = []
for i in range(len(framed_df)):
    data.append(framed_df["mfccs"][i].reshape(-1))
X = np.array(data)

In [17]:
X[0].shape

(192,)

In [18]:
X.shape

(56337, 192)

# Model Building

In [19]:
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

In [20]:
# X = df_frame_from_save["mfccs"]
y = framed_df["label"]

In [21]:
y

0        0
1        0
2        0
3        0
4        0
        ..
56332    0
56333    0
56334    0
56335    0
56336    0
Name: label, Length: 56337, dtype: int64

In [22]:
y.value_counts()

label
0    44249
1    12088
Name: count, dtype: int64

In [23]:
y.value_counts()/y.shape

label
0    0.785434
1    0.214566
Name: count, dtype: float64

In [24]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [25]:
X_train[0].shape

(192,)

# Logistic Regression

In [26]:
lr = LogisticRegression(penalty="elasticnet",verbose=1,l1_ratio=0.4,solver="saga")

In [27]:
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)


max_iter reached after 19 seconds




In [28]:
accuracy_score(y_test,y_pred)

0.8774405395811147

In [29]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.90      0.94      0.92      8803
           1       0.76      0.64      0.69      2465

    accuracy                           0.88     11268
   macro avg       0.83      0.79      0.81     11268
weighted avg       0.87      0.88      0.87     11268



# XGBoost

In [32]:
xgb = XGBClassifier()
xgb.fit(X_train,y_train)
y_pred = xgb.predict(X_test)


In [33]:
accuracy_score(y_test,y_pred)

0.8775292864749734

In [34]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.91      0.94      0.92      8803
           1       0.75      0.66      0.70      2465

    accuracy                           0.88     11268
   macro avg       0.83      0.80      0.81     11268
weighted avg       0.87      0.88      0.87     11268



# Preparing Data for CNN

In [113]:
# preparing data for cnn
data = []
for i in range(len(framed_df)):
    data.append(framed_df["mfccs"][i].reshape(64,1,3))
X = np.array(data)
X.shape

(56337, 64, 1, 3)

In [98]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from imblearn.over_sampling import SMOTE

In [94]:
X[0].shape

(64, 3)

In [117]:
def create_cnn_model(input_shape):
    model = Sequential([
        Conv2D(64, kernel_size=(2, 2), activation='relu', input_shape=input_shape,padding="same"),
        # Conv2D(128, kernel_size=(2, 2), activation='relu', input_shape=input_shape,padding="same"),
        # MaxPooling2D(pool_size=(2,2)),
        Flatten(),
        Dense(128, activation='relu'),
        # Dense(8, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = create_cnn_model(input_shape=X[0].shape)
model.summary()

Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 64, 1, 64)         832       
                                                                 
 flatten_21 (Flatten)        (None, 4096)              0         
                                                                 
 dense_48 (Dense)            (None, 128)               524416    
                                                                 
 dense_49 (Dense)            (None, 1)                 129       
                                                                 
Total params: 525,377
Trainable params: 525,377
Non-trainable params: 0
_________________________________________________________________


In [102]:
num_samples, height, width = X.shape
X_reshaped = X.reshape((num_samples, height * width))

In [118]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [104]:
X_train.shape

(45069, 192)

In [105]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [106]:
X_train_resampled_reshaped = X_train_resampled.reshape((-1, height, width))


In [108]:
X_train_resampled_reshaped.shape

(70892, 64, 3)

In [119]:
history = model.fit(X_train, y_train, epochs=4, batch_size=32, validation_data=(X_test,y_test))


Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [120]:
from cough_segmentation.utils.sono_cross_val import CrossValSplit

In [None]:
cv = CrossValSplit(framed_df,)