## Libraries


In [1]:
import os

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio
from tensorflow.keras.applications import ResNet50

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from collections import Counter
import tensorflow_addons as tfa

from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
import keras

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, StratifiedKFold




TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.11.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
random_state = 1234

str_back = 'background'
str_embeddings = 'Embeddings'
str_fore = 'foreground'
str_fold = 'fold'
str_filename = 'filename'
str_target = 'target'

#############  Tensorflow Random State  #########################
tf.keras.utils.set_random_seed(random_state)

In [3]:
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

In [5]:
import os
import zipfile
import re

def extract_all_zips():
  loc_file_folder = 'C:/Users/wyd2hu/OneDrive - University of Virginia/Katha/dataverse_files/'
  data_list_df = []

  for outer_folder in os.listdir(loc_file_folder):
    fore_or_back = str_fore if str_fore in outer_folder else str_back
    for wav_file in os.listdir(os.path.join(loc_file_folder, outer_folder, fore_or_back)):
        if wav_file.endswith('.wav'):
          data_list_df.append([os.path.join(loc_file_folder, outer_folder, fore_or_back, wav_file),
                              1 if str_fore in outer_folder else 0,
                              int(re.findall(r'\d+', outer_folder)[0])])
  
  df_path = pd.DataFrame(data = data_list_df, columns = [str_filename, str_target, 'Dataset Fold'])
  df_path.to_csv('file_path.csv', index=False)
  print(df_path)
  print(df_path.shape)

extract_all_zips()

                                                filename  target  Dataset Fold
0      C:/Users/wyd2hu/OneDrive - University of Virgi...       1             1
1      C:/Users/wyd2hu/OneDrive - University of Virgi...       1             1
2      C:/Users/wyd2hu/OneDrive - University of Virgi...       1             1
3      C:/Users/wyd2hu/OneDrive - University of Virgi...       1             1
4      C:/Users/wyd2hu/OneDrive - University of Virgi...       1             1
...                                                  ...     ...           ...
79473  C:/Users/wyd2hu/OneDrive - University of Virgi...       1             3
79474  C:/Users/wyd2hu/OneDrive - University of Virgi...       1             3
79475  C:/Users/wyd2hu/OneDrive - University of Virgi...       1             3
79476  C:/Users/wyd2hu/OneDrive - University of Virgi...       1             3
79477  C:/Users/wyd2hu/OneDrive - University of Virgi...       1             3

[79478 rows x 3 columns]
(79478, 3)


# Transfer the learning from YAMNet

In [4]:
# Utility functions for loading audio files and making sure the sample rate is correct.
# applies the embedding extraction model to a wav data
def extract_embedding(wav_data, label, fold):
  ''' run YAMNet to extract embedding from the wav data '''
  scores, embeddings, spectrogram = yamnet_model(wav_data)
  num_embeddings = tf.shape(embeddings)[0]
  return (embeddings,
            tf.repeat(label, num_embeddings),
            tf.repeat(fold, num_embeddings))

def load_wav_for_map(filename, label, fold):
  return load_wav_16k_mono(filename), label, fold

@tf.function
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
          file_contents,
          desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav


def extract_embedding_for_all_data(wav_data, label, filename):
  ''' run YAMNet to extract embedding from the wav data '''
  scores, embeddings, spectrogram = yamnet_model(wav_data)
  num_embeddings = tf.shape(embeddings)[0]
  return (embeddings,
            tf.repeat(label, num_embeddings),
            tf.repeat(filename, num_embeddings))

def load_wav_for_map_for_all_data(filename, label):
  return load_wav_16k_mono(filename), label, filename

In [5]:
def get_clip_level_prediction(list_actual_class, predicted_proba):
  zip_true_predicted = list(zip(list_actual_class, predicted_proba))

  list_y_true = []
  list_y_pred = []

  for first_frame, second_frame in zip(zip_true_predicted[::2], zip_true_predicted[1::2]):

    if first_frame[0] != second_frame[0]:
      print('\n\n\n\nSevere problem\n\n\n\n')
    else:
      list_y_true.append(first_frame[0])

      clip_prob = np.array([first_frame[1], second_frame[1]])
      class_in_frames = clip_prob.argmax(axis=-1)
      if len(np.unique(class_in_frames)) == 1: # means the predicted class is same in each frame:
        list_y_pred.append(class_in_frames[0])
      else: # means the predicted class differs between the first and second frame of 1 second audio clip
        list_y_pred.append(np.argmax(np.sum(clip_prob, axis=0)))

  return list_y_true, list_y_pred

In [6]:
def get_df_g_aud_loc():
  loc_g_aud = 'C:/Users/wyd2hu/S2He/AudData/Google AudioSet_Balanced_Train/Speech//10_seconds'
  list_loc_g_aud_files = loc_g_aud +'//'+ pd.Series(os.listdir(loc_g_aud))
  df_g_aud_loc = pd.DataFrame({str_filename: list_loc_g_aud_files,
                               str_target: np.repeat(1, len(list_loc_g_aud_files))})
  df_g_aud_loc.to_excel('google_aud_speech_class_data.xlsx', index=False)
  print(df_g_aud_loc)

get_df_g_aud_loc()

                                              filename  target
0    C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
1    C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
2    C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
3    C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
4    C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
..                                                 ...     ...
339  C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
340  C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
341  C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
342  C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1
343  C:/Users/wyd2hu/S2He/AudData/Google AudioSet_B...       1

[344 rows x 2 columns]


In [None]:
df_path = pd.read_csv('file_path.csv')
df_path = shuffle(df_path, random_state = random_state)
df_path.reset_index(inplace=True, drop=True)

list_all_files = df_path[str_filename].tolist()
list_all_labels = df_path[str_target].tolist()

df_g_aud_loc = pd.read_excel('google_aud_speech_class_data.xlsx')
list_all_files.extend(df_g_aud_loc[str_filename])
list_all_labels.extend(df_g_aud_loc[str_target])

main_ds_all_data = tf.data.Dataset.from_tensor_slices((list_all_files, list_all_labels))
main_ds_all_data = main_ds_all_data.map(load_wav_for_map_for_all_data)
main_ds_all_data = main_ds_all_data.map(extract_embedding_for_all_data).unbatch()

df_embedding_all_data = pd.DataFrame(columns = [str_embeddings, str_target, str_filename])
for emb, label, file_name in main_ds_all_data:
  df_embedding_all_data.loc[-1] = [emb, label, file_name.numpy().decode()]
  df_embedding_all_data.index += 1
  df_embedding_all_data = df_embedding_all_data.sort_index()

print(df_embedding_all_data)


## Model Training, Validation, and Test

In [6]:
df_embedding_all_data = pd.read_pickle('embedd_dataverse_g_aud_balanced_trained.pkl')

In [15]:
print(str(df_embedding_all_data.head(2)[str_embeddings].tolist()))

[<tf.Tensor: shape=(1024,), dtype=float32, numpy=
array([2.1769218 , 0.31531605, 0.8513692 , ..., 0.3291769 , 2.1372237 ,
       3.8863742 ], dtype=float32)>, <tf.Tensor: shape=(1024,), dtype=float32, numpy=
array([2.1564043 , 0.2940428 , 0.8860374 , ..., 0.36237454, 2.1497383 ,
       3.9327104 ], dtype=float32)>]


In [7]:
list_metrics = [keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                tfa.metrics.F1Score(name='f1_score', num_classes=1, threshold=0.5, average='macro'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc')]

def train_val_test():
  global all_predicted_proba, all_list_actual_class, df_embedding_all_data, METRICS
  all_predicted_proba = []
  all_list_actual_class = []

  batch_size = 32
  n_epoch = 25

  df_path = pd.read_csv('file_path.csv')
  df_path = shuffle(df_path, random_state = random_state)
  df_path.reset_index(inplace=True, drop=True)

  my_classes = set(df_path[str_target])
  validation_ratio = 0.10
  test_ratio = 0.10

  for batch_size in [32]:

    skf = StratifiedKFold(n_splits=20, shuffle=True, random_state = random_state)

    for ith_fold, (train_index, test_index) in enumerate(skf.split(df_path[str_filename], df_path[str_target])):

      # Keeping the filenames in x_train since just based on the filenames, the embedddings will be retrieved.
      x_train = df_path.iloc[train_index][str_filename].tolist()
      y_train = df_path.iloc[train_index][str_target].tolist()

      x_test = df_path.iloc[test_index][str_filename].tolist()
      y_test = df_path.iloc[test_index][str_target].tolist()

      x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size = test_ratio / (test_ratio + validation_ratio),
                                                      shuffle=True, stratify = y_test, random_state = random_state)

      df_temp_train = pd.DataFrame({str_filename: x_train, str_target: y_train})
    #   df_original_class_0 = df_temp_train[df_temp_train[str_target] == 0].copy()
    #   df_train_class_1 = df_temp_train[df_temp_train[str_target] == 1].copy()

    #   df_g_aud_loc = pd.read_excel('google_aud_speech_class_data.xlsx')
    #   df_train_class_0 = df_original_class_0.iloc[:(df_train_class_1.shape[0] + int((int(10/0.48) * df_g_aud_loc.shape[0]) / 2))].copy()
    #   df_temp_train = pd.concat([df_train_class_1, df_train_class_0])

    #   df_temp_train = pd.concat([df_temp_train, df_g_aud_loc])
      df_temp_train[str_fold] = np.repeat(1, df_temp_train.shape[0]) # 1 is used (randomly) to denote the train fold everywhere

      df_temp_val = pd.DataFrame({str_filename: x_val, str_target: y_val})
      df_temp_val[str_fold] = np.repeat(2, df_temp_val.shape[0])

      df_temp_test = pd.DataFrame({str_filename: x_test, str_target: y_test})
      df_temp_test[str_fold] = np.repeat(3, df_temp_test.shape[0])

      pd_data = pd.concat([df_temp_train, df_temp_val, df_temp_test])

      for train_fold, val_fold, test_fold in ((1, 2, 3), (1, 3, 2)):
        print(train_fold, val_fold, test_fold)
        filenames = pd_data[str_filename]
        targets = pd_data[str_target]
        folds = pd_data[str_fold]

        train_ds = df_embedding_all_data[df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == train_fold][str_filename])].copy()
        
        train_ds = shuffle(train_ds, random_state = random_state)
        train_ds.reset_index(inplace=True, drop=True)

        neg, pos = np.bincount(train_ds[str_target])
        
        total = neg + pos
        weight_for_0 = (1 / neg) * (total / 2.0)
        weight_for_1 = (1 / pos) * (total / 2.0)

        class_weight = {0: weight_for_0, 1: weight_for_1}

        print('\n\nTrain', train_ds[train_ds[str_target] == 1].shape[0], train_ds[train_ds[str_target] == 0].shape[0])

        train_ds = tf.data.Dataset.from_tensor_slices((train_ds[str_embeddings].tolist(), train_ds[str_target].tolist()))

        val_ds = df_embedding_all_data[df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == val_fold][str_filename])].copy()
        val_ds = tf.data.Dataset.from_tensor_slices((val_ds[str_embeddings].tolist(), val_ds[str_target].tolist()))

        test_ds = df_embedding_all_data[df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == test_fold][str_filename])].copy()
        test_ds = tf.data.Dataset.from_tensor_slices((test_ds[str_embeddings].tolist(), test_ds[str_target].tolist()))

        clip_level_list_actual_class =  list(map(lambda x: x[1].numpy(), test_ds))
        print(len(clip_level_list_actual_class))
        # print(clip_level_list_actual_class)

        train_ds = train_ds.cache().shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
        val_ds = val_ds.cache().batch(batch_size).prefetch(tf.data.AUTOTUNE)
        test_ds = test_ds.cache().batch(batch_size).prefetch(tf.data.AUTOTUNE)

        keras_yamnet_model = keras.Sequential([hub.KerasLayer(yamnet_model)])
        leaky_relu = keras.layers.LeakyReLU(alpha=0.1)

        model_B_on_A = keras.models.Sequential(keras_yamnet_model.layers[:-1])
        model_B_on_A.add(keras.layers.Dense(2048, activation = leaky_relu))
        model_B_on_A.add(keras.layers.Dense(1024, activation = leaky_relu))
        model_B_on_A.add(keras.layers.Dense(512, activation = leaky_relu))
        model_B_on_A.add(keras.layers.Dense(len(my_classes)-1, activation ='sigmoid'))

        for layer in model_B_on_A.layers[:-1]:
          layer.trainable = False

        optimizer = keras.optimizers.Adam(lr=1e-4, beta_1=0.9, beta_2=0.999)
        model_B_on_A.compile(loss=keras.losses.BinaryCrossentropy(), optimizer=optimizer,
                            metrics= list_metrics)
        callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        history = model_B_on_A.fit(train_ds, epochs=2,
                                  validation_data=val_ds,
                                  callbacks = callback,
                                  class_weight=class_weight,
                                  verbose=1)
        
        dict_predict_test = model_B_on_A.evaluate(test_ds, return_dict=True)
        specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))


        print('\n\n\n Without retrain all layers ')
        print('Test Performance\n\n', batch_size, ith_fold, dict_predict_test.get('precision'), dict_predict_test.get('recall'),
               specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
        print('Hey, retraining :)')

        for layer in model_B_on_A.layers[:-1]:
          layer.trainable = True

        optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
        model_B_on_A.compile(loss=keras.losses.BinaryCrossentropy(), optimizer=optimizer,
                             metrics=list_metrics)
        history = model_B_on_A.fit(train_ds, epochs=n_epoch,
                                   validation_data=val_ds,
                                   class_weight=class_weight,
                                   callbacks = callback, verbose=1)

        
        predicted_proba = model_B_on_A.predict(test_ds)
        flat_list = []
        [flat_list.extend(sublist) for sublist in predicted_proba]
        print(len(clip_level_list_actual_class), len(flat_list))

        all_predicted_proba.extend(flat_list)
        all_list_actual_class.extend(clip_level_list_actual_class)
        
        dict_predict_test = model_B_on_A.evaluate(test_ds, return_dict=True)

        print('\n\n\n After retrain all layers ')
        print('Test Performance\n\n', batch_size, ith_fold, dict_predict_test.get('precision'), dict_predict_test.get('recall'),
               specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')

train_val_test()

1 2 3


Train 51284 99724
3974


  super().__init__(name, **kwargs)


Epoch 1/2
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Epoch 2/2



 Without retrain all layers 
Test Performance

 32 0 0.622020423412323 0.8118518590927124 0.7461890243902439 0.7684952020645142 0.8499782681465149 0.7790204417414781 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
3974 3974



 After retrain all layers 
Test Performance

 32 0 0.7567567825317383 0.8296296000480652 0.7461890243902439 0.8515349626541138 0.910820484161377 0.7879093122191545 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 0 0.6337245106697083 0.7611275911331177 0.7741812642802742 0.7697533965110779 0.8334912061691284 0.7676544277066959 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
3974 3974



 After retrain all layers 
Test Performance

 32 0 0.7719429731369019 0.7633531093597412 0.7741812642802742 0.8432310223579407 0.8965660333633423 0.7

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 1 0.5997770428657532 0.7970370650291443 0.7263719512195121 0.7503774762153625 0.8350797891616821 0.7617045081243282 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
3974 3974



 After retrain all layers 
Test Performance

 32 1 0.7491115927696228 0.7807407379150391 0.7263719512195121 0.8366884589195251 0.8992928862571716 0.7535563445672756 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 1 0.6236373782157898 0.8063797950744629 0.7501904036557502 0.7692501544952393 0.8477535247802734 0.7782850993651065 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
3974 3974



 After retrain all layers 
Test Performance

 32 1 0.7557732462882996 0.8011869192123413 0.75019040365

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 2 0.6123188138008118 0.7511110901832581 0.7553353658536586 0.7539003491401672 0.8275758028030396 0.7532232280184583 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
3974 3974



 After retrain all layers 
Test Performance

 32 2 0.7454412579536438 0.7570370435714722 0.7553353658536586 0.829642653465271 0.8872003555297852 0.7561862047125654 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 2 0.6351595520973206 0.7826409339904785 0.7692307692307693 0.773779571056366 0.8501806855201721 0.7759358516106238 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
3974 3974



 After retrain all layers 
Test Performance

 32 2 0.7449051141738892 0.7863501310348511 0.7692307692307

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 3 0.6327649354934692 0.7696296572685242 0.7701981707317073 0.7700050473213196 0.8414031267166138 0.7699139140001157 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
3974 3974



 After retrain all layers 
Test Performance

 32 3 0.7548806667327881 0.7733333110809326 0.7701981707317073 0.8376950025558472 0.8985440135002136 0.7717657409063199 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 3 0.6139130592346191 0.7856082916259766 0.7463823305407464 0.7596879601478577 0.8394952416419983 0.7659953110833615 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
3974 3974



 After

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 4 0.6151586174964905 0.7755555510520935 0.7503810975609756 0.7589330673217773 0.8350427150726318 0.7629683243065346 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
3974 3974



 After retrain all layers 
Test Performance

 32 4 0.7429971694946289 0.7859259247779846 0.7503810975609756 0.8349270224571228 0.896602988243103 0.7681535111694802 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 4 0.6188207268714905 0.7707715034484863 0.7562833206397562 0.7611978054046631 0.8394819498062134 0.7635274120441213 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
3974 3974



 After retrain all layers 
Test Performance

 32 4 0.7273342609405518 0.7974777221679688 0.7562833206397562 0.8298943042755127 0.89673793

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 5 0.6342648863792419 0.7733333110809326 0.770579268292683 0.7715148329734802 0.8452537655830383 0.7719562896868077 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
3974 3974



 After retrain all layers 
Test Performance

 32 5 0.7432065010070801 0.8103703856468201 0.770579268292683 0.8404629826545715 0.9041159152984619 0.7904748269697515 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 5 0.6147540807723999 0.778931736946106 0.7494287890327495 0.759436309337616 0.8416605591773987 0.7641802629894276 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
3974 3974



 After retrain all layers 
Test Performance

 32 5 0.74260

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 6 0.6326409578323364 0.7896296381950378 0.7641006097560976 0.772773027420044 0.8394266366958618 0.7768651239755677 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
3974 3974



 After retrain all layers 
Test Performance

 32 6 0.7540029287338257 0.7674074172973633 0.7641006097560976 0.8359335660934448 0.9074642062187195 0.7657540135267304 



1 3 2


Train 51284 99724
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 6 0.6142520904541016 0.7737388610839844 0.7505712109672505 0.758429765701294 0.8295162320137024 0.7621550360256175 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
3974 3974



 After retrain all layers 
Test Performance

 32 6 0.7596223950386047 0.7759643793106079 0.7505712109672

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 7 0.6040462255477905 0.7740740776062012 0.7389481707317073 0.7508807182312012 0.8272006511688232 0.7565111241689542 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
3974 3974



 After retrain all layers 
Test Performance

 32 7 0.7317763566970825 0.765925943851471 0.7389481707317073 0.8251132369041443 0.8903567790985107 0.752437057291589 



1 3 2


Train 51282 99726
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 7 0.6308439373970032 0.7696296572685242 0.7682926829268293 0.7687468528747559 0.8384689092636108 0.7689611700976767 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
3974 3974



 After retrain all layers 
Test Performance

 32 7 0.7507022619247437 0.7918518781661987 0.7682926829268293 0.839959

  super().__init__(name, **kwargs)


Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 8 0.6227405071258545 0.7911111116409302 0.7534298780487805 0.7662305235862732 0.845126211643219 0.7722704948448553 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
3974 3974



 After retrain all layers 
Test Performance

 32 8 0.7536132335662842 0.8111110925674438 0.7534298780487805 0.8457473516464233 0.9110209345817566 0.7822704853081122 



1 3 2


Train 51282 99726
3974
Epoch 1/2
Epoch 2/2



 Without retrain all layers 
Test Performance

 32 8 0.6126389503479004 0.7755555510520935 0.7477134146341463 0.757171630859375 0.8271723985671997 0.7616344828431199 



Hey, retraining :)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25

KeyboardInterrupt: 

In [23]:
# Standalone usage:
initializer = RandomNormal(mean=0.0, stddev=1.0)
values = initializer(shape=(2, 2))
print(values)

tf.Tensor(
[[-0.4033396   0.00977849]
 [-0.28203115  0.8355724 ]], shape=(2, 2), dtype=float32)


In [27]:
print(tf.data.Dataset.from_tensor_slices([5]))

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>


In [27]:
list_predict_class = all_predicted_proba
list_actual_class = all_list_actual_class
precision = precision_score(y_true=list_actual_class, y_pred=list_predict_class)
recall = recall_score(y_true=list_actual_class, y_pred=list_predict_class)
specificity = recall_score(y_true=list_actual_class, y_pred=list_predict_class, pos_label=0)
f1 = f1_score(y_true=list_actual_class, y_pred=list_predict_class, average='macro')
acc = accuracy_score(y_true=list_actual_class, y_pred=list_predict_class)

print(precision, recall, specificity, f1, acc, (recall + specificity)/2)

0.6326482677627716 0.5987551406024231 0.8494078377250698 0.7274504302341727 0.773652091148312 0.7240814891637464


# Downloading Google Audio Dataset

In [54]:
import pandas as pd

df_class_label = pd.read_csv("C:/Users/wyd2hu/Downloads/archive/class_labels_indices.csv")
df_file_name = pd.read_csv("C:/Users/wyd2hu/Downloads/archive/train.csv")
loc_root_wav = 'C:/Users/wyd2hu/Downloads/archive/train_wav/'

list_file_mid = []
list_class_names = []

for index, row_class in df_class_label.iterrows():
    if 'speech' in row_class['display_name'].lower() and 'noise' not in row_class['display_name'].lower():
        list_file_mid.append(row_class['mid'])
        list_class_names.append(row_class['display_name'])

def has_mid_name(value_list):
  return any(value in value_list for value in list_file_mid)

print(list_class_names)

['Speech', 'Male speech, man speaking', 'Female speech, woman speaking', 'Child speech, kid speaking', 'Speech synthesizer']


In [None]:
from audioset_download import Downloader






# list_class_names.append('Narration, monologue')
# print(list_class_names)
list_class_names = ['Speech']






d = Downloader(root_path='C:/Users/wyd2hu/S2He/AudData/Google AudioSet_Balanced_Train/', labels=list_class_names, n_jobs=13, download_type='balanced_train', copy_and_replicate=False)
d.download(format = 'wav')

In [None]:
df_file_name['positive_labels'] = df_file_name['positive_labels'].apply(lambda x: x.split(','))
filtered_df = df_file_name[df_file_name['positive_labels'].apply(has_mid_name)]

for ytid in filtered_df['YTID']:
    loc_wav = loc_root_wav + ytid + '.wav'
    audio_data, sample_rate = librosa.load(loc_wav)
    duration = librosa.get_duration(y=audio_data, sr=sample_rate)
    print(duration)
    try:
        extract_features(loc_wav, True)
    except Exception as e:
        print(e)

# ResNet50 Tuning Short Google Audio Dataset

In [None]:
import librosa
import os
import numpy as np
import matplotlib.pyplot as plt
plt.ioff()

def create_spectram_plot(loc_data_folder, loc_fig_folder):
  for class_folder in os.listdir(loc_data_folder):
    if 'Class_0' in class_folder:
      n_file_processing = 0
      n_class_instance = 0
      if not os.path.isdir(os.path.join(loc_fig_folder, class_folder)):
        os.mkdir(os.path.join(loc_fig_folder, class_folder))
      for file_name in os.listdir(os.path.join(loc_data_folder, class_folder)):
        if file_name.endswith('.wav') and (not os.path.exists(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))) and (librosa.get_duration(filename=os.path.join(loc_data_folder, class_folder, file_name)) > 0) :
          n_class_instance += 1
          signal, sr = librosa.load(os.path.join(loc_data_folder, class_folder, file_name))
          stft = librosa.stft(signal)

          fig, ax = plt.subplots()
          img = librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), ax=ax)
          fig.colorbar(img, ax=ax, format="%+2.0f dB")
          fig.savefig(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))
          plt.close(fig)
        
        n_file_processing += 1
        print(n_file_processing, n_class_instance, class_folder, os.path.basename(loc_data_folder))

# create_spectram_plot("C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Train_Speech",
#                      "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Google_Aud")
# create_spectram_plot("C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Test_Speech",
#                      "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Google_Aud")

In [36]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam

keras_ResNet_model = tf.keras.Sequential([hub.KerasLayer(ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3)))])

  from .autonotebook import tqdm as notebook_tqdm


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [99]:
from keras.preprocessing.image import ImageDataGenerator
image_size = 150
input_size = 150
train_dir = "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Google_Aud/"
from keras.preprocessing.image import ImageDataGenerator
import re

df_train_dir = pd.DataFrame(columns = [str_filename, str_target])
for class_folder in os.listdir(train_dir):
  print(class_folder)
  if 'Class' in class_folder: # find .ipynb_checkpoints in Google Colab. Thus, I had to set this condition
    df_temp = pd.DataFrame({str_filename: [os.path.join(train_dir, class_folder, filename) for filename in os.listdir(os.path.join(train_dir, class_folder))],
                            str_target: np.repeat(class_folder.strip(),
                                                  len(os.listdir(os.path.join(train_dir, class_folder))))})
    df_train_dir = pd.concat([df_train_dir, df_temp])

df_train_dir = shuffle(df_train_dir, random_state=random_state)
df_train_dir.reset_index(inplace=True, drop=True)

print(df_train_dir)

# Create generator with augmentation for training
train_gen = ImageDataGenerator(samplewise_center=True,
                               samplewise_std_normalization=True).flow_from_dataframe(
                               dataframe=df_train_dir.head(int(df_train_dir.shape[0] * 0.8)),
                               x_col = str_filename,
                               y_col = str_target,
                               directory=train_dir,
                               target_size=(image_size, image_size),
                               class_mode='binary',
                               batch_size=16,
                               shuffle=True)

val_gen = ImageDataGenerator(samplewise_center=True,
                               samplewise_std_normalization=True).flow_from_dataframe(
                               dataframe=df_train_dir.iloc[int(0.8 * df_train_dir.shape[0]) : int(0.9 * df_train_dir.shape[0])],
                               x_col = str_filename,
                               y_col = str_target,
                               directory=train_dir,
                               target_size=(image_size, image_size),
                               class_mode='binary',
                               batch_size=16,
                               shuffle=True)

test_gen = ImageDataGenerator(samplewise_center=True,
                               samplewise_std_normalization=True).flow_from_dataframe(
                               dataframe=df_train_dir.tail(int(df_train_dir.shape[0] * 0.1)),
                               x_col = str_filename,
                               y_col = str_target,
                               directory=train_dir,
                               target_size=(image_size, image_size),
                               class_mode='binary',
                               batch_size=16,
                               shuffle=True)

classes = df_train_dir.head(int(df_train_dir.shape[0] * 0.8))[str_target].str.replace('Class_', '').astype(np.int16)
print(classes)
neg, pos = np.bincount(classes)
        
total = neg + pos
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1}

classes = dict((v, k) for k, v in train_gen.class_indices.items())
num_classes = len(classes)

Class_0
Class_1
                                                filename   target
0      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
1      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
2      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
3      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
4      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
...                                                  ...      ...
25848  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
25849  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
25850  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
25851  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
25852  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1

[25853 rows x 2 columns]
Found 20682 validated image filenames belonging to 2 classes.
Found 2585 validated image filenames belonging to 2 classes.
Found 2585 validated image filenames belong

In [101]:
list_metrics = [keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                tfa.metrics.F1Score(name='f1_score', num_classes=1, threshold=0.5, average='macro'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc')]

img_width, img_height = 150, 150

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)


keras_ResNet_model = tf.keras.Sequential([hub.KerasLayer(ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3)))])
model_B_on_A = keras.models.Sequential(keras_ResNet_model.layers[:-1])

model_B_on_A.add(keras.layers.Conv2D(32, (3, 3), input_shape=input_shape))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_B_on_A.add(keras.layers.Conv2D(32, (3, 3)))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_B_on_A.add(keras.layers.Conv2D(64, (3, 3)))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_B_on_A.add(keras.layers.Flatten())  # this converts our 3D feature maps to 1D feature vectors
model_B_on_A.add(keras.layers.Dense(64))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.Dropout(0.5))
model_B_on_A.add(keras.layers.Dense(1))
model_B_on_A.add(keras.layers.Activation('sigmoid'))

for layer in model_B_on_A.layers[:-1]:
  layer.trainable = False

optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model_B_on_A.compile(loss='binary_crossentropy',
                     optimizer=optimizer,
                     metrics=list_metrics)

history = model_B_on_A.fit_generator(train_gen,
                           epochs=5,
                           validation_data=val_gen,
                           callbacks = callback,
                           class_weight=class_weight,
                           verbose = 1)

  history = model_B_on_A.fit_generator(train_gen,


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [105]:
dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)
specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))

print('\n\n\n Without retrain all layers ')
print('Test Performance\n\n', dict_predict_test.get('precision'), dict_predict_test.get('recall'),
        specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
print('Hey, retraining :)')

for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
model_B_on_A.compile(loss='binary_crossentropy',
                     optimizer=optimizer,
                     metrics=list_metrics)

history = model_B_on_A.fit_generator(train_gen,
                           epochs=20,
                           validation_data=val_gen,
                           callbacks = callback,
                           class_weight=class_weight,
                           verbose = 1)

# predicted_proba = model_B_on_A.predict(test_ds)
# list_actual_class, list_predict_class = get_clip_level_prediction(clip_level_list_actual_class, predicted_proba)
# precision = precision_score(y_true=list_actual_class, y_pred=list_predict_class)
# recall = recall_score(y_true=list_actual_class, y_pred=list_predict_class)
# specificity = recall_score(y_true=list_actual_class, y_pred=list_predict_class, pos_label=0)
# f1 = f1_score(y_true=list_actual_class, y_pred=list_predict_class, average='macro')
# acc = accuracy_score(y_true=list_actual_class, y_pred=list_predict_class)

# all_predicted_proba.extend(list_predict_class)
# all_list_actual_class.extend(list_actual_class)

dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)

print('\n\n\n After retrain all layers ')
print('Test Performance\n\n',dict_predict_test.get('precision'), dict_predict_test.get('recall'),
        specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')




 Without retrain all layers 
Test Performance

 0.45781734585762024 1.0 0.0007132667617689016 0.45802706480026245 0.5587420463562012 0.5003566333808844 



Hey, retraining :)


  history = model_B_on_A.fit_generator(train_gen,


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20



 After retrain all layers 
Test Performance

 0.761425256729126 0.8309382796287537 0.0007132667617689016 0.8034816384315491 0.8835452198982239 0.41582577319526126 





In [107]:
model_B_on_A.save('resnet_tuned_on_google_aud.keras')

In [106]:
dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)

print('\n\n\n After retrain all layers ')
print('Test Performance\n\n',dict_predict_test.get('precision'), dict_predict_test.get('recall'),
        specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')




 After retrain all layers 
Test Performance

 0.761425256729126 0.8309382796287537 0.0007132667617689016 0.8034816384315491 0.8835452198982239 0.41582577319526126 





In [18]:
import tensorflow as tf

print(tf.test.is_gpu_available())
assert tf.test.is_built_with_cuda()

False


AssertionError: 

# ResNet50 on DataVerse

In [None]:
list_metrics = [keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                tfa.metrics.F1Score(name='f1_score', num_classes=1, threshold=0.5, average='macro'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc')]

df_image_path = pd.DataFrame(columns = [str_filename, str_target])
loc_dataverse_image = "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Dataverse/"
str_cls_1 = 'Class_1'
str_cls_0 = 'Class_0'


for fore_or_back_folder in os.listdir(loc_dataverse_image):
  df_temp = pd.DataFrame({str_filename: [os.path.join(loc_dataverse_image, fore_or_back_folder, filename) 
                                         for filename in os.listdir(os.path.join(loc_dataverse_image, fore_or_back_folder))],
                          str_target: np.repeat(str_cls_1 if str_fore in fore_or_back_folder else str_cls_0,
                                                len(os.listdir(os.path.join(loc_dataverse_image, fore_or_back_folder))))})
  df_image_path = pd.concat([df_image_path, df_temp])

df_image_path = shuffle(df_image_path, random_state=random_state)
df_image_path.reset_index(inplace=True, drop=True)

print(df_image_path)

                                                filename   target
0      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
1      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
2      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
3      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
4      C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_1
...                                                  ...      ...
79473  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
79474  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
79475  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
79476  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0
79477  C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure...  Class_0

[79478 rows x 2 columns]


In [17]:
def tune_resnet_model_for_dataverse():
  global  all_list_actual_class, all_predicted_proba
  
  all_list_actual_class = [] 
  all_predicted_proba = []

  image_size = 150
  input_size = 150
  img_width, img_height = 150, 150

  validation_ratio = 0.10
  test_ratio = 0.10

  n_epoch_for_bias = 2
  n_epoch_retraining = 30
  batch_size = 24

  skf = StratifiedKFold(n_splits=25, shuffle=True, random_state = random_state)

  for ith_fold, (train_index, test_index) in enumerate(skf.split(df_image_path[str_filename], df_image_path[str_target])):
      
      ################################# Handling Training Data ################################
      x_train = df_image_path.iloc[train_index][str_filename].tolist()
      y_train = df_image_path.iloc[train_index][str_target].tolist()

      df_temp_train = pd.DataFrame({str_filename: x_train, str_target: y_train})
      df_temp_train[str_fold] = np.repeat(1, df_temp_train.shape[0]) # 1 is used (randomly) to denote the train fold everywhere
      df_temp_train = shuffle(df_temp_train, random_state=random_state)
      df_temp_train.reset_index(inplace=True, drop=True)
      
      train_gen = ImageDataGenerator(samplewise_center = True,
                                    samplewise_std_normalization = True).flow_from_dataframe(
                                    dataframe = df_temp_train,
                                    x_col = str_filename,
                                    y_col = str_target,
                                    directory = loc_dataverse_image,
                                    target_size = (image_size, image_size),
                                    class_mode = 'binary',
                                    batch_size = batch_size,
                                    shuffle = True)
      classes = df_temp_train[str_target].str.replace('Class_', '').astype(np.int16)
      neg, pos = np.bincount(classes)
      total = neg + pos
      weight_for_0 = (1 / neg) * (total / 2.0)
      weight_for_1 = (1 / pos) * (total / 2.0)
      class_weight = {0: weight_for_0, 1: weight_for_1}

      ################################ Validation and Test data ################################
      x_test = df_image_path.iloc[test_index][str_filename].tolist()
      y_test = df_image_path.iloc[test_index][str_target].tolist()

      x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size = test_ratio / (test_ratio + validation_ratio),
                                                      shuffle=True, stratify = y_test, random_state = random_state)
      df_temp_val = pd.DataFrame({str_filename: x_val, str_target: y_val})
      df_temp_val[str_fold] = np.repeat(2, df_temp_val.shape[0])

      df_temp_test = pd.DataFrame({str_filename: x_test, str_target: y_test})
      df_temp_test[str_fold] = np.repeat(3, df_temp_test.shape[0])

      pd_data = pd.concat([df_temp_train, df_temp_val, df_temp_test])
      
      ################################ Image Data Generator ################################
      for train_fold, val_fold, test_fold in ((1, 2, 3), (1, 3, 2)):
        val_gen = ImageDataGenerator(samplewise_center=True,
                                      samplewise_std_normalization=True).flow_from_dataframe(
                                      dataframe=pd_data[pd_data[str_fold] == val_fold],
                                      x_col = str_filename,
                                      y_col = str_target,
                                      directory = loc_dataverse_image,
                                      target_size = (image_size, image_size),
                                      class_mode ='binary',
                                      batch_size = batch_size,
                                      shuffle=True)

        test_gen = ImageDataGenerator(samplewise_center=True,
                                      samplewise_std_normalization=True).flow_from_dataframe(
                                      dataframe=pd_data[pd_data[str_fold] == test_fold],
                                      x_col = str_filename,
                                      y_col = str_target,
                                      directory=loc_dataverse_image,
                                      target_size=(image_size, image_size),
                                      class_mode='binary',
                                      batch_size= batch_size,
                                      shuffle=True)

        ###################### Model training (For bias in the last layer) ######################
        if K.image_data_format() == 'channels_first':
            input_shape = (3, img_width, img_height)
        else:
            input_shape = (img_width, img_height, 3)


        keras_ResNet_model = tf.keras.Sequential([hub.KerasLayer(ResNet50(include_top=False, weights='imagenet'))])
        model_B_on_A = keras.models.Sequential(keras_ResNet_model.layers[:-1])

        model_B_on_A.add(keras.layers.Conv2D(32, (3, 3), input_shape=input_shape))
        model_B_on_A.add(keras.layers.Activation('relu'))
        model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model_B_on_A.add(keras.layers.Conv2D(32, (3, 3)))
        model_B_on_A.add(keras.layers.Activation('relu'))
        model_B_on_A.add(keras.layers.Dropout(0.1))
        model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model_B_on_A.add(keras.layers.Conv2D(64, (3, 3)))
        model_B_on_A.add(keras.layers.Activation('relu'))
        model_B_on_A.add(keras.layers.Dropout(0.2))
        model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

        model_B_on_A.add(keras.layers.Flatten())  # this converts our 3D feature maps to 1D feature vectors
        model_B_on_A.add(keras.layers.Dense(64))
        model_B_on_A.add(keras.layers.Activation('relu'))
        model_B_on_A.add(keras.layers.Dropout(0.5))
        model_B_on_A.add(keras.layers.Dense(1))
        model_B_on_A.add(keras.layers.Activation('sigmoid'))

        for layer in model_B_on_A.layers[:-1]:
          layer.trainable = False

        optimizer = keras.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        model_B_on_A.compile(loss='binary_crossentropy',
                            optimizer=optimizer,
                            metrics=list_metrics)

        history = model_B_on_A.fit(train_gen,
                                  epochs = n_epoch_for_bias,
                                  validation_data = val_gen,
                                  callbacks = callback,
                                  class_weight = class_weight,
                                  verbose = 1)
        
        dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)
        specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))
        print('\n\n\n Training only the last layers 🙈😡😠')
        print('Test Performance\n\n', dict_predict_test.get('precision'), dict_predict_test.get('recall'), specificity,
                dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')

        ###################### Re-training all layers ######################
        print('Hey, retraining 😍😍😍🤜🤛')
        for layer in model_B_on_A.layers[:-1]:
            layer.trainable = True

        optimizer = keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
        model_B_on_A.compile(loss='binary_crossentropy',
                            optimizer=optimizer,
                            metrics=list_metrics)

        history = model_B_on_A.fit(train_gen,
                                  epochs = n_epoch_retraining,
                                  validation_data=val_gen,
                                  callbacks = callback,
                                  class_weight=class_weight,
                                  verbose = 1)

        all_predicted_proba.extend([arr[0] for arr in list(model_B_on_A.predict(test_gen))])
        all_list_actual_class.extend(pd_data[pd_data[str_fold] == test_fold][str_target].tolist())

        dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)
        specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))
        
        print('\n\n\n After retrain all layers ')
        print('Test Performance\n\n',dict_predict_test.get('precision'), dict_predict_test.get('recall'),
                specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
        

tune_resnet_model_for_dataverse()

NameError: name 'df_image_path' is not defined

In [24]:
print(all_predicted_proba)

[0.53886336, 0.5624884, 0.56633466, 0.53066903, 0.555843, 0.5406961, 0.539951, 0.5603572, 0.56742716, 0.5406106, 0.5407574, 0.5364005, 0.53874063, 0.53457224, 0.55638576, 0.53411424, 0.53517586, 0.54127455, 0.5316514, 0.55615896, 0.53513885, 0.53115, 0.5339209, 0.55917865, 0.54997903, 0.53297436, 0.5453768, 0.55439657, 0.5410168, 0.5417628, 0.5470463, 0.5698116, 0.5478723, 0.5521342, 0.52772635, 0.5558705, 0.53750205, 0.5506293, 0.55495685, 0.55107164, 0.5481269, 0.5413895, 0.55508286, 0.53797585, 0.53820914, 0.558306, 0.54972976, 0.53830385, 0.5306554, 0.53568125, 0.5397202, 0.5500265, 0.56773597, 0.5406096, 0.5373824, 0.55526584, 0.53633446, 0.53961957, 0.5311337, 0.5455668, 0.52654487, 0.5399522, 0.55483806, 0.5418452, 0.54355484, 0.5451724, 0.52488124, 0.53597194, 0.55083865, 0.565778, 0.54932284, 0.5442295, 0.52515507, 0.54983324, 0.5545155, 0.53137803, 0.54434204, 0.5367619, 0.53328204, 0.5502024, 0.5293398, 0.5418244, 0.5605128, 0.53879195, 0.54603857, 0.56032276, 0.53200316, 0.

In [22]:
result_arrays = [np.array([0.42610288], dtype=np.float32), np.array([0.41446418])]

# Extract values from each array
values = [arr[0] for arr in result_arrays]

print(values)


[0.42610288, 0.41446418]


# Operation Theater

In [28]:
import librosa
import os
import numpy as np
import matplotlib.pyplot as plt
str_fore = 'foreground'

plt.ioff()

loc_raw_dataverse_files = 'C:/Users/wyd2hu/OneDrive - University of Virginia/Katha/dataverse_files/'
loc_fig_dataverse = 'C:/Users/wyd2hu/S2He/AudData/ResNetPlay//Only_Spectrogram_Figure_Dataverse/' 

def create_spectrogram(file_name, loc_fig):
    n_row = 1
    n_col = 1
    fig, axs = plt.subplots(n_col, n_row, figsize=(20, 20))

    signal, sr = librosa.load(file_name)
    N_FFT = int(sr * 0.025)
    HOP_LENGTH = int(sr * 0.0125)

    stft = librosa.stft(signal, n_fft = N_FFT, hop_length = HOP_LENGTH)
    
    librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), 
                            y_axis='log', x_axis='time', ax=axs)
    
    axs.xaxis.label.set_visible(False)
    axs.yaxis.label.set_visible(False)
    axs.set_xticklabels([])
    axs.set_yticklabels([])
    
    fig.savefig(loc_fig)
    plt.close(fig)
    

import warnings
warnings.filterwarnings('ignore')

def create_spectram_plot(loc_data_folder, loc_fig_folder):
  n_file_processing = 0

  for class_folder in sorted(os.listdir(loc_data_folder)):
    if 'DS_Store' not in class_folder:
      if not os.path.isdir(os.path.join(loc_fig_folder, class_folder)):
        os.mkdir(os.path.join(loc_fig_folder, class_folder))

      for fore_back in os.listdir(os.path.join(loc_data_folder, class_folder)):
        for file_name in sorted(os.listdir(os.path.join(loc_data_folder, class_folder, fore_back))):
          if file_name.endswith('.wav') and (not os.path.exists(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))) and (librosa.get_duration(filename=os.path.join(loc_data_folder, class_folder, fore_back, file_name)) > 0) :
              create_spectrogram(os.path.join(loc_data_folder, class_folder, fore_back, file_name),
                                      os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))

          n_file_processing += 1
          print(n_file_processing, class_folder, os.path.basename(loc_data_folder))

create_spectram_plot(loc_raw_dataverse_files,
                     loc_fig_dataverse)

1 fold1_foreground 
2 fold1_foreground 
3 fold1_foreground 
4 fold1_foreground 
5 fold1_foreground 
6 fold1_foreground 
7 fold1_foreground 
8 fold1_foreground 
9 fold1_foreground 
10 fold1_foreground 
11 fold1_foreground 
12 fold1_foreground 
13 fold1_foreground 
14 fold1_foreground 
15 fold1_foreground 
16 fold1_foreground 
17 fold1_foreground 
18 fold1_foreground 
19 fold1_foreground 
20 fold1_foreground 
21 fold1_foreground 
22 fold1_foreground 
23 fold1_foreground 
24 fold1_foreground 
25 fold1_foreground 
26 fold1_foreground 
27 fold1_foreground 
28 fold1_foreground 
29 fold1_foreground 
30 fold1_foreground 
31 fold1_foreground 
32 fold1_foreground 
33 fold1_foreground 
34 fold1_foreground 
35 fold1_foreground 
36 fold1_foreground 
37 fold1_foreground 
38 fold1_foreground 
39 fold1_foreground 
40 fold1_foreground 
41 fold1_foreground 
42 fold1_foreground 
43 fold1_foreground 
44 fold1_foreground 
45 fold1_foreground 
46 fold1_foreground 
47 fold1_foreground 
48 fold1_foreground 
4

In [1]:
import librosa
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
str_fore = 'foreground'

plt.ioff()

loc_raw_dataverse_files = 'C:/Users/wyd2hu/OneDrive - University of Virginia/Katha/dataverse_files/'
loc_fig_dataverse = 'C:/Users/wyd2hu/S2He/AudData/ResNetPlay//Only_Spectrogram_Figure_Dataverse/' 

def create_spectrogram(file_name, loc_fig):
    n_row = 1
    n_col = 1
    fig, axs = plt.subplots(n_col, n_row, figsize=(20, 20))

    signal, sr = librosa.load(file_name)
    N_FFT = int(sr * 0.025)
    HOP_LENGTH = int(sr * 0.0125)

    stft = librosa.stft(signal, n_fft = N_FFT, hop_length = HOP_LENGTH)
    
    librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), 
                            y_axis='log', x_axis='time', ax=axs)
    
    axs.xaxis.label.set_visible(False)
    axs.yaxis.label.set_visible(False)
    axs.set_xticklabels([])
    axs.set_yticklabels([])
    
    fig.savefig(loc_fig)
    plt.close(fig)
    

import warnings
warnings.filterwarnings('ignore')

def create_spectram_plot(loc_data_folder, loc_fig_folder):
  n_file_processing = 0

  for class_folder in sorted(os.listdir(loc_data_folder)):
    if 'DS_Store' not in class_folder:
      if not os.path.isdir(os.path.join(loc_fig_folder, class_folder)):
        os.mkdir(os.path.join(loc_fig_folder, class_folder))

      for fore_back in os.listdir(os.path.join(loc_data_folder, class_folder)):
        for file_name in sorted(os.listdir(os.path.join(loc_data_folder, class_folder, fore_back))) [19000: ]:
           if file_name.endswith('.wav'):   
            try:
                loc_fig = os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png'))
                # print(loc_fig)
                img = Image.open(loc_fig)
                img.verify()
            except Exception as e:
                print('Bad file:', loc_fig)
                os.remove(loc_fig)
                create_spectrogram(os.path.join(loc_data_folder, class_folder, fore_back, file_name), loc_fig)

        # n_file_processing += 1
        # print(n_file_processing, class_folder, os.path.basename(loc_data_folder))


create_spectram_plot(loc_raw_dataverse_files,
                     loc_fig_dataverse)

Bad file: C:/Users/wyd2hu/S2He/AudData/ResNetPlay//Only_Spectrogram_Figure_Dataverse/fold2_background\0_3657.png


PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:/Users/wyd2hu/S2He/AudData/ResNetPlay//Only_Spectrogram_Figure_Dataverse/fold2_background\\0_3657.png'