## Import TensorFlow and other libraries


In [2]:
import os

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio
from tensorflow.keras.applications import ResNet50

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from collections import Counter

from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

# from keras.src.legacy.preprocessing.image import ImageDataGenerator
import keras

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, StratifiedKFold

In [3]:
from importlib.metadata import version 
version('tensorflow')

'2.15.0'

In [4]:
WORKING_MAC = True
if WORKING_MAC:
    loc_file_path = 'mac_file_path.csv'
else:
    loc_file_path = 'file_path.csv'

random_state = 1234

str_back = 'background'
str_embeddings = 'Embeddings'
str_fore = 'foreground'
str_fold = 'fold'
str_filename = 'filename'
str_target = 'target'

#############  Tensorflow Random State  #########################
tf.keras.utils.set_random_seed(random_state)

In [None]:
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

In [None]:
import os
import zipfile
import re

def extract_all_zips():
  if WORKING_MAC:
    loc_file_folder = '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/dataverse_files'
  else:
     loc_file_folder = 'C:/Users/wyd2hu/OneDrive - University of Virginia/Katha/dataverse_files/'

  data_list_df = []

  for outer_folder in os.listdir(loc_file_folder):
    if 'DS_Store' not in outer_folder:
      for wav_file in os.listdir(os.path.join(loc_file_folder, outer_folder)):
          if wav_file.endswith('.wav'):
            data_list_df.append([os.path.join(loc_file_folder, outer_folder, wav_file),
                                1 if str_fore in outer_folder else 0,
                                int(re.findall(r'\d+', outer_folder)[0])])
  
  df_path = pd.DataFrame(data = data_list_df, columns = [str_filename, str_target, 'Dataset Fold'])

  
  df_path.to_csv(loc_file_path, index=False)
  print(df_path)
  print(df_path.shape)

extract_all_zips()

# Transfer the learning from YAMNet

In [None]:
# Utility functions for loading audio files and making sure the sample rate is correct.
# applies the embedding extraction model to a wav data
def extract_embedding(wav_data, label, fold):
  ''' run YAMNet to extract embedding from the wav data '''
  scores, embeddings, spectrogram = yamnet_model(wav_data)
  num_embeddings = tf.shape(embeddings)[0]
  return (embeddings,
            tf.repeat(label, num_embeddings),
            tf.repeat(fold, num_embeddings))

def load_wav_for_map(filename, label, fold):
  return load_wav_16k_mono(filename), label, fold

@tf.function
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
          file_contents,
          desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav


def extract_embedding_for_all_data(wav_data, label, filename):
  ''' run YAMNet to extract embedding from the wav data '''
  scores, embeddings, spectrogram = yamnet_model(wav_data)
  num_embeddings = tf.shape(embeddings)[0]
  return (embeddings,
            tf.repeat(label, num_embeddings),
            tf.repeat(filename, num_embeddings))

def load_wav_for_map_for_all_data(filename, label):
  return load_wav_16k_mono(filename), label, filename

In [None]:
def get_clip_level_prediction(list_actual_class, predicted_proba):
  zip_true_predicted = list(zip(list_actual_class, predicted_proba))

  list_y_true = []
  list_y_pred = []

  for first_frame, second_frame in zip(zip_true_predicted[::2], zip_true_predicted[1::2]):

    if first_frame[0] != second_frame[0]:
      print('\n\n\n\nSevere problem\n\n\n\n')
    else:
      list_y_true.append(first_frame[0])

      clip_prob = np.array([first_frame[1], second_frame[1]])
      class_in_frames = clip_prob.argmax(axis=-1)
      if len(np.unique(class_in_frames)) == 1: # means the predicted class is same in each frame:
        list_y_pred.append(class_in_frames[0])
      else: # means the predicted class differs between the first and second frame of 1 second audio clip
        list_y_pred.append(np.argmax(np.sum(clip_prob, axis=0)))

  return list_y_true, list_y_pred

In [None]:
def get_df_g_aud_loc():
  loc_g_aud = 'C:/Users/wyd2hu/S2He/AudData/Google AudioSet_Balanced_Train/Speech//10_seconds'
  list_loc_g_aud_files = loc_g_aud +'//'+ pd.Series(os.listdir(loc_g_aud))
  df_g_aud_loc = pd.DataFrame({str_filename: list_loc_g_aud_files,
                               str_target: np.repeat(1, len(list_loc_g_aud_files))})
  df_g_aud_loc.to_excel('google_aud_speech_class_data.xlsx', index=False)
  print(df_g_aud_loc)

get_df_g_aud_loc()

In [None]:
df_path = pd.read_csv(loc_file_path)

df_path = shuffle(df_path, random_state = random_state)
df_path.reset_index(inplace=True, drop=True)

list_all_files = df_path[str_filename].tolist()
list_all_labels = df_path[str_target].tolist()

df_g_aud_loc = pd.read_excel('google_aud_speech_class_data.xlsx')
list_all_files.extend(df_g_aud_loc[str_filename])
list_all_labels.extend(df_g_aud_loc[str_target])

main_ds_all_data = tf.data.Dataset.from_tensor_slices((list_all_files, list_all_labels))
main_ds_all_data = main_ds_all_data.map(load_wav_for_map_for_all_data)
main_ds_all_data = main_ds_all_data.map(extract_embedding_for_all_data).unbatch()

df_embedding_all_data = pd.DataFrame(columns = [str_embeddings, str_target, str_filename])
for emb, label, file_name in main_ds_all_data:
  df_embedding_all_data.loc[-1] = [emb, label, file_name.numpy().decode()]
  df_embedding_all_data.index += 1
  df_embedding_all_data = df_embedding_all_data.sort_index()

print(df_embedding_all_data)


## Model Training, Validation, and Test

In [None]:
df_embedding_all_data = pd.read_pickle('embedd_dataverse_g_aud_balanced_trained.pkl')

In [None]:
list_metrics = [keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                keras.metrics.F1Score(name='f1_score', threshold=0.5, average='macro'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc')]
print('I am here')

def train_val_test():
  global all_predicted_proba, all_list_actual_class, df_embedding_all_data, METRICS
  all_predicted_proba = []
  all_list_actual_class = []

  batch_size = 32
  n_epoch = 10
  
  print('I am here')

  df_path = pd.read_csv(loc_file_path)
  df_path = shuffle(df_path, random_state = random_state)
  df_path.reset_index(inplace=True, drop=True)
  print('I am here')
  
  my_classes = set(df_path[str_target])
  validation_ratio = 0.10
  test_ratio = 0.10

  for batch_size in [32]:
    print('I am here')

    skf = StratifiedKFold(n_splits=25, shuffle=True, random_state = random_state)

    for ith_fold, (train_index, test_index) in enumerate(skf.split(df_path[str_filename], df_path[str_target])):

      # Keeping the filenames in x_train since just based on the filenames, the embedddings will be retrieved.
      x_train = df_path.iloc[train_index][str_filename].tolist()
      y_train = df_path.iloc[train_index][str_target].tolist()

      x_test = df_path.iloc[test_index][str_filename].tolist()
      y_test = df_path.iloc[test_index][str_target].tolist()

      x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size = test_ratio / (test_ratio + validation_ratio),
                                                      shuffle=True, stratify = y_test, random_state = random_state)

      df_temp_train = pd.DataFrame({str_filename: x_train, str_target: y_train})
    #   df_original_class_0 = df_temp_train[df_temp_train[str_target] == 0].copy()
    #   df_train_class_1 = df_temp_train[df_temp_train[str_target] == 1].copy()

    #   df_g_aud_loc = pd.read_excel('google_aud_speech_class_data.xlsx')
    #   df_train_class_0 = df_original_class_0.iloc[:(df_train_class_1.shape[0] + int((int(10/0.48) * df_g_aud_loc.shape[0]) / 2))].copy()
    #   df_temp_train = pd.concat([df_train_class_1, df_train_class_0])

    #   df_temp_train = pd.concat([df_temp_train, df_g_aud_loc])
      df_temp_train[str_fold] = np.repeat(1, df_temp_train.shape[0]) # 1 is used (randomly) to denote the train fold everywhere

      df_temp_val = pd.DataFrame({str_filename: x_val, str_target: y_val})
      df_temp_val[str_fold] = np.repeat(2, df_temp_val.shape[0])

      df_temp_test = pd.DataFrame({str_filename: x_test, str_target: y_test})
      df_temp_test[str_fold] = np.repeat(3, df_temp_test.shape[0])

      pd_data = pd.concat([df_temp_train, df_temp_val, df_temp_test])

      for train_fold, val_fold, test_fold in ((1, 2, 3), (1, 3, 2)):
        print(train_fold, val_fold, test_fold)
        filenames = pd_data[str_filename]
        targets = pd_data[str_target]
        folds = pd_data[str_fold]

        # print(df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == train_fold][str_filename].tolist()))
        train_ds = df_embedding_all_data[df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == train_fold][str_filename])].copy()
        # rows_organized_train = []
        # for row_class_1, row_class_0 in zip(train_ds[train_ds[str_target] == 1].itertuples(index = False),
        #                                     train_ds[train_ds[str_target] == 0].itertuples(index = False)):
        #     rows_organized_train.append(row_class_1)
        #     rows_organized_train.append(row_class_0)
        
        train_ds = shuffle(train_ds, random_state = random_state)
        train_ds.reset_index(inplace=True, drop=True)

        neg, pos = np.bincount(train_ds[str_target])
        
        total = neg + pos
        weight_for_0 = (1 / neg) * (total / 2.0)
        weight_for_1 = (1 / pos) * (total / 2.0)

        class_weight = {0: weight_for_0, 1: weight_for_1}


        # train_ds = pd.DataFrame(data = rows_organized_train,
        #                         columns=train_ds.columns.tolist())
        print('\n\nTrain', train_ds[train_ds[str_target] == 1].shape[0], train_ds[train_ds[str_target] == 0].shape[0])

        train_ds = tf.data.Dataset.from_tensor_slices((train_ds[str_embeddings].tolist(), train_ds[str_target].tolist()))

        val_ds = df_embedding_all_data[df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == val_fold][str_filename])].copy()
        val_ds = tf.data.Dataset.from_tensor_slices((val_ds[str_embeddings].tolist(), val_ds[str_target].tolist()))

        test_ds = df_embedding_all_data[df_embedding_all_data[str_filename].isin(pd_data[pd_data[str_fold] == test_fold][str_filename])].copy()
        test_ds = tf.data.Dataset.from_tensor_slices((test_ds[str_embeddings].tolist(), test_ds[str_target].tolist()))

        clip_level_list_actual_class =  list(map(lambda x: x[1].numpy(), test_ds))
        print(len(clip_level_list_actual_class))
        # print(clip_level_list_actual_class)

        print('Hey, I am here 1')

        train_ds = train_ds.cache().shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
        val_ds = val_ds.cache().batch(batch_size).prefetch(tf.data.AUTOTUNE)
        test_ds = test_ds.cache().batch(batch_size).prefetch(tf.data.AUTOTUNE)

        print('Hey, I am here 2')

        keras_yamnet_model = keras.Sequential([hub.KerasLayer(yamnet_model)])
        leaky_relu = keras.layers.LeakyReLU(alpha=0.1)

        print(np.log([pos/neg]))

        model_B_on_A = keras.models.Sequential(keras_yamnet_model.layers[:-4])
        model_B_on_A.add(keras.layers.Dense(2048, activation = leaky_relu))
        model_B_on_A.add(keras.layers.Dense(1024, activation = leaky_relu))
        model_B_on_A.add(keras.layers.Dense(512, activation = leaky_relu))
        model_B_on_A.add(keras.layers.Dense(len(my_classes)-1, activation ='sigmoid', bias_initializer=  tf.keras.initializers.Constant(np.log([pos/neg]))))

        for layer in model_B_on_A.layers[:-4]:
          layer.trainable = False

        print('Hey, I am here 3')

        optimizer = keras.optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999)
        model_B_on_A.compile(loss=keras.losses.BinaryCrossentropy(), optimizer=optimizer,
                            metrics= list_metrics)
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
        history = model_B_on_A.fit(train_ds, epochs=n_epoch,
                                  validation_data=val_ds,
                                  callbacks = callback,
                                  class_weight=class_weight,
                                  verbose=1)
        
        dict_predict_test = model_B_on_A.evaluate(test_ds, return_dict=True)
        specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))

        print('\n\n\n Without retrain all layers ')
        print('Test Performance\n\n', batch_size, ith_fold, dict_predict_test.get('precision'), dict_predict_test.get('recall'),
               specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
        print('Hey, retraining :)')

        for layer in model_B_on_A.layers[:-4]:
          layer.trainable = True

        optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
        model_B_on_A.compile(loss=keras.losses.BinaryCrossentropy(), optimizer=optimizer,
                             metrics=list_metrics)
        history = model_B_on_A.fit(train_ds, epochs=n_epoch,
                                   validation_data=val_ds,
                                   class_weight=class_weight,
                                   callbacks = callback, verbose=1)

        # predicted_proba = model_B_on_A.predict(test_ds)
        # list_actual_class, list_predict_class = get_clip_level_prediction(clip_level_list_actual_class, predicted_proba)
        # precision = precision_score(y_true=list_actual_class, y_pred=list_predict_class)
        # recall = recall_score(y_true=list_actual_class, y_pred=list_predict_class)
        # specificity = recall_score(y_true=list_actual_class, y_pred=list_predict_class, pos_label=0)
        # f1 = f1_score(y_true=list_actual_class, y_pred=list_predict_class, average='macro')
        # acc = accuracy_score(y_true=list_actual_class, y_pred=list_predict_class)

        # all_predicted_proba.extend(list_predict_class)
        # all_list_actual_class.extend(list_actual_class)

        dict_predict_test = model_B_on_A.evaluate(test_ds, return_dict=True)

        print('\n\n\n After retrain all layers ')
        print('Test Performance\n\n', batch_size, ith_fold, dict_predict_test.get('precision'), dict_predict_test.get('recall'),
               specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')

train_val_test()

In [None]:
# Standalone usage:
initializer = RandomNormal(mean=0.0, stddev=1.0)
values = initializer(shape=(2, 2))
print(values)

In [None]:
print(tf.data.Dataset.from_tensor_slices([5]))

In [None]:
list_predict_class = all_predicted_proba
list_actual_class = all_list_actual_class
precision = precision_score(y_true=list_actual_class, y_pred=list_predict_class)
recall = recall_score(y_true=list_actual_class, y_pred=list_predict_class)
specificity = recall_score(y_true=list_actual_class, y_pred=list_predict_class, pos_label=0)
f1 = f1_score(y_true=list_actual_class, y_pred=list_predict_class, average='macro')
acc = accuracy_score(y_true=list_actual_class, y_pred=list_predict_class)

print(precision, recall, specificity, f1, acc, (recall + specificity)/2)

# Downloading Google Audio Dataset

In [None]:
import pandas as pd

df_class_label = pd.read_csv("C:/Users/wyd2hu/Downloads/archive/class_labels_indices.csv")
df_file_name = pd.read_csv("C:/Users/wyd2hu/Downloads/archive/train.csv")
loc_root_wav = 'C:/Users/wyd2hu/Downloads/archive/train_wav/'

list_file_mid = []
list_class_names = []

for index, row_class in df_class_label.iterrows():
    if 'speech' in row_class['display_name'].lower() and 'noise' not in row_class['display_name'].lower():
        list_file_mid.append(row_class['mid'])
        list_class_names.append(row_class['display_name'])

def has_mid_name(value_list):
  return any(value in value_list for value in list_file_mid)

print(list_class_names)

In [None]:
from audioset_download import Downloader






# list_class_names.append('Narration, monologue')
# print(list_class_names)
list_class_names = ['Speech']






d = Downloader(root_path='C:/Users/wyd2hu/S2He/AudData/Google AudioSet_Balanced_Train/', labels=list_class_names, n_jobs=13, download_type='balanced_train', copy_and_replicate=False)
d.download(format = 'wav')

In [None]:
df_file_name['positive_labels'] = df_file_name['positive_labels'].apply(lambda x: x.split(','))
filtered_df = df_file_name[df_file_name['positive_labels'].apply(has_mid_name)]

for ytid in filtered_df['YTID']:
    loc_wav = loc_root_wav + ytid + '.wav'
    audio_data, sample_rate = librosa.load(loc_wav)
    duration = librosa.get_duration(y=audio_data, sr=sample_rate)
    print(duration)
    try:
        extract_features(loc_wav, True)
    except Exception as e:
        print(e)

# ResNet50 Tuning Short Google Audio Dataset

In [None]:
import librosa
import os
import numpy as np
import matplotlib.pyplot as plt
plt.ioff()

def create_spectram_plot(loc_data_folder, loc_fig_folder):
  for class_folder in os.listdir(loc_data_folder):
    if 'Class_0' in class_folder:
      n_file_processing = 0
      n_class_instance = 0
      if not os.path.isdir(os.path.join(loc_fig_folder, class_folder)):
        os.mkdir(os.path.join(loc_fig_folder, class_folder))
      for file_name in os.listdir(os.path.join(loc_data_folder, class_folder)):
        if file_name.endswith('.wav') and (not os.path.exists(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))) and (librosa.get_duration(filename=os.path.join(loc_data_folder, class_folder, file_name)) > 0) :
          n_class_instance += 1
          signal, sr = librosa.load(os.path.join(loc_data_folder, class_folder, file_name))
          stft = librosa.stft(signal)

          fig, ax = plt.subplots()
          img = librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), ax=ax)
          fig.colorbar(img, ax=ax, format="%+2.0f dB")
          fig.savefig(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))
          plt.close(fig)
        
        n_file_processing += 1
        print(n_file_processing, n_class_instance, class_folder, os.path.basename(loc_data_folder))

# create_spectram_plot("C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Train_Speech",
#                      "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Google_Aud")
# create_spectram_plot("C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Test_Speech",
#                      "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Google_Aud")

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam

keras_ResNet_model = tf.keras.Sequential([hub.KerasLayer(ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3)))])

In [None]:
from keras.preprocessing.image import ImageDataGenerator
image_size = 150
input_size = 150
train_dir = "C:/Users/wyd2hu/S2He/AudData/ResNetPlay/Figure_Google_Aud/"
from keras.preprocessing.image import ImageDataGenerator
import re

df_train_dir = pd.DataFrame(columns = [str_filename, str_target])
for class_folder in os.listdir(train_dir):
  print(class_folder)
  if 'Class' in class_folder: # find .ipynb_checkpoints in Google Colab. Thus, I had to set this condition
    df_temp = pd.DataFrame({str_filename: [os.path.join(train_dir, class_folder, filename) for filename in os.listdir(os.path.join(train_dir, class_folder))],
                            str_target: np.repeat(class_folder.strip(),
                                                  len(os.listdir(os.path.join(train_dir, class_folder))))})
    df_train_dir = pd.concat([df_train_dir, df_temp])

df_train_dir = shuffle(df_train_dir, random_state=random_state)
df_train_dir.reset_index(inplace=True, drop=True)

print(df_train_dir)

# Create generator with augmentation for training
train_gen = ImageDataGenerator(samplewise_center=True,
                               samplewise_std_normalization=True).flow_from_dataframe(
                               dataframe=df_train_dir.head(int(df_train_dir.shape[0] * 0.8)),
                               x_col = str_filename,
                               y_col = str_target,
                               directory=train_dir,
                               target_size=(image_size, image_size),
                               class_mode='binary',
                               batch_size=16,
                               shuffle=True)

val_gen = ImageDataGenerator(samplewise_center=True,
                               samplewise_std_normalization=True).flow_from_dataframe(
                               dataframe=df_train_dir.iloc[int(0.8 * df_train_dir.shape[0]) : int(0.9 * df_train_dir.shape[0])],
                               x_col = str_filename,
                               y_col = str_target,
                               directory=train_dir,
                               target_size=(image_size, image_size),
                               class_mode='binary',
                               batch_size=16,
                               shuffle=True)

test_gen = ImageDataGenerator(samplewise_center=True,
                               samplewise_std_normalization=True).flow_from_dataframe(
                               dataframe=df_train_dir.tail(int(df_train_dir.shape[0] * 0.1)),
                               x_col = str_filename,
                               y_col = str_target,
                               directory=train_dir,
                               target_size=(image_size, image_size),
                               class_mode='binary',
                               batch_size=16,
                               shuffle=True)

classes = df_train_dir.head(int(df_train_dir.shape[0] * 0.8))[str_target].str.replace('Class_', '').astype(np.int16)
print(classes)
neg, pos = np.bincount(classes)
        
total = neg + pos
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1}

classes = dict((v, k) for k, v in train_gen.class_indices.items())
num_classes = len(classes)

In [None]:
list_metrics = [keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                tfa.metrics.F1Score(name='f1_score', num_classes=1, threshold=0.5, average='macro'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc')]

img_width, img_height = 150, 150

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)


keras_ResNet_model = tf.keras.Sequential([hub.KerasLayer(ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3)))])
model_B_on_A = keras.models.Sequential(keras_ResNet_model.layers[:-1])

model_B_on_A.add(keras.layers.Conv2D(32, (3, 3), input_shape=input_shape))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_B_on_A.add(keras.layers.Conv2D(32, (3, 3)))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_B_on_A.add(keras.layers.Conv2D(64, (3, 3)))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))

model_B_on_A.add(keras.layers.Flatten())  # this converts our 3D feature maps to 1D feature vectors
model_B_on_A.add(keras.layers.Dense(64))
model_B_on_A.add(keras.layers.Activation('relu'))
model_B_on_A.add(keras.layers.Dropout(0.5))
model_B_on_A.add(keras.layers.Dense(1))
model_B_on_A.add(keras.layers.Activation('sigmoid'))

for layer in model_B_on_A.layers[:-1]:
  layer.trainable = False

optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model_B_on_A.compile(loss='binary_crossentropy',
                     optimizer=optimizer,
                     metrics=list_metrics)

history = model_B_on_A.fit_generator(train_gen,
                           epochs=5,
                           validation_data=val_gen,
                           callbacks = callback,
                           class_weight=class_weight,
                           verbose = 1)

In [None]:
dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)
specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))

print('\n\n\n Without retrain all layers ')
print('Test Performance\n\n', dict_predict_test.get('precision'), dict_predict_test.get('recall'),
        specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
print('Hey, retraining :)')

for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
model_B_on_A.compile(loss='binary_crossentropy',
                     optimizer=optimizer,
                     metrics=list_metrics)

history = model_B_on_A.fit_generator(train_gen,
                           epochs=20,
                           validation_data=val_gen,
                           callbacks = callback,
                           class_weight=class_weight,
                           verbose = 1)

# predicted_proba = model_B_on_A.predict(test_ds)
# list_actual_class, list_predict_class = get_clip_level_prediction(clip_level_list_actual_class, predicted_proba)
# precision = precision_score(y_true=list_actual_class, y_pred=list_predict_class)
# recall = recall_score(y_true=list_actual_class, y_pred=list_predict_class)
# specificity = recall_score(y_true=list_actual_class, y_pred=list_predict_class, pos_label=0)
# f1 = f1_score(y_true=list_actual_class, y_pred=list_predict_class, average='macro')
# acc = accuracy_score(y_true=list_actual_class, y_pred=list_predict_class)

# all_predicted_proba.extend(list_predict_class)
# all_list_actual_class.extend(list_actual_class)

dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)

print('\n\n\n After retrain all layers ')
print('Test Performance\n\n',dict_predict_test.get('precision'), dict_predict_test.get('recall'),
        specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')

In [None]:
model_B_on_A.save('resnet_tuned_on_google_aud.keras')

In [None]:
dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)

print('\n\n\n After retrain all layers ')
print('Test Performance\n\n',dict_predict_test.get('precision'), dict_predict_test.get('recall'),
        specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')

In [None]:
import tensorflow as tf

print(tf.test.is_gpu_available())
assert tf.test.is_built_with_cuda()

# ResNet50 on DataVerse

In [5]:
import librosa
import os
import numpy as np
import matplotlib.pyplot as plt
plt.ioff()

if WORKING_MAC:
    loc_raw_dataverse_files = '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/dataverse_files/'
    loc_fig_dataverse = '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/Only_Spectrogram_Figure_Dataverse/' 
    # '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/Figure_Datverse/'

In [None]:
def create_image(file_name):
    n_row = 3
    n_col = 2
    fig, axs = plt.subplots(n_col, n_row, figsize=(20, 20))

    signal, sr = librosa.load(file_name)
    N_FFT = int(sr * 0.025)
    HOP_LENGTH = int(sr * 0.0125)

    stft = librosa.stft(signal, n_fft = N_FFT, hop_length = HOP_LENGTH)
    
    librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), 
                            y_axis='log', x_axis='time', ax=axs[0, 0])
    librosa.display.specshow(librosa.feature.mfcc(S=stft, n_fft = N_FFT, hop_length = HOP_LENGTH), 
                            x_axis='time', y_axis='mel', ax=axs[0, 1])
    librosa.display.specshow(librosa.feature.chroma_stft(S=stft, n_fft = N_FFT, hop_length = HOP_LENGTH), 
                            y_axis='chroma', x_axis='time', ax=axs[0, 2])
    librosa.display.specshow(librosa.feature.chroma_cqt(y=signal, sr=sr),
                            y_axis='chroma', x_axis='time', ax=axs[1, 0])
    onset_env = librosa.onset.onset_strength(y = signal, sr=sr)
    librosa.display.specshow(librosa.feature.tempogram(onset_envelope = onset_env, sr = sr, hop_length = HOP_LENGTH), 
                            y_axis='tempo', x_axis='time', cmap='magma', ax=axs[1, 1])
    librosa.display.specshow(librosa.feature.fourier_tempogram(onset_envelope = onset_env, sr=sr, hop_length = HOP_LENGTH),
                            x_axis='time', y_axis='fourier_tempo', cmap='magma',
                            ax=axs[1, 2])
    
    for row in range(0, n_row):
        for col in range(0, n_col):
            axs[col][row].xaxis.label.set_visible(False)
            axs[col][row].yaxis.label.set_visible(False)
            axs[col][row].set_xticklabels([])
            axs[col][row].set_yticklabels([])
    return fig
    

In [None]:
def create_spectrogram(file_name):
    n_row = 1
    n_col = 1
    fig, axs = plt.subplots(n_col, n_row, figsize=(20, 20))

    signal, sr = librosa.load(file_name)
    N_FFT = int(sr * 0.025)
    HOP_LENGTH = int(sr * 0.0125)

    stft = librosa.stft(signal, n_fft = N_FFT, hop_length = HOP_LENGTH)
    
    librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), 
                            y_axis='log', x_axis='time', ax=axs)
    
    axs.xaxis.label.set_visible(False)
    axs.yaxis.label.set_visible(False)
    axs.set_xticklabels([])
    axs.set_yticklabels([])
    return fig
    

In [None]:
import warnings
warnings.filterwarnings('ignore')

def create_spectram_plot(loc_data_folder, loc_fig_folder):
  n_file_processing = 0

  for class_folder in sorted(os.listdir(loc_data_folder))[:3]:
    if 'DS_Store' not in class_folder:
      if not os.path.isdir(os.path.join(loc_fig_folder, class_folder)):
        os.mkdir(os.path.join(loc_fig_folder, class_folder))

      for file_name in sorted(os.listdir(os.path.join(loc_data_folder, class_folder))):
        if file_name.endswith('.wav') and (not os.path.exists(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))) and (librosa.get_duration(filename=os.path.join(loc_data_folder, class_folder, file_name)) > 0) :
            fig = create_spectrogram(os.path.join(loc_data_folder, class_folder, file_name))
            fig.savefig(os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png')))
            plt.close(fig)

        n_file_processing += 1
        print(n_file_processing, class_folder, os.path.basename(loc_data_folder))

# create_spectram_plot(loc_raw_dataverse_files,
#                      loc_fig_dataverse)

In [6]:
list_metrics = [keras.metrics.TruePositives(name='tp'),
                keras.metrics.FalsePositives(name='fp'),
                keras.metrics.TrueNegatives(name='tn'),
                keras.metrics.FalseNegatives(name='fn'),
                keras.metrics.F1Score(name='f1_score', threshold=0.5, average='macro'),
                keras.metrics.BinaryAccuracy(name='accuracy'),
                keras.metrics.Precision(name='precision'),
                keras.metrics.Recall(name='recall'),
                keras.metrics.AUC(name='auc')]

df_image_path = pd.DataFrame(columns = [str_filename, str_target])
loc_dataverse_image = loc_fig_dataverse
str_cls_1 = 'Class_1'
str_cls_0 = 'Class_0'


for fore_or_back_folder in os.listdir(loc_dataverse_image):
  if '.DS_Store' not in fore_or_back_folder:
    df_temp = pd.DataFrame({str_filename: [os.path.join(loc_dataverse_image, fore_or_back_folder, filename) 
                                          for filename in os.listdir(os.path.join(loc_dataverse_image, fore_or_back_folder))],
                            str_target: np.repeat(str_cls_1 if str_fore in fore_or_back_folder else str_cls_0,
                                                  len(os.listdir(os.path.join(loc_dataverse_image, fore_or_back_folder))))})
    df_image_path = pd.concat([df_image_path, df_temp])

df_image_path = shuffle(df_image_path, random_state=random_state)
df_image_path.reset_index(inplace=True, drop=True)

print(df_image_path.head(2)[str_filename].tolist())

2024-05-05 05:55:51.371119: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-05-05 05:55:51.371136: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-05-05 05:55:51.371141: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-05-05 05:55:51.371169: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-05 05:55:51.371183: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


['/Users/wyd2hu/Documents/SA39/ForegroundSpeech/Only_Spectrogram_Figure_Dataverse/fold2_background/0_9754.png', '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/Only_Spectrogram_Figure_Dataverse/fold3_background/0_26156.png']


In [16]:
def tune_resnet_model_for_dataverse():
  global  all_list_actual_class, all_predicted_proba
  
  all_list_actual_class = [] 
  all_predicted_proba = []

  image_size = 224
  input_size = 224
  img_width, img_height = 224, 224
  

  validation_ratio = 0.10
  test_ratio = 0.10

  n_epoch_for_bias = 1
  n_epoch_retraining = 25
  batch_size = 32

  skf = StratifiedKFold(n_splits=10, shuffle=True, random_state = random_state)

  for ith_fold, (train_index, test_index) in enumerate(skf.split(df_image_path[str_filename], df_image_path[str_target])):
      print(ith_fold)
      if ith_fold < 2:
          ################################# Handling Training Data ################################
          x_train = df_image_path.iloc[train_index][str_filename].tolist()
          y_train = df_image_path.iloc[train_index][str_target].tolist()
    
          df_temp_train = pd.DataFrame({str_filename: x_train, str_target: y_train})
          df_temp_train[str_fold] = np.repeat(1, df_temp_train.shape[0]) # 1 is used (randomly) to denote the train fold everywhere
          df_temp_train = shuffle(df_temp_train, random_state=random_state)
          df_temp_train.reset_index(inplace=True, drop=True)
          
          train_gen = ImageDataGenerator(samplewise_center = True,
                                        samplewise_std_normalization = True).flow_from_dataframe(
                                        dataframe = df_temp_train,
                                        x_col = str_filename,
                                        y_col = str_target,
                                        directory = loc_dataverse_image,
                                        target_size = (image_size, image_size),
                                        class_mode = 'binary',
                                        batch_size = batch_size,
                                        shuffle = True)
          classes = df_temp_train[str_target].str.replace('Class_', '').astype(np.int16)
          neg, pos = np.bincount(classes)
          total = neg + pos
          weight_for_0 = (1 / neg) * (total / 2.0)
          weight_for_1 = (1 / pos) * (total / 2.0)
          class_weight = {0: weight_for_0, 1: weight_for_1}
    
          ################################ Validation and Test data ################################
          x_test = df_image_path.iloc[test_index][str_filename].tolist()
          y_test = df_image_path.iloc[test_index][str_target].tolist()
    
          x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size = test_ratio / (test_ratio + validation_ratio),
                                                          shuffle=True, stratify = y_test, random_state = random_state)
          df_temp_val = pd.DataFrame({str_filename: x_val, str_target: y_val})
          df_temp_val[str_fold] = np.repeat(2, df_temp_val.shape[0])
    
          df_temp_test = pd.DataFrame({str_filename: x_test, str_target: y_test})
          df_temp_test[str_fold] = np.repeat(3, df_temp_test.shape[0])
    
          pd_data = pd.concat([df_temp_train, df_temp_val, df_temp_test])
          
          ################################ Image Data Generator ################################
          for train_fold, val_fold, test_fold in ((1, 2, 3), (1, 3, 2)):
            val_gen = ImageDataGenerator(samplewise_center=True,
                                          samplewise_std_normalization=True).flow_from_dataframe(
                                          dataframe=pd_data[pd_data[str_fold] == val_fold],
                                          x_col = str_filename,
                                          y_col = str_target,
                                          directory = loc_dataverse_image,
                                          target_size = (image_size, image_size),
                                          class_mode ='binary',
                                          batch_size = batch_size,
                                          shuffle=True)
    
            test_gen = ImageDataGenerator(samplewise_center=True,
                                          samplewise_std_normalization=True).flow_from_dataframe(
                                          dataframe=pd_data[pd_data[str_fold] == test_fold],
                                          x_col = str_filename,
                                          y_col = str_target,
                                          directory=loc_dataverse_image,
                                          target_size=(image_size, image_size),
                                          class_mode='binary',
                                          batch_size= batch_size,
                                          shuffle=True)
    
            ###################### Model training (For bias in the last layer) ######################
            if K.image_data_format() == 'channels_first':
                input_shape = (3, img_width, img_height)
            else:
                input_shape = (img_width, img_height, 3)
    
            resnet_model = ResNet50(include_top=False, weights='imagenet')
    
            # Create a new Sequential model and add the ResNet50 model as a layer
            keras_ResNet_model = tf.keras.Sequential([resnet_model])
    
            model_B_on_A = keras.models.Sequential(keras_ResNet_model.layers[:-1])
    
            model_B_on_A.add(keras.layers.Conv2D(32, (3, 3), input_shape=input_shape))
            model_B_on_A.add(keras.layers.Activation('relu'))
            model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    
            model_B_on_A.add(keras.layers.Conv2D(32, (3, 3)))
            model_B_on_A.add(keras.layers.Activation('relu'))
            model_B_on_A.add(keras.layers.Dropout(0.1))
            model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    
            model_B_on_A.add(keras.layers.Conv2D(64, (3, 3)))
            model_B_on_A.add(keras.layers.Activation('relu'))
            model_B_on_A.add(keras.layers.Dropout(0.2))
            model_B_on_A.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
    
            model_B_on_A.add(keras.layers.Flatten())  # this converts our 3D feature maps to 1D feature vectors
            model_B_on_A.add(keras.layers.Dense(64))
            model_B_on_A.add(keras.layers.Activation('relu'))
            model_B_on_A.add(keras.layers.Dropout(0.5))
            model_B_on_A.add(keras.layers.Dense(1))
            model_B_on_A.add(keras.layers.Activation('sigmoid'))
    
            for layer in model_B_on_A.layers[:-1]:
              layer.trainable = False
            
            # from tensorflow.keras.optimizers.legacy import Adam
            
            optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
            callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    
            model_B_on_A.compile(loss='binary_crossentropy',
                                optimizer=optimizer,
                                metrics=list_metrics)
    
            history = model_B_on_A.fit(train_gen,
                                      epochs = n_epoch_for_bias,
                                      validation_data = val_gen,
                                      callbacks = callback,
                                      class_weight = class_weight,
                                      verbose = 1)
            
            dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)
            specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))
            print('\n\n\n Training only the last layers 🙈😡😠')
            print('Test Performance\n\n', dict_predict_test.get('precision'), dict_predict_test.get('recall'), specificity,
                    dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
    
            ###################### Re-training all layers ######################
            print('Hey, retraining 😍😍😍🤜🤛')
            for layer in model_B_on_A.layers[:-1]:
                layer.trainable = True
    
            optimizer = keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999) # the default lr is 1e-3
            model_B_on_A.compile(loss='binary_crossentropy',
                                optimizer=optimizer,
                                metrics=list_metrics)
    
            history = model_B_on_A.fit(train_gen,
                                      epochs = n_epoch_retraining,
                                      validation_data=val_gen,
                                      callbacks = callback,
                                      class_weight=class_weight,
                                      verbose = 1)
    
            all_predicted_proba.extend([arr[0] for arr in list(model_B_on_A.predict(test_gen))])
            all_list_actual_class.extend(pd_data[pd_data[str_fold] == test_fold][str_target].tolist())
    
            dict_predict_test = model_B_on_A.evaluate(test_gen, return_dict=True)
            specificity = dict_predict_test.get('tn')/(dict_predict_test.get('tn') + dict_predict_test.get('fp'))
            
            print('\n\n\n After retrain all layers ')
            print('Test Performance\n\n',dict_predict_test.get('precision'), dict_predict_test.get('recall'),
                    specificity, dict_predict_test.get('accuracy'), dict_predict_test.get('auc'), (specificity + dict_predict_test.get('recall'))/2, '\n\n\n')
            

tune_resnet_model_for_dataverse()

0
Found 71530 validated image filenames belonging to 2 classes.
Found 3974 validated image filenames belonging to 2 classes.
Found 3974 validated image filenames belonging to 2 classes.







 Training only the last layers 🙈😡😠
Test Performance

 0.04273504391312599 0.003703703638166189 0.9573170731707317 0.6333668828010559 0.35060709714889526 0.48051038840444893 



Hey, retraining 😍😍😍🤜🤛
Epoch 1/25


2024-05-05 07:29:39.231640: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25



 After retrain all layers 
Test Performance

 0.8322091102600098 0.7311111092567444 0.9241615853658537 0.8585807681083679 0.9235974550247192 0.8276363473112991 



Found 3974 validated image filenames belonging to 2 classes.
Found 3974 validated image filenames belonging to 2 classes.







 Training only the last layers 🙈😡😠
Test Performance

 0.2762003242969513 0.39659005403518677 0.4659047619047619 0.44237545132637024 0.4070504307746887 0.4312474079699743 



Hey, retraining 😍😍😍🤜🤛
Epoch 1/25


2024-05-06 09:07:35.788540: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25



 After retrain all layers 
Test Performance

 0.7481775879859924 0.8369162082672119 0.8552380952380952 0.8490186333656311 0.9283145666122437 0.8460771517526535 



1
Found 71530 validated image filenames belonging to 2 classes.
Found 3974 validated image filenames belonging to 2 classes.
Found 3974 validated image filenames belonging to 2 classes.







 Training only the last layers 🙈😡😠
Test Performance

 0.3397080898284912 1.0 0.0 0.3397080898284912 0.5115448832511902 0.5 



Hey, retraining 😍😍😍🤜🤛
Epoch 1/25


2024-05-07 06:33:51.673821: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Adam/AssignAddVariableOp.


Epoch 2/25
 398/2236 [====>.........................] - ETA: 1:53:21 - loss: 0.4811 - tp: 3382.0000 - fp: 1747.0000 - tn: 6610.0000 - fn: 997.0000 - f1_score: 0.7114 - accuracy: 0.7845 - precision: 0.6594 - recall: 0.7723 - auc: 0.8589

KeyboardInterrupt: 

In [3]:

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Create 2 virtual GPUs with 1GB memory each
  try:
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=1024),
         tf.config.LogicalDeviceConfiguration(memory_limit=1024),
         tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)


1 Physical GPU, 1 Logical GPUs


2024-05-05 04:40:50.618340: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-05-05 04:40:50.618365: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-05-05 04:40:50.618372: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-05-05 04:40:50.618441: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-05 04:40:50.618468: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:
!pip install keras

In [None]:
!pip install keras==2.15.0

In [None]:
!pip install tensorflow==2.15.0

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50

# Load the ResNet50 model without the top (classification) layer
resnet_model = ResNet50(include_top=False, weights='imagenet')

# Create a new Sequential model and add the ResNet50 model as a layer
keras_ResNet_model = tf.keras.Sequential([resnet_model])

# Optionally, you can add additional layers to the Sequential model here
# For example:
# keras_ResNet_model.add(tf.keras.layers.Flatten())

# Compile and train the model as needed



In [None]:
version_fn = getattr(tf.keras, "version", None)
if version_fn and version_fn().startswith("3."):
  import tf_keras as keras
else:
  keras = tf.keras

In [None]:
print(all_predicted_proba)

In [None]:
result_arrays = [np.array([0.42610288], dtype=np.float32), np.array([0.41446418])]

# Extract values from each array
values = [arr[0] for arr in result_arrays]

print(values)


# Operation Theater

In [12]:
import librosa
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
str_fore = 'foreground'

plt.ioff()

loc_raw_dataverse_files = '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/dataverse_files/'
loc_fig_dataverse = '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/Only_Spectrogram_Figure_Dataverse/' 
# '/Users/wyd2hu/Documents/SA39/ForegroundSpeech/Figure_Datverse/'

def create_spectrogram(file_name, loc_fig):
    n_row = 1
    n_col = 1
    fig, axs = plt.subplots(n_col, n_row, figsize=(20, 20))

    signal, sr = librosa.load(file_name)
    N_FFT = int(sr * 0.025)
    HOP_LENGTH = int(sr * 0.0125)

    stft = librosa.stft(signal, n_fft = N_FFT, hop_length = HOP_LENGTH)
    
    librosa.display.specshow(librosa.amplitude_to_db(stft, ref=np.max), 
                            y_axis='log', x_axis='time', ax=axs)
    
    axs.xaxis.label.set_visible(False)
    axs.yaxis.label.set_visible(False)
    axs.set_xticklabels([])
    axs.set_yticklabels([])
    
    fig.savefig(loc_fig)
    plt.close(fig)
    

import warnings
warnings.filterwarnings('ignore')

def create_spectram_plot(loc_data_folder, loc_fig_folder):
  n_file_processing = 0

  for class_folder in sorted(os.listdir(loc_data_folder)):
    if 'DS_Store' not in class_folder:
      if not os.path.isdir(os.path.join(loc_fig_folder, class_folder)):
        os.mkdir(os.path.join(loc_fig_folder, class_folder))

      for file_name in sorted(os.listdir(os.path.join(loc_data_folder, class_folder))):
        if file_name.endswith('.wav'):   
            try:
                loc_fig = os.path.join(loc_fig_folder, class_folder, file_name.replace('wav', 'png'))
                # print(loc_fig)
                img = Image.open(loc_fig)
                img.verify()
            except Exception as e:
                print('Bad file:', class_folder, os.path.basename(loc_fig))
                os.remove(loc_fig)
                create_spectrogram(os.path.join(loc_data_folder, class_folder, file_name), loc_fig)

        # n_file_processing += 1
        # print(n_file_processing, class_folder, os.path.basename(loc_data_folder))

create_spectram_plot(loc_raw_dataverse_files,
                     loc_fig_dataverse)

In [13]:
print("Done1")

Done1
