In [None]:
########################################################################
# import default libraries
########################################################################
import os
import sys
import gc
########################################################################



########################################################################
# import additional libraries
########################################################################
import numpy as np
import scipy.stats
# from import
import matplotlib.pyplot as plt
from tqdm import tqdm
try:
    from sklearn.externals import joblib
except:
    import joblib
# original lib
import keras_model_doa
import pandas as pd
import keras
import tensorflow as tf
import librosa as lb
import keras.backend as K
import common as com
from __future__ import division 
import numpy as np
import sys
import cv2
import keras_model_doa_ps
from scipy.signal import butter,sosfilt,correlate2d
from tensorflow.keras.utils import to_categorical
import tensorflow_io as tfio
import keras_model_unet_1D
import tensorflow_probability as tfp
import math
import keras_model_unet_crm
import keras_model_uformer
from scipy.interpolate import interp1d
from feature_extractor import salsa
############

caused by: ['/usr/local/lib/python3.9/dist-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl5mutexC1Ev']
caused by: ['/usr/local/lib/python3.9/dist-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZNK10tensorflow4data11DatasetBase8FinalizeEPNS_15OpKernelContextESt8functionIFN3tsl8StatusOrISt10unique_ptrIS1_NS5_4core15RefCountDeleterEEEEvEE']


In [2]:
########################################################################
# load parameter.yaml
########################################################################
param = com.yaml_load()
########################################################################
saved_weight = os.path.join(param["P_MODELSAVE"], 'dataweights.{epoch:02d}-{val_dense_1_binary_accuracy:.2f}.hdf5')
early_stopping = keras.callbacks.EarlyStopping(monitor = 'val_mask_loss', patience = 10)
# model_unet = keras_model.get_model(input_shape=(1,param["feature"]["n_mels"],param["feature"]["n_frames"]), lr = param["fit"]["lr"])

import warnings
warnings.filterwarnings("ignore", message="PySoundFile failed. Trying audioread instead.")


In [None]:
########################################################################
# get data from the list for file paths
########################################################################
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = fs
    low = lowcut / nyq
    high = highcut / nyq
    sos = butter(order, [low, high], analog=False, btype='band', output='sos')
    return sos

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    sos = butter_bandpass(lowcut, highcut, fs, order=order)
    y = sosfilt(sos, data)
    return y

def cal_rms(amp):
    return tf.math.sqrt(tf.math.reduce_mean(tf.math.square(amp), axis=-1))




def mse(y_true, y_pred):
    tf.print(K.square(y_pred - y_true))
    return K.mean(K.square(y_pred - y_true), axis=-1)

def SALSA(audio, fs=22050, n_fft=1024, hop_length=345
          , feature_type = 'salsa_lite'):
    audio_input = audio.numpy()
    # Compute stft
    log_specs = []
    n_mics = 4
    n_bins = n_fft // 2 + 1
    fmax = 6050  # Hz
    fmin_doa = 520
    fmax_doa = 6050
    lower_bin = int(np.floor(fmin_doa * n_fft / np.float32(fs)))  # 512: 1; 256: 0
    upper_bin = int(np.floor(fmax_doa * n_fft / np.float32(fs)))  # 9000Hz: 512: 192, 256: 96
    cutoff_bin = int(np.floor(fmax * n_fft / np.float32(fs)))  # 9000 Hz, 512 nfft: cutoff_bin = 192
    c = 343
    delta = 2 * np.pi * fs / (n_fft * c)
    freq_vector = np.arange(n_bins)
    freq_vector[0] = 1
    freq_vector = freq_vector[:, None, None]  # n_bins x 1 x 1
    lower_bin = np.max((1, lower_bin))
    testt = []
    for imic in np.arange(n_mics):
        stft = lb.stft(y=np.asfortranarray(audio_input[imic, :]), n_fft=n_fft, hop_length=hop_length,
                            center=True, window='hann', pad_mode='reflect')
        if imic == 0:
            n_frames = stft.shape[1]
            X = np.zeros((n_bins, n_frames, n_mics), dtype='complex')  # (n_bins, n_frames, n_mics)
        X[:, :, imic] = stft
        # Compute log linear power spectrum
        spec = (np.abs(stft) ** 2).T
        log_spec = np.expand_dims(spec, axis=0)
        log_specs.append(log_spec)
    log_specs = np.concatenate(log_specs, axis=0)  # (n_mics, n_frames, n_bins)

    # Compute spatial feature
    phase_vector = np.angle(X[:, :, 1:] * np.conj(X[:, :, 0, None]))
    if feature_type == 'salsa_ipd':
        phase_vector = phase_vector / np.pi
    elif feature_type == 'salsa_lite':
        phase_vector = phase_vector / (delta * freq_vector)
    phase_vector = np.transpose(phase_vector, (2, 1, 0))  # (n_mics, n_frames, n_bins)
    # Crop frequency
    log_specs = log_specs[:, :, lower_bin:cutoff_bin]
    phase_vector = phase_vector[:, :, lower_bin:cutoff_bin]
    phase_vector[:, :, upper_bin:] = 0
    # Stack features
    audio_feature = np.concatenate((log_specs, phase_vector), axis=0)
    return audio_feature

class MelSpecGccExtractor():
    """
    Extract log-mel spectrograms and GCC-PHAT features.
    """
    def __init__(self):
        """
        :param fs: Sampling rate.
        :param n_fft: Number of FFT points.
        :param hop_length: Number of sample for hopping.
        :param n_mels: Number of mel bands.
        :param win_length: Window length <= n_fft. If None, assign n_fft
        :param fmin: Min frequency to extract feature (Hz).
        :param fmax: Max frequency to extract feature (Hz).
        :param window: Type of window.
        """
        self.n_mels = 256
        self.fs = 22050
        self.window = "hann"
        self.n_fft = 2048
        self.hop_length = 345
        self.win_length = None
        self.fmin = 50
        self.fmax = None
        self.melW = lb.filters.mel(sr=self.fs, n_fft=self.n_fft, n_mels=self.n_mels, fmin=self.fmin, fmax=self.fmax)

    def gcc_phat(self, sig, refsig) -> np.ndarray:
        """
        Compute GCC-PHAT between sig and refsig.
        :param sig: <np.ndarray: (n_samples,).
        :param refsig: <np.ndarray: (n_samples,).
        :return: gcc_phat: <np.ndarray: (1, n_frames, n_mels)>
        """
        ncorr = 2 * self.n_fft - 1
        n_fft = int(2 ** np.ceil(np.log2(np.abs(ncorr))))  # this n_fft double the length of win_length
        Px = lb.stft(y=np.asfortranarray(sig),
                          n_fft=n_fft,
                          hop_length=self.hop_length,
                          win_length=self.win_length,
                          center=True,
                          window=self.window,
                          pad_mode='reflect')
        Px_ref = lb.stft(y=np.asfortranarray(refsig),
                              n_fft=n_fft,
                              hop_length=self.hop_length,
                              win_length=self.win_length,
                              center=True,
                              window=self.window,
                              pad_mode='reflect')
        freq_filter = np.ones((n_fft//2 + 1, 1))
        k_cutoff = int(4000 / self.fs * n_fft)
        k_buffer = int(400 / self.fs * n_fft)
        cos_x = np.arange(2 * k_buffer) * (np.pi/2) / (2 * k_buffer - 1)
        freq_filter[k_cutoff - k_buffer: k_cutoff + k_buffer, 0] = np.cos(cos_x)
        Px = Px * freq_filter
        Px_ref = Px_ref * freq_filter

        R = Px * np.conj(Px_ref)
        n_frames = R.shape[1]
        gcc_phat = []
        for i in range(n_frames):
            spec = R[:, i].flatten()
            cc = np.fft.irfft(np.exp(1.j * np.angle(spec)))
            cc = np.concatenate((cc[-self.n_mels // 2:], cc[:self.n_mels // 2]))
            gcc_phat.append(cc)
        gcc_phat = np.array(gcc_phat)
        gcc_phat = gcc_phat[None, :, :]
        return gcc_phat

    def logmel(self, sig) -> np.ndarray:
        """
        Compute logmel of single channel signal
        :param sig: <np.ndarray: (n_samples,).
        :return: logmel: <np.ndarray: (1, n_frames, n_mels)>.
        """
        spec = np.abs(lb.stft(y=np.asfortranarray(sig),
                                   n_fft=self.n_fft,
                                   hop_length=self.hop_length,
                                   win_length=self.win_length,
                                   center=True,
                                   window=self.window,
                                   pad_mode='reflect'))

        mel_spec = np.dot(self.melW, spec ** 2).T
        logmel_spec = lb.power_to_db(mel_spec, ref=1.0, amin=1e-10, top_db=None)
        logmel_spec = np.expand_dims(logmel_spec, axis=0)

        return logmel_spec

    def extract(self, audio_input: np.ndarray) -> np.ndarray:
        """
        :param audio_input: <np.ndarray: (n_channels, n_samples)>.
        :return: features <np.ndarray: (n_channel + n_channels*(n_channel-1)/2, n_timeframes, n_features)>.
        """
        n_channels = audio_input.shape[0]
        features = []
        gcc_features = []
        for n in range(n_channels):
            features.append(self.logmel(audio_input[n]))
            for m in range(n + 1, n_channels):
                gcc_features.append(self.gcc_phat(sig=audio_input[m], refsig=audio_input[n]))

        features.extend(gcc_features)
        features = np.concatenate(features, axis=0)

        return features

In [None]:
def cross_corr(data1):
    horizontal = correlate2d(data1[0], data1[1], mode='same')
    vertical = correlate2d(data1[3], data1[2], mode='same')
    horizontal = tf.expand_dims(horizontal, 0)
    vertical = tf.expand_dims(vertical, 0)
    cross = tf.concat([horizontal, vertical], axis = 0)
    return cross

def power_to_db(S, amin=1e-16, top_db=80.0):
    """Convert a power-spectrogram (magnitude squared) to decibel (dB) units.
    Computes the scaling ``10 * log10(S / max(S))`` in a numerically
    stable way.
    Based on:
    https://librosa.github.io/librosa/generated/librosa.core.power_to_db.html
    """
    def _tf_log10(x):
        numerator = tf.math.log(x)
        denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))
        return numerator / denominator
    
    # Scale magnitude relative to maximum value in S. Zeros in the output 
    # correspond to positions where S == ref.
    ref = tf.reduce_max(S)

    log_spec = 10.0 * _tf_log10(tf.maximum(amin, S))
    log_spec -= 10.0 * _tf_log10(tf.maximum(amin, ref))

    log_spec = tf.maximum(log_spec, tf.reduce_max(log_spec) - top_db)

    return log_spec


# AZIMUTH
def readcsv(item):
    df = pd.read_csv(item.numpy().decode())
    x_ = (tf.cast(df["x_128"][:128].astype('float32'), tf.float32))
    # df["y_128"][:128].astype('float32')
    # df.loc[df["y_128"] < 0, "y_128"].astype('float32')*-1
    y_ = tf.cast((df["y_128"][:128].astype('float32')), tf.float32)
    
    label_ = []
    for n in df["label_128"][:128]:
        label_.append(tf.one_hot(int(n), 4))
    label_ = tf.stack(label_)
    return x_, y_, label_



def band(data):
    return butter_bandpass_filter(data.numpy(), 750, 22050/2-5, 22050)

def mel_band(data):
    bandpass = butter_bandpass_filter(data.numpy(), 750, 22050/2-5, 22050)
    mel_spec = lb.feature.melspectrogram(y=bandpass, n_fft=1024, hop_length=320, n_mels=128, center=False, fmin=0,
                                                fmax=22050/2)
    return mel_spec


def test_predict(data):
    return model.predict(data)

######
# SALSA
######
@tf.function
def file_read_doa(combined_path, direction, model = keras_model_uformer.wave_u_net(num_initial_filters = 64, num_layers = 4, 
           source_names = ["siren"], num_channels = 4, output_filter_size = 1,
           padding = "same", input_size = 44100, context = False, upsampling_type = "direct",
           output_activation = "tanh", output_type = "direct"),
            part=0):
    """
    convert file_name to a vector array.

    file_name : str
        target .wav file

    return : numpy.array( numpy.array( float ) )
        vector array
        * dataset.shape = (dataset_size, feature_vector_length)
    """

    # generate combined audio data using tf.io
    model.load_weights("unet_model_1D/parts/new_conformer_mic_com_top_024.h5")
    combined_contents = tf.io.read_file(combined_path)
    combined, sr = tf.audio.decode_wav(combined_contents)
    combined = tf.unstack(combined, axis=-1, num=4)
    dic = {"1": [0, 1, 2, 3], "2": [2, 1, 0, 3], "3": [2, 3, 0, 1], "4": [0, 3, 2, 1]}
    combined   = tf.stack([combined[dic[part][0]], combined[dic[part][1]], combined[dic[part][2]], combined[dic[part][3]]], axis=-1)
    combined = tf.transpose(tf.py_function(band, [tf.transpose(combined)], (tf.float32)))
    combined = tf.reshape(combined , [1,tf.shape(combined)[0], tf.shape(combined)[1]])  
    combined = tf.py_function(model, [combined[:, :, :]], (tf.float32))    
    combined = tf.squeeze(tf.transpose(combined))
    combined = tf.py_function(band, [combined], (tf.float32))
    features = tf.py_function(SALSA, [combined], (tf.float32))
    features = tf.reshape(features , [tf.shape(features)[0], tf.shape(features)[1], tf.shape(features)[2]])  
    features = tf.transpose(features)
    log = features[:,:,:4]
    salsa = features[:,:,4:]
    
    onehot_f = power_to_db(log[:,:,0], top_db=80)
    onehot = tf.math.reduce_max(onehot_f, 0)
    
    onehot = tf.where(tf.math.less_equal(onehot, -20), 0., onehot)
    onehot = tf.where(tf.math.not_equal(onehot, 0), 1., onehot)
    onehot_1 = tf.repeat(tf.expand_dims(onehot, 1), 4, 1)
    onehot_ = tf.repeat(tf.expand_dims(onehot, 0), 256, 0)
    onehot_2 = tf.repeat(tf.expand_dims(onehot_, -1), 3, -1)
    onehot_3 = tf.repeat(tf.expand_dims(onehot_, -1), 4, -1)
    
    onehot_4 = tf.repeat(tf.expand_dims(onehot, -1), 2, -1)
    onehot = onehot
    
    log = power_to_db(log*onehot_3, top_db=80)
    salsa = salsa*onehot_2
    features = tf.concat([log, salsa], -1)
    dic2 = {"1":[1, 1], "2":[-1, 1], "3":[-1, -1], "4":[1, 1]}
    
    x, y, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32,tf.float32))

    label_ = tf.reshape(label_ , [128, 4])
    inactive = tf.reduce_max(label_, 1)
    
    x_ = x*inactive*dic2[part][0]
    y_ = y*inactive*dic2[part][1]

    xy_dir = tf.stack([x_, y_, inactive], axis=1)
    return features, {'doa_out':xy_dir, 'sed_out':label_}


######
# GCC PHAT
#####
# @tf.function
# def file_read_doa(combined_path, direction, model = keras_model_unet_crm.model(input_size = (4, 44100)), part=0):
#     """
#     convert file_name to a vector array.

#     file_name : str
#         target .wav file

#     return : numpy.array( numpy.array( float ) )
#         vector array
#         * dataset.shape = (dataset_size, feature_vector_length)
#     """
#     model.load_weights("unet_model_orm/dcunet_stft_mic1_074.h5")
#     # generate combined audio data using tf.io
#     extract_feature =  MelSpecGccExtractor()
#     combined_contents = tf.io.read_file(combined_path)
#     combined, sr = tf.audio.decode_wav(combined_contents)
#     combined = tf.unstack(combined, axis=-1, num=4)
#     dic = {"1": [0, 1, 2, 3], "2": [1, 0, 2, 3], "3": [1, 0, 3, 2], "4": [0, 1, 3, 2]}
#     # denoised   = tf.stack([combined[dic[part][0]], combined[dic[part][1]]], axis=-1)
#     combined   = tf.stack([combined[dic[part][0]], combined[dic[part][1]], combined[dic[part][2]], combined[dic[part][3]]], axis=-1)

#     combined = tf.py_function(band, [tf.transpose(combined)], (tf.float32))
#     combined = tf.reshape(combined , [1,tf.shape(combined)[0], tf.shape(combined)[1]])  
#     # max_ = tf.math.reduce_max(tf.abs(combined))
#     # combined = tf.math.divide(combined,max_)

#     denoised = tf.py_function(model, [combined], (tf.float32))    
#     denoised = tf.squeeze(denoised)
#     # denoised = combined
#     denoised = tf.py_function(band, [tf.transpose(denoised)], (tf.float32))
#     features = tf.py_function(extract_feature.extract, [denoised], (tf.float32))
#     features = tf.reshape(features , [tf.shape(features)[0], tf.shape(features)[1], tf.shape(features)[2]])  
#     features = tf.transpose(features)
    
#     # x_, y_, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32,tf.float32))
#     # x_ = tf.reshape(x_ , [128]) 
#     # y_ = tf.reshape(y_ , [128]) 
#     # label_ = tf.reshape(label_ , [128, 4])
#     # xy_dir = tf.stack([x_, y_, tf.reduce_max(label_, 1)], axis=1)
#     # return features, {'doa_out':xy_dir[32*part:32*part+32,:], 'sed_out':label_[32*part:32*part+32,:]}
    
# #     dir_, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32))
# #     dir_ = tf.reshape(dir_ , [128, 37]) 
# #     label_ = tf.reshape(label_ , [128, 4])
    
# #     return features, {'doa_out':dir_[32*part:32*part+32,:], 'sed_out':label_[32*part:32*part+32,:]}

#     dir_, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32))
#     dic2 = {"1":[1, 1], "2":[-1, 1], "3":[-1, 1], "4":[1, -1]}
#     label_ = tf.reshape(label_ , [128, 4])
#     x_ = tf.math.cos(dir_)*tf.reduce_max(label_, 1)*dic2[part][0]
#     y_ = tf.math.sin(dir_)*tf.reduce_max(label_, 1)*dic2[part][1]
    
    
#     xy_dir = tf.stack([x_, y_, tf.reduce_max(label_, 1)], axis=1)
    
#     return tf.transpose(features), {'doa_out':xy_dir, 'sed_out':label_}




######
# GCC
######


# @tf.function
# def file_read_doa(combined_path, direction, part=0, n =0):
#     """
#     convert file_name to a vector array.

#     file_name : str
#         target .wav file

#     return : numpy.array( numpy.array( float ) )
#         vector array
#         * dataset.shape = (dataset_size, feature_vector_length)
#     """
#     # model = keras_model_uformer.wave_u_net(num_initial_filters = 64, num_layers = 4, 
#     #        source_names = ["siren"], num_channels = 4, output_filter_size = 1,
#     #        padding = "same", input_size = 44100, context = False, upsampling_type = "direct",
#     #        output_activation = "tanh", output_type = "direct")
#     # model.summary()
#     # model.load_weights("unet_model_1D/parts/new_conformer_030.h5")
    
#     # generate combined audio data using tf.io
#     combined_contents = tf.io.read_file(combined_path)
#     combined, sr = tf.audio.decode_wav(combined_contents)
#     combined = tf.unstack(combined, axis=-1, num=4)
#     dic = {"1": [0, 1, 2, 3], "2": [1, 0, 2, 3], "3": [1, 0, 3, 2], "4": [0, 1, 3, 2]}
#     combined   = tf.stack([combined[dic[part][0]], combined[dic[part][1]], combined[dic[part][2]], combined[dic[part][3]]], axis=-1)
    
#     combined_mel = tf.py_function(mel_band, [tf.transpose(combined)[:,11025*n:11025*(n+1)]], (tf.float32))
#     # combined_mel = tf.transpose(combined_mel)
# #     combined_mel = tf.reshape(combined_mel , [1,tf.shape(combined_mel)[1], tf.shape(combined_mel)[0], tf.shape(combined_mel)[2]])  

# #     # combined_mel = tf.reshape(combined_mel , [1,tf.shape(combined_mel)[1], tf.shape(combined_mel)[2], tf.shape(combined_mel)[0]])  
# #     mask = tf.py_function(model, [combined_mel], (tf.float32))    
# #     mask = tf.squeeze(mask)
# #     combined_mel = tf.squeeze(combined_mel)
# #     denoised_mel = (mask*combined_mel)
# #     denoised_mel = tf.transpose(denoised_mel)
#     cross_correlation = tf.py_function(cross_corr, [combined_mel], (tf.float32))   
#     cross_correlation = tf.reshape(cross_correlation, [tf.shape(cross_correlation)[0], tf.shape(cross_correlation)[1], tf.shape(cross_correlation)[2]]) 
#     denoised_mel = tf.concat([combined_mel, cross_correlation], axis=0)        
#     # denoised_mel = tfio.audio.dbscale(denoised_mel, top_db=80)
#     denoised_mel = tfio.audio.dbscale(denoised_mel, top_db=100)
#     denoised_mel = tf.transpose(denoised_mel)
    
#     dir_, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32))
#     dic2 = {"1":[1, 1], "2":[-1, 1], "3":[-1, 1], "4":[1, -1]}
#     label_ = tf.reshape(label_ , [128, 4])
#     x_ = tf.math.cos(dir_)*tf.reduce_max(label_, 1)*dic2[part][0]
#     y_ = tf.math.sin(dir_)*tf.reduce_max(label_, 1)*dic2[part][1]
    
    
#     xy_dir = tf.stack([x_, y_, tf.reduce_max(label_, 1)], axis=1)
    
#     return tf.transpose(denoised_mel), {'doa_out':xy_dir[::2,:][::2,:][::2,:][4*n:4*(n+1),:], 'sed_out':label_[::2,:][::2,:][::2,:][4*n:4*(n+1),:]}

#     # x_, y_, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32,tf.float32))

#     # x_ = tf.reshape(x_ , [128]) 
#     # y_ = tf.reshape(y_ , [128]) 
#     # label_ = tf.reshape(label_ , [128, 4])
#     # xy_dir = tf.stack([x_, y_, tf.reduce_max(label_, 1)], axis=1)
#     # return tf.transpose(denoised_mel, [1,0,2]), {'doa_out':xy_dir, 'sed_out':label_}
# #     dir_, label_ =  tf.py_function(readcsv, [direction], (tf.float32,tf.float32))
# #     dir_ = tf.reshape(dir_ , [128, 37]) 
# #     label_ = tf.reshape(label_ , [128, 4])
    
# #     return denoised_mel, {'doa_out':dir_, 'sed_out':label_}


# 


In [None]:
#################### Dataset Load #############################################
type_ = "topm_real"

combined_loc = f"dev_data_doa/combined_ground_truth_topm_real/train_noise/"
combined_loc_test = f"dev_data_doa/combined_ground_truth_topm_real/test/"

dir_loc = f"dev_data_doa/direction_ground_truth_top/train_new/"

dir_loc_test = f"dev_data_doa/direction_ground_truth_top/test_new/"



df = pd.read_csv(f"dev_data_doa/combined_ground_truth_topm_real/train_noise_combined_extra.csv")
jk = pd.read_csv(f"dev_data_doa/combined_ground_truth_topm_real/val_combined_extra.csv")



print(len(df))



df['combined_name'] = combined_loc + df['combined_name'].astype(str)
df['direction'] = dir_loc + df['anomaly_name'].astype(str) + "_direction.csv"



jk['combined_name'] = combined_loc_test + jk['combined_name'].astype(str)
jk['direction'] = dir_loc_test + jk['anomaly_name'].astype(str) + "_direction.csv"


jk = pd.concat([jk], ignore_index=True)
jk = jk.loc[:, ~jk.columns.str.contains('^Unnamed')]
jk = jk.dropna()
df = df.reset_index(drop=True)
jk_clean_test = jk.pop('combined_name')
jk_dir_test = jk.pop('direction')

df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df = df.dropna()

print(df)


df_clean_train = df.pop('combined_name')
df_dir_train = df.pop('direction')



print("\n===========================")

print("============== DATASET_GENERATOR ==============")

BATCH_SIZE = 128
train_dataset = tf.data.Dataset.from_tensor_slices((df_clean_train, df_dir_train))

print(train_dataset.cardinality())


data_train1 = train_dataset.map(lambda x,y: file_read_doa(x,y, part="1"))
data_train2 = train_dataset.map(lambda x,y: file_read_doa(x,y, part="2"))
data_train3 = train_dataset.map(lambda x,y: file_read_doa(x,y, part="3"))

data_train = data_train1.concatenate(data_train2).concatenate(data_train3).cache("tmp1/cache").shuffle(16256).batch(BATCH_SIZE, drop_remainder=True)


val_dataset = tf.data.Dataset.from_tensor_slices((jk_clean_test, jk_dir_test))
val_dataset = val_dataset
data_val1 = val_dataset.map(lambda x,y: file_read_doa(x,y, part="1"))

data_val = data_val1.cache("tmp_doa2/cache").batch(32).shuffle(100)
                                                                                        

print(data_val.cardinality(), data_train.cardinality())



6697
                                          combined_name  \
0     dev_data_doa/combined_ground_truth_topm_real/t...   
1     dev_data_doa/combined_ground_truth_topm_real/t...   
2     dev_data_doa/combined_ground_truth_topm_real/t...   
3     dev_data_doa/combined_ground_truth_topm_real/t...   
4     dev_data_doa/combined_ground_truth_topm_real/t...   
...                                                 ...   
6692  dev_data_doa/combined_ground_truth_topm_real/t...   
6693  dev_data_doa/combined_ground_truth_topm_real/t...   
6694  dev_data_doa/combined_ground_truth_topm_real/t...   
6695  dev_data_doa/combined_ground_truth_topm_real/t...   
6696  dev_data_doa/combined_ground_truth_topm_real/t...   

                              anomaly_name                   snr  label  \
0     Crash-05_case_7_crash_Velocity_60kmh  [-29, -25, -32, -25]      0   
1     Crash-05_case_7_crash_Velocity_60kmh  [-24, -24, -23, -23]      0   
2     Crash-05_case_3_crash_Velocity_40kmh      [-4, -4, -3, 

In [None]:
import math

########################
### ACCDOA
########################
from scipy.optimize import linear_sum_assignment

@tf.function
def tf_linear_sum_assignment(cost_matrix):
    return tf.numpy_function(func=linear_sum_assignment,inp=[cost_matrix],Tout=[tf.int64,tf.int64])

def accuracy_masked(y_gt, pred):
    x = y_gt[:, :,:4] 
    y = y_gt[:, :,4:]
    acc_true = tf.math.sqrt(x**2 + y**2) > 0.5
    acc_true = K.cast(acc_true, 'float32')

    x_ = pred[:, :,:4] 
    y_ = pred[:, :,4:]
    acc_pred = tf.math.sqrt(x_**2 + y_**2) > 0.7
    acc_pred = K.cast(acc_pred, 'float32')    
    return tf.keras.metrics.categorical_accuracy(acc_true, acc_pred)

def mae_masked_accdoa(y_gt, pred):
    x = y_gt[:, :,:4] 
    y = y_gt[:, :,4:]
    test = tf.math.sqrt(x**2 + y**2) > 0.6
    test = tf.cast(test, tf.float32)
    x_new = x * test
    y_new = y * test
    
    x_ = pred[:, :,:4] 
    y_ = pred[:, :,4:]
    x_new_ = x_ * test
    y_new_ = y_ * test
    
    direction = tf.math.atan2(y_new_, x_new_) 
    direction = direction/tf.constant(math.pi)*180
    direction2 = tf.math.atan2(y_new, x_new) 
    direction2 = direction2/tf.constant(math.pi)*180
    return K.mean(K.abs(direction - direction2), -1)


########################
### DOA
########################
def masked_mse(y_gt, model_out):
    sed_out = y_gt[:, :, 2:3] 
    sed_out = tf.concat([sed_out,sed_out], axis = 2)
    x = K.square(model_out[:, :, :] - y_gt[:, :, :2])
    return tf.reduce_sum(x * sed_out, 1)/(tf.reduce_sum(sed_out, 1) + 1e-7)

def masked_mae(y_gt, model_out):
    sed_out = y_gt[:, :, 2:3]  #TODO fix this hardcoded value of number of classes
    sed_out = tf.cast(tf.concat([sed_out, sed_out], axis = 2), tf.float32)
    direction = tf.math.atan2(model_out[:, :, 1:2],model_out[:, :, 0:1])
    direction = direction/tf.constant(math.pi)*180 + 180
    direction2 = tf.math.atan2(y_gt[:, :, 1:2], y_gt[:, :, 0:1])
    direction2 = direction2/tf.constant(math.pi)*180 + 180
    
    
    diff = direction2 - direction
    weight = K.abs(diff)
    fill_value_if_bigger = 360.-diff
    threshold = 270.
    weight = tf.where(tf.greater(diff, threshold), fill_value_if_bigger, diff)
    weight = tf.abs(weight)
    return tf.reduce_sum(weight * sed_out)/tf.reduce_sum(sed_out + 1e-7)



def masked_mae2(y_gt, model_out):

    sed_out = y_gt[:, :, 2:3]
    sed_out = tf.concat([sed_out,sed_out], axis = 2)
    direction = tf.math.atan2(model_out[:, :, 1:2], model_out[:, :, :1])
    direction = direction/tf.constant(math.pi)*180
    direction2 = tf.math.atan2(y_gt[:, :, 1:2], y_gt[:, :, :1])
    direction2 = direction2/tf.constant(math.pi)*180
    diff = direction2 - direction
    diff = K.abs(diff) * sed_out
    fill_value_if_bigger = 0.
    threshold = 180.
    weight = tf.where(tf.greater(diff, threshold), fill_value_if_bigger, diff)
    
    return K.mean(weight, -1)

def masked_mae_original(y_gt, model_out):
    sed_out = y_gt[:, :, 2:3]
    sed_out = tf.concat([sed_out,sed_out], axis = 2)
    direction = tf.math.atan2(model_out[:, :, 1:2], model_out[:, :, 0:1])
    direction = direction/tf.constant(math.pi)*180
    direction2 = tf.math.atan2(y_gt[:, :, 1:2], y_gt[:, :, 0:1])
    direction2 = direction2/tf.constant(math.pi)*180
    
    
    diff = direction2 - direction
    weight = K.abs(diff)
    # tf.print(tf.reduce_sum(sed_out))
    return tf.reduce_sum(weight * sed_out)/tf.reduce_sum(sed_out + 1e-7)

def custom_accuracy_categorical(y_true,y_pred):

    y_true_class = K.argmax(y_true, axis=-1) + 1
    y_pred_class = K.argmax(y_pred, axis=-1) + 1    
    weight_ = tf.abs(y_true_class - y_pred_class)
    weight = tf.cast(weight_, tf.int32)
    weight = tf.where(tf.math.greater(weight_, 3),  0, weight)
    weight = tf.where(tf.math.less_equal(weight_, 3),  1, weight)
    
    a = weight
    
    return K.mean(a)

def custom_accuracy(y_true, y_pred):
    mask = tf.not_equal(tf.reduce_max(y_true, -1), 0)
    y_true = tf.boolean_mask(y_true, mask)
    y_pred = tf.boolean_mask(y_pred, mask)
    # y_true_class = K.argmax(y_true, axis=-1)
    # y_pred_class = K.argmax(y_pred, axis=-1)

    return tf.keras.metrics.categorical_accuracy(y_true, y_pred)

def accdoa_accuracy(y_true, y_pred):
    y_pred = tf.math.sqrt(y_pred[:,:,:4]**2 + y_pred[:,:,4:]**2)
    y_true = tf.math.sqrt(y_true[:,:,:4]**2 + y_true[:,:,4:]**2)
    mask = tf.greater_equal(tf.reduce_max(y_true, -1), 0.5)
    y_pred = tf.boolean_mask(y_pred, mask)
    y_true = tf.boolean_mask(y_true, mask)
    thus = tf.cast(tf.equal(tf.argmax(y_true, -1), tf.argmax(y_pred, -1)), tf.float32)
    return tf.reduce_mean(thus)



def categorical_weighted(target, output):
    mask = tf.not_equal(tf.reduce_max(target, -1), 0)
    y_true_c = tf.boolean_mask(target, mask)
    y_pred_c = tf.boolean_mask(output, mask)
    mask_b = tf.equal(tf.reduce_max(target, -1), 0)
    y_true_b = tf.boolean_mask(target, mask_b)
    y_pred_b = tf.boolean_mask(output, mask_b)

    return tf.keras.losses.CategoricalCrossentropy(from_logits=False)(y_true_c, y_pred_c) + tf.keras.losses.BinaryCrossentropy(from_logits=False)(y_true_b, y_pred_b)


checkpoint = keras.callbacks.ModelCheckpoint('saved_doa_model/class_{epoch:03d}.h5', period=10) 
checkpoint2 = keras.callbacks.ModelCheckpoint('saved_doa_model/class_{epoch:03d}.h5', monitor='val_doa_out_masked_mae', verbose=1,
                             save_best_only=True, save_weights_only=False,
                             mode='auto', save_frequency=1) 

initial_learning_rate = 0.0001
final_learning_rate = 0.000005
learning_rate_decay_factor = (final_learning_rate / initial_learning_rate)**(1/500)
steps_per_epoch = int(156)

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                initial_learning_rate=initial_learning_rate,
                decay_steps=steps_per_epoch,
                decay_rate=learning_rate_decay_factor,
                staircase=True)


print("============== MODEL TRAINING ==============")

# AZIMUTH
keras.backend.set_image_data_format('channels_last')



model = keras_model_doa.get_model_newest((256,128,7), dropout_rate=0., nb_cnn2d_filt=128, f_pool_size=[8, 8, 4], t_pool_size=[1, 1, 1],
              rnn_size=[128, 128], fnn_size=[128], output = 2, activation='tanh')


model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule), loss = {'doa_out':masked_mse, 'sed_out':categorical_weighted},
              metrics= {'doa_out':[masked_mae, masked_mae2, masked_mae_original], 'sed_out':[custom_accuracy]}, loss_weights=[1, 0.1])


model.summary()

# Load Checkpoint

history = model.fit(data_train,
                    epochs=250,
                    verbose=1,
                    validation_data = data_val,
                    callbacks = [checkpoint, checkpoint2])

# history = model.evaluate(data_val)

model.save('saved_doa_model/first_model.h5')
pd.DataFrame.from_dict(model.history.history).to_csv(f'unet_model_orm/history/first_model.csv',index=False)


com.logger.info("save_model -> {}".format('unet_model_wind/leveraging_elu.h5'))

print("============== END TRAINING ==============")


gc.collect()


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 128, 7  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 256, 128, 12  8192        ['input_1[0][0]']                
                                8)                                                                
                                                                                                  
 batch_normalization (BatchNorm  (None, 256, 128, 12  512        ['conv2d[0][0]']                 
 alization)                     8)                                                          

2023-07-23 23:25:23.555504: W tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc:727] None of the algorithms provided by cuDNN heuristics worked; trying fallback algorithms.  Conv: (f32[1,1,512,512]{2,1,3,0}, u8[0]{0}) custom-call(f32[1,1,512,512]{2,1,3,0}, f32[1,1,1,512]{1,0,2,3}), window={size=1x1}, dim_labels=b01f_01io->b01f, feature_group_count=512, custom_call_target="__cudnn$convForward", backend_config="{\"conv_result_scale\":1,\"activation_mode\":\"0\",\"side_input_scale\":0}"


 29/156 [====>.........................] - ETA: 34:19 - loss: 0.6917 - doa_out_loss: 0.4991 - sed_out_loss: 1.9261 - doa_out_masked_mae: 104.8324 - doa_out_masked_mae2: 41.8093 - doa_out_masked_mae_original: 109.1367 - sed_out_custom_accuracy: 0.3694

2023-07-24 00:05:44.771823: W tensorflow/core/kernels/data/cache_dataset_ops.cc:296] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Epoch 1: val_doa_out_masked_mae improved from inf to 78.78399, saving model to saved_doa_model/class_001.h5
Epoch 2/250
Epoch 2: val_doa_out_masked_mae did not improve from 78.78399
Epoch 3/250
Epoch 3: val_doa_out_masked_mae improved from 78.78399 to 77.28704, saving model to saved_doa_model/class_003.h5
Epoch 4/250
Epoch 4: val_doa_out_masked_mae improved from 77.28704 to 56.85054, saving model to saved_doa_model/class_004.h5
Epoch 5/250
Epoch 5: val_doa_out_masked_mae improved from 56.85054 to 42.38284, saving model to saved_doa_model/class_005.h5
Epoch 6/250
Epoch 6: val_doa_out_masked_mae did not improve from 42.38284
Epoch 7/250
Epoch 7: val_doa_out_masked_mae did not improve from 42.38284
Epoch 8/250
Epoch 8: val_doa_out_masked_mae improved from 42.38284 to 33.36450, saving model to saved_doa_model/class_008.h5
Epoch 9/250
Epoch 9: val_doa_out_masked_mae did not improve from 33.36450
Epoch 10/250
Epoch 10: val_doa_out_masked_mae improved from 33.36450 to 29.95975, saving model t