In [1]:
import math
import os
import random
import re

import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

2025-03-28 19:25:00.486904: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743189900.497406  334749 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743189900.500628  334749 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-28 19:25:00.512956: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Extract Data

In [11]:
class DataExtract:
    """Load array time delays and source information from a directory.

    On construction the class
      1. reads every file whose name contains ``.dat`` (signal information),
      2. reads every file whose name contains ``tau.bin`` (time delays),
      3. divides all delays by their global absolute maximum,
      4. subtracts channel 0 from every channel, so channel 0 becomes the
         zero-delay reference,
      5. selects the rows whose angle lies in ``[0, 360]`` (see ``set_bound``).

    Files are processed in the numeric order of the first number found in
    their name, so the n-th ``.dat`` file is paired with the n-th ``tau.bin``
    file.

    Parameters
    ----------
    path_dir : str
        Directory holding the data files. A trailing separator is optional.
    n_channels : int
        Number of float32 values stored per row in the ``tau.bin`` files.

    Attributes
    ----------
    norm : numpy.float32
        Global absolute maximum used for the normalisation.
    ref_points : numpy.ndarray, shape (n_rows, 1)
        Normalised channel-0 delays that were subtracted from every channel.
    time_delays, speeds, angles : numpy.ndarray
        Rows currently selected by ``set_bound``.
    """

    def __init__(self, path_dir, n_channels):
        # private variable
        # path to data
        self._path_dir = path_dir
        # number of channels in the array
        self._n_channels = n_channels
        # list of time delay file names
        self._files_tau = None
        # list of signal information file names
        self._files_dat = None
        # time delays/taus/inputs (all rows)
        self._time_delays = None
        # normalisation value
        self.norm = None
        # reference points
        self.ref_points = None
        # angles/output (all rows)
        self._angles = None
        # speeds (all rows)
        self._speeds = None
        # subset of time delays, angles and speeds to return
        self.time_delays = None
        self.angles = None
        self.speeds = None

        # extract angles and speeds from dat files
        self._extract_signal_info()
        # extract taus from bin files
        self._extract_delays()
        # preprocess data/max normalization
        self._max_norm()
        # fixed reference channel/channel with zero time delay
        self._fixed_ref_channel()
        # set an upper and lower angle limit
        self.set_bound()

    def _key(self, file):
        """Sort key: the first run of digits in ``file`` as an integer.

        Raises
        ------
        ValueError
            If the file name contains no digit.
        """
        match = re.search(r'[0-9]+', file)
        if match is None:
            raise ValueError("no number found in file name {!r}".format(file))
        return int(match.group())

    def _list_files(self, marker):
        """Return the regular files in the data directory whose name contains ``marker``, numerically sorted."""
        names = [name for name in os.listdir(self._path_dir)
                 if os.path.isfile(os.path.join(self._path_dir, name)) and marker in name]
        return sorted(names, key=self._key)

    def _extract_signal_info(self):
        """Read speeds (column 1) and angles (column 2) from the ``.dat`` files.

        The first line of each file is skipped; every other non-blank line is
        parsed as whitespace-separated floats.
        """
        self._files_dat = self._list_files('.dat')

        rows = []
        for file in self._files_dat:
            # os.path.join so that path_dir works with or without a trailing separator
            with open(os.path.join(self._path_dir, file), 'r') as f:
                next(f, None)  # skip the first line
                for line in f:
                    fields = line.split()
                    if fields:  # ignore blank lines
                        rows.append([float(value) for value in fields])
        if not rows:
            raise ValueError("no signal information found in {!r}".format(self._path_dir))
        table = np.array(rows, dtype=np.float32)
        self._speeds = table[:, 1].reshape(-1, 1)
        self._angles = table[:, 2].reshape(-1, 1)

    def _extract_delays(self):
        """Read the float32 time delays from the ``tau.bin`` files into one (n_rows, n_channels) array."""
        self._files_tau = self._list_files('tau.bin')

        blocks = []
        for file in self._files_tau:
            with open(os.path.join(self._path_dir, file), 'rb') as f:
                flat = np.frombuffer(f.read(), dtype=np.float32)
            # keep only complete rows; a trailing partial row is dropped
            n_rows = len(flat) // self._n_channels
            blocks.append(flat[:n_rows * self._n_channels].reshape(n_rows, self._n_channels))
        if not blocks:
            raise ValueError("no time delay files found in {!r}".format(self._path_dir))
        # concatenate (rather than stack) so files may hold different numbers of rows;
        # this also yields a fresh writable array for the in-place steps that follow
        self._time_delays = np.concatenate(blocks, axis=0)

    def _max_norm(self):
        """Divide all delays in place by their global absolute maximum and remember it in ``norm``."""
        abs_max = np.max(np.abs(self._time_delays))
        if abs_max > 0:  # all-zero data stays zero instead of turning into NaN
            self._time_delays /= abs_max
        self.norm = abs_max

    def _fixed_ref_channel(self):
        """Subtract channel 0 from every channel in place; the subtracted column is kept in ``ref_points``."""
        # indexing with [0] keeps the column 2-D and makes a copy, so the
        # in-place subtraction below does not modify ref_points
        self.ref_points = self._time_delays[:, [0]]
        self._time_delays -= self.ref_points

    def set_bound(self, lower = 0.0, upper = 360.0):
        """Select the rows whose angle satisfies ``lower <= angle <= upper``."""
        mask = np.logical_and(self._angles >= lower, self._angles <= upper).flatten()
        self.time_delays = self._time_delays[mask]
        self.angles = self._angles[mask]
        self.speeds = self._speeds[mask]

    def get_norm(self):
        """Return the normalisation value computed by ``_max_norm``."""
        return self.norm

    def get_ref(self):
        """Return the reference-channel column subtracted by ``_fixed_ref_channel``."""
        return self.ref_points

    def get_data(self, channels=None, bound=[0.0,360.0]):
        """Return ``(time_delays[:, channels], speeds, angles)`` for the current bound.

        Parameters
        ----------
        channels : sequence of int or numpy.ndarray, optional
            Channel indices to return. ``None`` or an empty sequence selects
            all ``n_channels`` channels.
        bound : list
            Accepted for backward compatibility but not used; call
            ``set_bound`` to change the angle window.
        """
        # explicit None/empty test: ``not channels`` raises for numpy arrays
        if channels is None or np.size(channels) == 0:
            channels = np.arange(self._n_channels, dtype=int)
        return self.time_delays[:, channels], self.speeds, self.angles

    def plot(self, channels = None):
        """Scatter the selected time delays against the angle, one colour per channel."""
        if channels is None or np.size(channels) == 0:
            channels = list(range(self._n_channels))
        # Time delay vs angle at different channel
        fig = plt.figure(figsize = (20,20))
        ax = plt.axes()
        colors = cm.rainbow(np.linspace(0, 1, self._n_channels))
        for channel in channels:
            # label with the real channel id so legend and colour agree
            ax.scatter(self.angles, self.time_delays[:, channel], label="ch{}".format(channel),
                       s=0.5, color=colors[channel])
        plt.title("Time Delay vs. DOA for Each Channel")
        ax.grid()
        ax.set_xlabel('DOA (degree)', fontweight ='bold')
        ax.set_ylabel('Time Delay (ms)', fontweight ='bold')
        ax.legend(markerscale = 10)


In [66]:
class DataSetPacker:
    """Pack inputs, speeds and labels into a ``tf.data.Dataset`` and split it.

    Parameters
    ----------
    inputs : numpy.ndarray
        Input matrix; columns are indexed by ``channels``.
    speeds, labels : array-like
        Per-row speed and label values, packed alongside the inputs.
    channels : sequence of int, optional
        Columns of ``inputs`` to keep. Empty (the default) keeps every column.
    """

    def __init__(self, inputs, speeds, labels, channels=[]):
        self._inputs = inputs
        self._labels = labels
        self._speeds = speeds
        self._channels = channels
        self._dataset = None
        self._inputs_subset = None

        self._dataset_train = None
        self._dataset_test = None
        self._dataset_val = None

        self._selected_channels()
        self._pack_data()

    def _selected_channels(self):
        """Keep only the requested input columns (all columns when none were requested)."""
        # len() instead of truthiness so numpy arrays of channel ids work too
        if len(self._channels) > 0:
            self._inputs_subset = self._inputs[:,self._channels]
        else:
            self._inputs_subset = self._inputs

    def _pack_data(self):
        """Build the dataset of ``(inputs_subset, speeds, labels)`` triples."""
        self._dataset = tf.data.Dataset.from_tensor_slices((self._inputs_subset, self._speeds, self._labels))

    def _shuffle_dataset(self, buffer_size):
        """Shuffle the dataset once, with an order that stays fixed across iterations.

        ``reshuffle_each_iteration=False`` is required for ``split``: the
        take/skip subsets are separate iterations of the same pipeline and
        would otherwise each see a different order, leaking samples between
        the splits.
        """
        self._dataset = self._dataset.shuffle(buffer_size = buffer_size, reshuffle_each_iteration = False)

    def split(self, ratio=[0.7,0.15,0.15], shuffle=True, shuffle_buffer_size = 5):
        """Split the dataset into disjoint consecutive parts.

        Parameters
        ----------
        ratio : sequence of float
            Three entries give ``(train, val, test)`` fractions. Any other
            length gives a two-way split: ``ratio[0]`` for training and all
            remaining samples for testing.
        shuffle : bool
            Shuffle before splitting.
        shuffle_buffer_size : int
            Buffer size handed to ``Dataset.shuffle``. Note that a buffer
            much smaller than the dataset only shuffles locally.

        Returns
        -------
        tuple
            ``(train, val, test)`` for a three-way split, otherwise
            ``(train, test)``.
        """
        dataset_size = len(self._inputs)
        if shuffle:
            self._shuffle_dataset(shuffle_buffer_size)

        train_size = int(ratio[0]*dataset_size)
        self._dataset_train = self._dataset.take(train_size)
        # everything after the training part
        remainder = self._dataset.skip(train_size)

        if len(ratio) == 3:
            val_size = int(ratio[1]*dataset_size)
            test_size = int(ratio[2]*dataset_size)
            # validation follows training, test follows validation: no overlap
            self._dataset_val = remainder.take(val_size)
            self._dataset_test = remainder.skip(val_size).take(test_size)
            return self._dataset_train, self._dataset_val, self._dataset_test

        self._dataset_test = remainder
        return self._dataset_train, self._dataset_test

    def plot(self):
        """Scatter each packed input column against the labels."""
        # Time delay vs angle at different channel
        fig = plt.figure(figsize = (20,20))
        ax = plt.axes()
        if len(self._channels) > 0:
            channel_ids = [int(ch) for ch in self._channels]
        else:
            # no explicit selection: every input column was kept
            channel_ids = list(range(self._inputs_subset.shape[1]))
        # at least 24 colours (the original palette); more if a channel id needs it
        n_colors = max([24] + [ch + 1 for ch in channel_ids])
        colors = cm.rainbow(np.linspace(0, 1, n_colors))
        for column, ch in enumerate(channel_ids):
            ax.scatter(self._labels, self._inputs_subset[:,column], label="ch{}".format(ch), color=colors[ch])
        plt.title("Time Delay vs. DOA for Each Channel")
        ax.set_xlabel('DOA (degree)', fontweight ='bold')
        ax.set_ylabel('Time Delay (ms)', fontweight ='bold')
        ax.legend()

In [13]:
class Visualizer:
    """Plot the geometry of a sensor array and datasets recorded with it.

    Parameters
    ----------
    AA_geometry : numpy.ndarray, shape (n_channels, 3)
        One row per channel holding its Cartesian ``x, y, z`` coordinates.
    """

    def __init__(self, AA_geometry):
        self._AA_geometry_cart = AA_geometry
        # (r, theta) per channel, filled by _cart2pol
        self._AA_geometry_polar = np.empty(AA_geometry[:,:2].shape)
        # reserved for spherical coordinates; _cart2sphe does not fill it yet
        self._AA_geometry_sphe = np.empty(AA_geometry.shape)
        self._cart2pol()
        self._cart2sphe()

    def cartesian2D(self, fig_size = (12,12)):
        """Top view of the array: x/y position of every channel, labelled by index."""
        fig = plt.figure(figsize = fig_size)
        ax = plt.axes()
        for i, channel in enumerate(self._AA_geometry_cart):
            ax.scatter(channel[0],channel[1], label = "ch{}".format(i))
            ax.text(channel[0], channel[1], '  %s'%(str(i)))
        plt.title("Array Cartesian Top View", pad = 25)
        ax.set_aspect('equal', adjustable='box')
        ax.set_xlabel('X-axis', fontweight ='bold')
        ax.set_ylabel('Y-axis', fontweight ='bold')
        ax.margins(0.2)
        ax.legend()

    def cartesian3D(self, fig_size = (12,12)):
        """3-D view of the array: x/y/z position of every channel, labelled by index."""
        fig = plt.figure(figsize = fig_size)
        ax = plt.axes(projection = "3d")
        for i, channel in enumerate(self._AA_geometry_cart):
            ax.scatter(channel[0],channel[1],channel[2], label = "ch{}".format(i))
            # NOTE(review): position=(1,1) is forwarded to the text artist and may
            # override the x/y given here - confirm the labels land where intended
            ax.text(channel[0], channel[1], channel[2], '  %s'%(str(i)), position=(1,1))
        plt.title("Array Cartesian 3D View", pad = 25)
        ax.set_xlabel('X-axis', fontweight ='bold')
        ax.set_ylabel('Y-axis', fontweight ='bold')
        ax.set_zlabel('Z-axis', fontweight ='bold')
        ax.legend()

    def polar(self, angle_1 = None, angle_2 = None, channels = [], taus=[], fig_size = (15,14), save_dir = None):
        """Polar view of selected channels with optional actual/predicted direction lines.

        Parameters
        ----------
        angle_1, angle_2 : float, optional
            Actual (red) and predicted (blue) direction in degrees. ``None``
            skips the line; ``0`` is a valid angle and is drawn.
        channels : sequence of int
            Indices of the channels to draw; other channels are not drawn.
        taus : sequence of float
            One value per drawn channel, in the order the channels appear in
            the geometry; scales the marker size as ``300 * (1 + tau)``.
            Missing values are treated as 0.
        fig_size : tuple
            Figure size.
        save_dir : str, optional
            Prefix the file name ``AA_polar.png`` is appended to when saving.
        """
        fig = plt.figure(figsize = fig_size)
        ax = plt.axes(projection = "polar")
        # at least 24 colours (the original palette); more for larger arrays
        colors = cm.rainbow(np.linspace(0, 1, max(24, len(self._AA_geometry_polar))))
        j = 0
        for i, channel in enumerate(self._AA_geometry_polar):
            if i not in channels:
                continue
            tau = taus[j] if j < len(taus) else 0.0
            j += 1
            # polar axes take (theta, r)
            ax.scatter(channel[1],channel[0], s=300*(1+tau), marker="X", label = "ch{}".format(i), color=colors[i])
            ax.text(channel[1], channel[0], '  %s'%(str(i)) )

        line_length = 0.12
        for angle_deg, color, label in ((angle_1, 'r', 'actual'), (angle_2, 'b', 'predict')):
            if angle_deg is None:
                continue
            angle = angle_deg * np.pi / 180
            ax.vlines(angle, 0, line_length, colors = color, label=label)
            # text(x, y, s): annotate the tip of the line with the angle in degrees
            ax.text(angle, line_length, " %s"%(str(angle_deg)))
        ax.legend()
        plt.title("Array Polar View", pad = 25)
        ax.margins(0.2)
        if save_dir:
            fig.savefig(save_dir+'AA_polar.png')

    def plot_dataset(self, dataset):
        """Scatter every input column of ``dataset`` against its label.

        ``dataset`` must yield ``(input, speed, label)`` triples; the speed
        is ignored.
        """
        input_dataset = []
        label_dataset = []
        for x,_,y in dataset:
            input_dataset.append(x)
            label_dataset.append(y)
        input_dataset = np.array(input_dataset)
        label_dataset = np.array(label_dataset)

        fig = plt.figure(figsize = (20,20))
        ax = plt.axes()
        for i in range(input_dataset.shape[1]):
            ax.scatter(label_dataset, input_dataset[:,[i]], label="ch{}".format(i))
        plt.title("Time Delay vs. DOA for Each Channel")
        ax.set_xlabel('DOA (degree)', fontweight ='bold')
        ax.set_ylabel('Time Delay (ms)', fontweight ='bold')
        ax.legend()

    def _cart2pol(self):
        """Fill ``_AA_geometry_polar`` with ``(r, theta)`` for every channel.

        ``theta`` is in radians: in ``(-pi/2, pi/2)`` for ``x > 0``, in
        ``(pi/2, 3*pi/2)`` for ``x < 0`` and ``+/- pi/2`` on the y axis (a
        channel at the origin gets ``-pi/2``).
        """
        # iterate the geometry passed to the constructor, not a notebook global
        for i, channel in enumerate(self._AA_geometry_cart):
            x = channel[0]
            y = channel[1]
            r = np.sqrt(x**2 + y**2)
            if x == 0:
                theta = np.pi/2 if y > 0 else -np.pi/2
            else:
                theta = np.arctan(y/x)
                theta = theta if x > 0 else theta + np.pi
            self._AA_geometry_polar[i][0] = r
            self._AA_geometry_polar[i][1] = theta

    def _cart2sphe(self):
        """Placeholder: the spherical conversion is not implemented, so ``_AA_geometry_sphe`` stays uninitialised."""
        pass
        


In [1]:
class EvaluateModel:
    """Plot training curves and evaluate a trained model on a dataset.

    Parameters
    ----------
    model : object
        Model exposing ``predict`` and ``evaluate``.
    channel_id : sequence of int
        Ids of the channels the model was trained on, one per input column.
    dataset : object
        Dataset of ``(inputs, speeds, labels)`` triples supporting ``len``
        and ``batch``.
    """

    def __init__(self, model, channel_id, dataset):
        self._model = model
        self._channel_id = channel_id
        self._dataset = dataset

    def plot_training(self, losses):
        """Plot every training metric next to its validation counterpart on a log scale.

        ``losses.history`` maps metric names to per-epoch values. A metric
        ``name`` is paired with ``val_name`` when that key exists; if the
        history holds no validation keys at all, each metric is plotted alone.
        """
        history = losses.history
        names = list(history.keys())
        paired = [name for name in names if "val_" + name in history]
        # without validation data fall back to one panel per recorded metric
        panels = paired if paired else names
        if not panels:
            raise ValueError("training history is empty")
        # 4 x 3 inches per panel keeps the original 12 x 3 figure for three metrics
        fig, axs = plt.subplots(ncols = len(panels), nrows = 1, figsize = (4*len(panels),3),
                                layout = "constrained", squeeze = False)
        for i, name in enumerate(panels):
            ax = axs[0][i]
            ax.plot(history[name], label="train_{}".format(name))
            if paired:
                ax.plot(history["val_" + name], label="val_" + name)
            ax.set_yscale('log')
            ax.legend()
        fig.supxlabel("epoch")
        fig.suptitle("Channels: {}".format(" ".join(str(ch) for ch in self._channel_id)))

    def _evaluation_filename(self):
        """File name used by ``plot_evaluation``: ``evaluation_ch_<id>_<id>....png``."""
        suffix = "".join("_{}".format(ch) for ch in self._channel_id)
        return "evaluation_ch{}.png".format(suffix)

    def plot_evaluation(self, lower=0.0, upper=360.0, verbose = False, save_dir = None):
        """Plot true and predicted labels per channel plus their differences.

        Parameters
        ----------
        lower, upper : float
            Only samples with ``lower <= label <= upper`` are evaluated.
        verbose : bool
            Print the inputs and the true/predicted labels.
        save_dir : str, optional
            Prefix the figure file name is appended to when saving.
        """
        fig, axs = plt.subplots(ncols = 1, nrows = len(self._channel_id) + 2, figsize = (20,len(self._channel_id)*3), layout="constrained")
        # one batch holding the complete dataset
        for x,c,y in self._dataset.batch(len(self._dataset)):
            # Targeting a specific range
            fillter = np.logical_and(y >= lower, y <= upper).flatten()
            x, y = x[fillter], y[fillter]
            y_pred = self._model.predict(x, verbose=False)
            if verbose:
                print("Inputs")
                print(x)
                print("Y Truth", "Y Predict")
                print(np.stack((y, y_pred),axis=1).squeeze(-1))
            for i in range(x.shape[1]):
                axs[i].scatter(y, x[:,i], color='r', s=5, label="actual")
                axs[i].scatter(y_pred, x[:,i], color='b', s=5, label="predict")
                axs[i].set_title("Channel {}".format(self._channel_id[i]))
                axs[i].grid()
                axs[i].legend()

            # Plain difference truth - prediction
            axs[-2].scatter(y, y-y_pred, color='g', s=5)
            axs[-2].set_title("Truth vs Predict absolute different")
            axs[-2].grid()

            # Difference wrapped into [-180, 180)
            y_diff = lambda y, y_hat : ((y - y_hat) + 180) % 360 -180
            axs[-1].scatter(y, y_diff(y, y_pred), color='g', s=5)
            axs[-1].set_title("Truth vs Predict relative different")
            axs[-1].grid()
        fig.supxlabel('Theta')
        fig.supylabel('Taus')
        if save_dir:
            # use the instance's channel ids, not a notebook global
            fig.savefig(save_dir + self._evaluation_filename())

    def evaluate(self, verbose=False):
        """Return ``model.evaluate`` on the dataset in batches of 100; print the result when ``verbose``."""
        eva = self._model.evaluate(self._dataset.batch(100), verbose=False)
        if verbose:
            print(eva)
        return eva

In [21]:
def genRandomAA(num_channels, list_length, total_channels=24):
    """Draw random channel subsets.

    Parameters
    ----------
    num_channels : int
        Number of distinct channels per subset.
    list_length : int
        Number of subsets to draw.
    total_channels : int, optional
        Channels are sampled without replacement from ``range(total_channels)``.

    Returns
    -------
    numpy.ndarray of int, shape (list_length, num_channels)
        One subset per row.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``num_channels > total_channels``.
    """
    list_channels = np.empty([list_length, num_channels], dtype=int)
    for i in range(list_length):
        list_channels[i] = random.sample(range(total_channels), num_channels)
    return list_channels

In [23]:
def evaluates_table(model, list_channels, evaluates):
    """Collect evaluation results into an object-dtype DataFrame.

    Row ``k`` holds ``model`` under "Activation", ``list_channels[k]`` under
    "Channels", and from ``evaluates[k]`` entry 0 as "Loss", entry 2 as "Mae"
    and entry 1 as "Mse".
    """
    headers = ["Activation", "Channels", "Loss", "Mae", "Mse"]
    n_rows = len(list_channels)
    # start from an all-zero object table so every cell can hold any Python object
    table = pd.DataFrame(np.zeros((n_rows, len(headers)), dtype=object), columns=headers)
    for row, channels in enumerate(list_channels):
        scores = evaluates[row]
        table.at[row, "Activation"] = model
        table.at[row, "Channels"] = channels
        table.at[row, "Loss"] = scores[0]
        table.at[row, "Mae"] = scores[2]
        table.at[row, "Mse"] = scores[1]
    return table

In [1]:
def _count_channels(entry):
    """Number of channels in one ``Channels`` cell (digit groups of a string, else ``len``)."""
    if isinstance(entry, str):
        return len(re.findall(r'\d+', entry))
    return len(entry)


def plot_table(df, col_name, threshold=1):
    """Scatter a result column against the number of channels, hiding high outliers.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``Channels`` column and the numeric column ``col_name``.
        A ``Channels`` cell is either a string containing the channel numbers
        or a sized object such as a list or array.
    col_name : str
        Column plotted on the y axis.
    threshold : float
        Values whose z-score is not below ``threshold`` are replaced by NaN
        and therefore not drawn. Only the high side is filtered.

    Returns
    -------
    The object returned by ``ax.scatter``.
    """
    fig, ax = plt.subplots(figsize=(10, 15))
    # cast so object-dtype columns behave like plain floats
    target_values = df[col_name].to_numpy(dtype=float)
    # decided per row, so string and object cells may be mixed
    num_channels = np.array([_count_channels(entry) for entry in df['Channels']])
    mean = target_values.mean()
    std = target_values.std()
    if std > 0:
        zs = (target_values - mean) / std
    else:
        # identical values (or a single row): nothing is an outlier
        zs = np.zeros_like(target_values)
    # keep the target values within the threshold, mask the rest with NaN
    plot_points = np.where(zs < threshold, target_values, np.nan)
    plot = ax.scatter(num_channels, plot_points, s = 8)
    ax.grid(linestyle='--')
    ax.set_xticks(list(range(3,25,3)))
    ax.set_xlabel('Number of Channels', fontweight ='bold')
    ax.set_ylabel(col_name, fontweight ='bold')
    ax.set_title("Models Performance by " + col_name)
    return plot