In [12]:
import torch
import numpy as np
import pandas as pd
import os
import librosa

##  1

In [29]:
def align_meta_pt(df,
               n_sample,
               shift=256,
               n_fft=1024,
               hop_meta_s=0.1,
               fs=24000,
               max_n_target = 6
              ):
    """Expand frame-wise metadata rows into a per-analysis-frame label tensor.

    Rows of ``df`` are read by column *position*: column 0 is the metadata
    frame index, column 1 the class, column 3 the azimuth and column 4 the
    elevation (column 2 is not read). One metadata frame lasts ``hop_meta_s``
    seconds, i.e. ``fs*hop_meta_s/shift`` output frames, and every event is
    copied to all output frames covered by its metadata frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata table, one row per active event and metadata frame. Any
        index is accepted; rows are visited in positional order.
    n_sample : int
        Number of audio samples; the output has ``ceil(n_sample/shift)+1`` frames.
    shift : int
        Hop size of the output frames, in samples.
    n_fft : int
        Not used by this function; kept so existing callers keep working.
    hop_meta_s : float
        Duration of one metadata frame, in seconds.
    fs : int
        Sampling rate in Hz.
    max_n_target : int
        Number of event slots available per output frame.

    Returns
    -------
    torch.Tensor
        Shape ``(n_frame, max_n_target, 3)`` holding ``[class, azimuth,
        elevation]`` per slot. Unused slots have class ``-1`` and zero angles.

    Notes
    -----
    Events that cannot be stored are skipped instead of raising ``IndexError``:
    those whose first output frame lies outside ``[0, n_frame)`` and those
    arriving when all ``max_n_target`` slots of their frame are already taken.
    """
    hop_meta = fs*hop_meta_s
    ratio = hop_meta/shift
    n_frame = int(np.ceil(n_sample/shift)+1)

    out = torch.zeros(n_frame,max_n_target,3) # 3[class,azimuth,elevation]
    out[:,:,0] = -1 # class -1 marks an empty slot

    # Walk the rows by position so that .iloc stays valid for any DataFrame
    # index (filtered, re-ordered or non-integer), not only a default RangeIndex.
    for pos in range(len(df)):
        meta_idx = df.iloc[pos,0]
        idx_start = int(meta_idx*ratio)
        idx_end = int((meta_idx+1)*ratio)

        # Metadata that extends past the audio (or a negative frame index)
        # has no frame to land in.
        if not 0 <= idx_start < n_frame:
            continue

        # First free slot of the starting frame; bounded so that a crowded
        # frame cannot run the search past the last slot.
        cnt = 0
        while cnt < max_n_target and out[idx_start,cnt,0] != -1:
            cnt += 1
        if cnt == max_n_target:
            continue

        # Slicing clamps idx_end to n_frame when the last metadata frame is cut short.
        out[idx_start:idx_end,cnt,0] = float(df.iloc[pos,1]) # class
        out[idx_start:idx_end,cnt,1] = float(df.iloc[pos,3]) # azimuth
        out[idx_start:idx_end,cnt,2] = float(df.iloc[pos,4]) # elevation

    return out

## 2

In [30]:
def label2mACCDOA(label,n_class=13,n_track=2) :
    """Turn a per-frame label tensor into multi-track Cartesian DOA targets.

    ``label`` is indexed as ``label[frame, slot]`` and holds
    ``[class, azimuth_deg, elevation_deg]``; a class of ``-1`` ends the list
    of events of a frame. Each event is converted to a unit vector
    ``(x, y, z)`` and written to the first track of its class whose vector
    is still all-zero. When all ``n_track`` tracks of that class are taken,
    a randomly chosen track (``np.random.randint``) is overwritten.

    Returns a tensor of shape ``(n_frame, n_track, n_class, 3)``.
    """
    total_frames = label.shape[0]
    total_slots = label.shape[1]

    target = torch.zeros((total_frames,n_track,n_class,3))

    for frame in range(total_frames) :
        for slot in range(total_slots) :
            # the first empty slot terminates this frame
            if label[frame,slot,0] == -1 :
                break

            # degrees -> radians
            azi = label[frame,slot,1]*torch.pi/180.
            ele = label[frame,slot,2]*torch.pi/180.

            # spherical -> Cartesian unit vector
            doa = torch.tensor(([torch.cos(azi)*torch.cos(ele),
                                 torch.sin(azi)*torch.cos(ele),
                                 torch.sin(ele)]))

            cls = int(label[frame,slot,0])

            for track in range(n_track) :
                # a zero-norm vector means the track is still free
                if torch.norm(target[frame,track,cls,:]) == 0 :
                    target[frame,track,cls,:] = doa
                    break
            else :
                # every track is occupied: overwrite one picked at random
                target[frame,np.random.randint(n_track),cls,:] = doa

    return target




In [80]:
def mACCDOA2label(mACCDOA,shift_in=256,shift_out=2400,threshold=0.5) : 
    """Convert multi-track Cartesian DOA vectors back to a metadata table.

    Parameters
    ----------
    mACCDOA : torch.Tensor
        Indexed as ``[frame, track, class, xyz]``. Tensors that require grad
        or live on another device are accepted; a detached CPU copy is used.
    shift_in : int
        Hop size (samples) of the input frames.
    shift_out : int
        Hop size (samples) of the output metadata frames.
    threshold : float
        A (track, class) vector is reported when its Euclidean norm exceeds
        this value.

    Returns
    -------
    pandas.DataFrame
        Columns ``["idx","class","order","azimuth","elevation"]``, one row per
        reported event. ``idx`` is ``int(frame*shift_in/shift_out)``; only the
        first input frame that maps to each ``idx`` is converted. ``order``
        counts the events of one output frame starting at 0 (tracks are
        visited first, classes second). Angles are integer degrees.
    """
    columns = ["idx","class","order","azimuth","elevation"]

    # Detached CPU copy so the NumPy conversion below cannot fail on
    # tensors that track gradients or sit on an accelerator.
    vectors = mACCDOA.detach().cpu()

    n_frame = vectors.shape[0]
    n_track = vectors.shape[1]
    n_class = vectors.shape[2]

    ratio =  shift_in/shift_out

    # Activity test for every (frame, track, class) in one pass.
    is_active = (torch.norm(vectors,dim=-1) > threshold).numpy()
    xyz = vectors.numpy()

    # Rows are collected in a list and turned into a DataFrame once;
    # appending through .loc re-allocates the frame for every row.
    rows = []

    prev_idx = -1
    for i_frame in range(n_frame) : 
        cur_idx = int(i_frame*ratio)

        # Several input frames fall into one output frame; keep the first.
        if prev_idx == cur_idx:
            continue
        prev_idx = cur_idx

        active = 0

        for i_track in range(n_track) : 
            for i_class in range(n_class):
                if not is_active[i_frame,i_track,i_class] :
                    continue

                x, y, z = xyz[i_frame,i_track,i_class,:]

                # in degrees
                azimuth = int(np.round(np.arctan2(y, x) * 180 / np.pi))
                elevation = int(np.round(np.arctan2(z, np.sqrt(x**2 + y**2)) * 180 / np.pi))

                rows.append([cur_idx, i_class, active, azimuth, elevation])
                # Advance per reported event so simultaneous events of a
                # frame get distinct "order" values.
                active += 1

    return pd.DataFrame(rows, columns=columns)

In [81]:
# Round-trip check on one recording: CSV metadata -> per-frame label tensor
# -> multi-track Cartesian targets -> metadata table again.

# Load the audio at 24 kHz with mono=False so the channel axis is kept.
raw,_ = librosa.load("fold1_room10_mix001.wav",sr=24000,mono=False)
#print(raw.shape)

n_channel, total_sample = raw.shape

# The CSV is read with explicit column names (names=...), i.e. it is treated as header-less.
df = pd.read_csv(os.path.join("fold1_room10_mix001.csv"),names=["idx","class","order","azimuth","elevation"])
display(df.head())
# Metadata rows -> (n_frame, max_n_target, 3) label tensor.
pt_label = align_meta_pt(df,total_sample)
print("label {} -> pt {}".format(len(df.index),pt_label.shape[0]))
# Label tensor -> (n_frame, n_track, n_class, 3) Cartesian targets.
accdoa = label2mACCDOA(pt_label)
print(accdoa[:2,0])
print("pt {} -> accdoa {}".format(pt_label.shape[0],accdoa.shape[0]))
# Back to a metadata table; its head is displayed for comparison with df.head().
inv_label = mACCDOA2label(accdoa)
print("accdoa {} -> label {}".format(accdoa.shape[0],len(inv_label.index)))
display(inv_label.head())

Unnamed: 0,idx,class,order,azimuth,elevation
0,10,4,0,-131,0
1,11,4,0,-135,0
2,12,4,0,-139,0
3,13,4,0,-143,-1
4,14,4,0,-147,-1


label 757 -> pt 5626
tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])
pt 5626 -> accdoa 5626
0.10666666666666667
accdoa 5626 -> label 757


Unnamed: 0,idx,class,order,azimuth,elevation
0,10,4,0,-131,0
1,11,4,0,-135,0
2,12,4,0,-139,0
3,13,4,0,-143,-1
4,14,4,0,-147,-1


In [81]:
# NOTE(review): this cell carries the same execution count (In [81]) and the
# same statements as the round-trip cell before it; one copy can probably be
# removed.
# Round-trip check on one recording: CSV metadata -> per-frame label tensor
# -> multi-track Cartesian targets -> metadata table again.

# Load the audio at 24 kHz with mono=False so the channel axis is kept.
raw,_ = librosa.load("fold1_room10_mix001.wav",sr=24000,mono=False)
#print(raw.shape)

n_channel, total_sample = raw.shape

# The CSV is read with explicit column names (names=...), i.e. it is treated as header-less.
df = pd.read_csv(os.path.join("fold1_room10_mix001.csv"),names=["idx","class","order","azimuth","elevation"])
display(df.head())
# Metadata rows -> (n_frame, max_n_target, 3) label tensor.
pt_label = align_meta_pt(df,total_sample)
print("label {} -> pt {}".format(len(df.index),pt_label.shape[0]))
# Label tensor -> (n_frame, n_track, n_class, 3) Cartesian targets.
accdoa = label2mACCDOA(pt_label)
print(accdoa[:2,0])
print("pt {} -> accdoa {}".format(pt_label.shape[0],accdoa.shape[0]))
# Back to a metadata table; its head is displayed for comparison with df.head().
inv_label = mACCDOA2label(accdoa)
print("accdoa {} -> label {}".format(accdoa.shape[0],len(inv_label.index)))
display(inv_label.head())

Unnamed: 0,idx,class,order,azimuth,elevation
0,10,4,0,-131,0
1,11,4,0,-135,0
2,12,4,0,-139,0
3,13,4,0,-143,-1
4,14,4,0,-147,-1


label 757 -> pt 5626
tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])
pt 5626 -> accdoa 5626
0.10666666666666667
accdoa 5626 -> label 757


Unnamed: 0,idx,class,order,azimuth,elevation
0,10,4,0,-131,0
1,11,4,0,-135,0
2,12,4,0,-139,0
3,13,4,0,-143,-1
4,14,4,0,-147,-1


In [79]:
# Scratch check: slicing with [:-1] drops the last character of a string.
aa = "abcd"

aa[:-1]

'abc'

In [83]:
# NOTE(review): numpy is already imported as np in the first cell; this
# repeated import is redundant.
import numpy as np

# Scratch check: np.min over a tuple returns its smallest element.
np.min((5,4))

4

In [88]:
# Probe whether the RIR file can be loaded; the loaded array itself is discarded.
# Only load failures are trapped (missing/unreadable file, truncated file,
# or a pickled payload that np.load refuses). A bare ``except:`` would also
# swallow KeyboardInterrupt and SystemExit.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
try : 
    np.load("/home/data/kbh/DCASE2022/TAU-SRIR_DB_split/rirs_10_tc352_2_1.npy")
except (OSError, ValueError, EOFError):
    print("E")
    

E
