In [1]:
import librosa
import numpy as np
import scipy as sp
import scipy.io.wavfile
import os
import pandas as pd
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import argparse
import soundfile as sf
import pandas as pd

## Compute features

In [21]:
target_rate = 16000  # Hz; every recording is resampled to this rate before feature extraction


def compute(in_path, folder):
    """Extract frame-level audio features from one WAV file and save them as ``.npy``.

    The signal is read with ``scipy.io.wavfile``, scaled to floating point,
    mixed down to mono if it has several channels, resampled to the
    module-level ``target_rate`` and analysed with 50 ms frames and a 25 ms hop.

    The saved object is a dict with the keys ``'mfcc'``, ``'sc'``, ``'sfl'``,
    ``'sroff'``, ``'zcr'`` and ``'rms'`` (MFCCs, spectral centroid, spectral
    flatness, spectral roll-off, zero-crossing rate and RMS energy).

    Parameters
    ----------
    in_path : str
        Path of the input WAV file.
    folder : str
        Name of the source folder appearing in the directory part of
        ``in_path``; the output goes to the sibling ``'features_' + folder``.

    Returns
    -------
    None
        The features are written to disk; nothing is returned.
    """
    # read wav file
    rate, sig = sp.io.wavfile.read(filename=in_path)

    # Scale the samples to floating point according to the dtype that was
    # actually read instead of assuming 16-bit PCM.
    if sig.dtype == np.uint8:
        # 8-bit PCM is unsigned with its zero level at 128
        sig = (np.float32(sig) - np.float32(128)) / np.float32(128)
    elif np.issubdtype(sig.dtype, np.integer):
        # for int16 this is exactly the original 2 ** (16 - 1) divisor
        sig = np.float32(sig) / np.float32(2 ** (8 * sig.dtype.itemsize - 1))
    else:
        # floating-point WAV data is already normalised
        sig = np.float32(sig)

    # scipy returns (n_samples, n_channels) for multi-channel files, while the
    # feature extractors below expect a 1-D signal: average the channels.
    if sig.ndim > 1:
        sig = sig.mean(axis=1)

    # Keyword arguments are required by recent librosa releases and are also
    # accepted by the older ones.
    sig = librosa.resample(sig, orig_sr=rate, target_sr=target_rate)
    rate = target_rate

    # features parameters
    feat_frame_length_sec = 0.05
    feat_hop_size_sec = 0.025

    frame_length = int(rate * feat_frame_length_sec)
    feat_hop_size = int(rate * feat_hop_size_sec)
    N_MFCC = 22

    # compute features
    mfcc = librosa.feature.mfcc(y=sig, sr=rate, n_mfcc=N_MFCC,
                                n_fft=frame_length, hop_length=feat_hop_size)
    sc = librosa.feature.spectral_centroid(y=sig, sr=rate,
                                           n_fft=frame_length, hop_length=feat_hop_size)
    sfl = librosa.feature.spectral_flatness(y=sig,
                                            n_fft=frame_length, hop_length=feat_hop_size)
    sroff = librosa.feature.spectral_rolloff(y=sig, sr=rate,
                                             n_fft=frame_length, hop_length=feat_hop_size)
    zcr = librosa.feature.zero_crossing_rate(y=sig,
                                             frame_length=frame_length, hop_length=feat_hop_size)
    rms = librosa.feature.rms(y=sig,
                              frame_length=frame_length, hop_length=feat_hop_size)

    # store features into dictionary
    features = {
        'mfcc': mfcc,
        'sc': sc,
        'sfl': sfl,
        'sroff': sroff,
        'zcr': zcr,
        'rms': rms,
    }

    # Build the destination path: the folder substitution is applied to the
    # directory part only and the extension is swapped explicitly, so a file
    # name that happens to contain 'wav' or the folder name is left intact.
    src_dir, src_name = os.path.split(in_path)
    dest_dir = src_dir.replace(folder, 'features_' + folder)
    dest_path = os.path.join(dest_dir, os.path.splitext(src_name)[0] + '.npy')

    # create destination directory
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)
    np.save(dest_path, features)

    return

In [22]:
single_audio_path = '/nas/home/cborrelli/speech_forensics/rai/single'

audio_list = [o for o in os.listdir(single_audio_path) if o.endswith('wav')]

# Extract features for every recording in the folder. The loop used to end
# with a `break`, so a fresh run produced features for the first file only.
for a in audio_list:

    in_path = os.path.join(single_audio_path, a)
    compute(in_path, 'single')

In [5]:
multiple_audio_path = '/nas/home/cborrelli/speech_forensics/rai/multiples'

# Keep only the directory entries whose name ends in 'wav'.
audio_list = list(filter(lambda entry: entry.endswith('wav'),
                         os.listdir(multiple_audio_path)))

# Run the feature extraction on each selected recording, in listing order.
for wav_name in audio_list:
    compute(os.path.join(multiple_audio_path, wav_name), 'multiples')

## Generate dataframe

In [13]:
columns = ['path', 'noise', 'snr', 'mfcc', 'sfl', 'sc', 'sroff', 'zcr', 'rms', 'y_value', 'y_label', 'y_bin_label']

single_features_folder = '/nas/home/cborrelli/speech_forensics/rai/features_single'
multiple_features_folder = '/nas/home/cborrelli/speech_forensics/rai/features_multiples'
single_audio_folder = '/nas/home/cborrelli/speech_forensics/rai/single'
multiple_audio_folder = '/nas/home/cborrelli/speech_forensics/rai/multiples'

annotation_folder = '/nas/home/cborrelli/speech_forensics/rai/maj_results'

single_feature_list = [o for o in os.listdir(single_features_folder) if o.endswith('npy')]

# Values shared by every row coming from the "single" folder.
noise = 'rai'
snr = None
binary_label = 1
label = 2

# Collect the rows in a plain list and build the DataFrame once at the end:
# DataFrame.append copied the whole frame on every call and no longer exists
# in pandas >= 2.0.
single_rows = []
for f in single_feature_list:

    file_name = f.replace('.npy', '')
    feature_path = os.path.join(single_features_folder, f)

    # The file stores a pickled dict wrapped in a 0-d object array, hence
    # allow_pickle=True. Unpickling can execute code: only load trusted files.
    x = np.load(feature_path, allow_pickle=True)
    feats = x.item()  # unwrap the dict once instead of once per feature

    mfcc = feats.get('mfcc')

    # the remaining features are stored with a leading axis; keep its first entry
    sfl = feats.get('sfl')[0]
    sc = feats.get('sc')[0]
    sroff = feats.get('sroff')[0]
    zcr = feats.get('zcr')[0]
    rms = feats.get('rms')[0]

    audio_path = os.path.join(single_audio_folder, file_name + '.wav')
    # mfcc is wrapped in a list, as before, so it is stored as one object cell
    single_rows.append(
        [audio_path, noise, snr, [mfcc], sfl, sc, sroff, zcr, rms, None, label, binary_label])

feat_df = pd.DataFrame(single_rows, columns=columns)


In [14]:
multiple_feature_list = [o for o in os.listdir(multiple_features_folder) if o.endswith('npy')]
binary_label = 0

# Rows are gathered in a list and concatenated to feat_df in one step:
# DataFrame.append copied the whole frame on every call and no longer exists
# in pandas >= 2.0.
# NOTE: this cell extends feat_df in place, so running it twice duplicates rows.
multiple_rows = []
for f in multiple_feature_list:

    file_name = f.replace('.npy', '')

    # Check the annotation first so that files without one are skipped before
    # their feature file is unpickled.
    txt_name = os.path.join(annotation_folder, file_name + '.txt')

    if not os.path.isfile(txt_name):
        print('File {} annotation is missing'.format(txt_name))
        continue
    with open(txt_name, 'r') as txt_file:
        intell_label = txt_file.readline().rstrip()

    # Only 'Simple' maps to 1; 'Medium' and any other annotation map to 0,
    # exactly as before.
    # NOTE(review): 'Medium' and the fallback share a label -- confirm intended.
    label = 1 if intell_label == 'Simple' else 0

    feature_path = os.path.join(multiple_features_folder, f)

    # The file stores a pickled dict wrapped in a 0-d object array, hence
    # allow_pickle=True. Unpickling can execute code: only load trusted files.
    x = np.load(feature_path, allow_pickle=True)
    feats = x.item()  # unwrap the dict once instead of once per feature

    mfcc = feats.get('mfcc')

    # the remaining features are stored with a leading axis; keep its first entry
    sfl = feats.get('sfl')[0]
    sc = feats.get('sc')[0]
    sroff = feats.get('sroff')[0]
    zcr = feats.get('zcr')[0]
    rms = feats.get('rms')[0]

    audio_path = os.path.join(multiple_audio_folder, file_name + '.wav')
    multiple_rows.append(
        [audio_path, noise, snr, [mfcc], sfl, sc, sroff, zcr, rms, None, label, binary_label])

# Nothing to add when every file was skipped (or the folder is empty).
if multiple_rows:
    feat_df = pd.concat(
        [feat_df, pd.DataFrame(multiple_rows, columns=columns)],
        ignore_index=True)


File /nas/home/cborrelli/speech_forensics/rai/maj_results/71e092e8-a35d-4733-aa3b-e97120b52354_5.605527858202105-14.999977324263039.txt annotation is missing
File /nas/home/cborrelli/speech_forensics/rai/maj_results/1666f31e-6dc5-420e-afbc-3c3407be72d4_1.6108520683100032-3.66072864788718.txt annotation is missing
File /nas/home/cborrelli/speech_forensics/rai/maj_results/08411abf-0fe6-42c4-8393-b35388990c8e_0-11.544439822888355.txt annotation is missing
File /nas/home/cborrelli/speech_forensics/rai/maj_results/16320644-6b0c-40c3-96d1-5efb5fbd8c6f_2.069858302828444-4.7195282346965.txt annotation is missing
File /nas/home/cborrelli/speech_forensics/rai/maj_results/783b2ca3-d8a0-476b-bff8-f92b2c2ea5f8_13.54294341231719-14.999977324263039.txt annotation is missing
File /nas/home/cborrelli/speech_forensics/rai/maj_results/0ef24fad-f12e-4377-adfc-8550d6b9df06_4.3983276416061345-12.69964217588474.txt annotation is missing
File /nas/home/cborrelli/speech_forensics/rai/maj_results/7649e3c7-fba6-

In [17]:
# Serialize the assembled feature table to a pickle file at a fixed absolute path.
feat_df.to_pickle('/nas/home/cborrelli/speech_forensics/notebook/pickle/rai_features.pkl')

In [16]:
# Preview the first rows only: every feature cell holds a long array, so
# displaying the whole frame bloats the saved notebook.
feat_df.head(10)

Unnamed: 0,path,noise,snr,mfcc,sfl,sc,sroff,zcr,rms,y_value,y_label,y_bin_label
0,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-317.2066962449291, -316.4607648084389, -33...","[0.061314885, 0.17396681, 0.18445234, 0.016183...","[4510.471693655841, 4434.899488237797, 3666.48...","[6120.0, 6320.0, 6120.0, 3460.0, 3340.0, 2920....","[0.2975, 0.56625, 0.38, 0.1575, 0.0925, 0.095,...","[0.030354282, 0.025215508, 0.020343885, 0.0549...",,2,1
1,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-271.4813078180702, -290.9811414617931, -31...","[0.0023553998, 0.0011393371, 0.0016238069, 0.0...","[960.8166317090922, 962.6708882339567, 875.988...","[1260.0, 1320.0, 1440.0, 2280.0, 5880.0, 6060....","[0.035, 0.06125, 0.05125, 0.105, 0.33875, 0.50...","[0.07967858, 0.080664575, 0.060430314, 0.01874...",,2,1
2,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-313.8264514523913, -317.2397584822869, -31...","[0.010604525, 0.018354833, 0.21542129, 0.15057...","[1508.7265570382442, 1675.2700612035192, 3457....","[2540.0, 3240.0, 6620.0, 6900.0, 7020.0, 6980....","[0.055, 0.15125, 0.355, 0.5525, 0.61875, 0.631...","[0.027392501, 0.022679903, 0.018245753, 0.0194...",,2,1
3,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-218.09224172474813, -336.19605727170153, -...","[0.0034844058, 0.00049026666, 0.0016169577, 0....","[1183.3948203844511, 1295.3823662170676, 1459....","[2000.0, 2380.0, 3000.0, 2680.0, 1860.0, 2020....","[0.03875, 0.0825, 0.09375, 0.08625, 0.0725, 0....","[0.1226548, 0.1087148, 0.085995495, 0.09183578...",,2,1
4,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-305.75523474435386, -347.82967841440245, -...","[0.0009542777, 0.0004848988, 0.0005416951, 0.0...","[832.9648157631027, 867.167942699225, 893.7676...","[800.0, 860.0, 1160.0, 2240.0, 2900.0, 3220.0,...","[0.03125, 0.065, 0.06, 0.06, 0.06875, 0.0725, ...","[0.097550645, 0.096245795, 0.0963786, 0.081582...",,2,1
5,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-473.0527329644325, -472.4513706981065, -36...","[0.0042919307, 0.005266437, 0.014978628, 0.018...","[1229.561777353797, 1129.1258602425178, 1484.6...","[1740.0, 1600.0, 2520.0, 3800.0, 3780.0, 3920....","[0.0525, 0.1075, 0.10875, 0.13625, 0.16375, 0....","[0.0069857715, 0.0071098045, 0.035862073, 0.06...",,2,1
6,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-382.40718918076976, -349.73507262957634, -...","[0.14818285, 0.032182384, 0.01859876, 0.009856...","[4900.677594869753, 1907.1678719808078, 1500.0...","[7060.0, 5060.0, 3100.0, 3000.0, 3220.0, 3400....","[0.225, 0.27, 0.1, 0.11, 0.115, 0.11375, 0.09,...","[0.009746371, 0.020581063, 0.052060332, 0.0763...",,2,1
7,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-238.49201880182912, -308.54757818218764, -...","[0.008055019, 0.0047808294, 0.016066337, 0.012...","[1294.818636479722, 1335.0548437364125, 1656.8...","[2900.0, 3660.0, 3920.0, 4080.0, 3380.0, 3320....","[0.0225, 0.05, 0.07375, 0.10125, 0.1075, 0.108...","[0.07455362, 0.05580197, 0.04457549, 0.0761440...",,2,1
8,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-537.5918279393737, -493.5363004546679, -34...","[0.013217581, 0.030918447, 0.016776541, 0.0040...","[1416.712548613168, 1551.9052161127888, 1483.6...","[3160.0, 2800.0, 2520.0, 2140.0, 1920.0, 1660....","[0.04875, 0.1125, 0.1125, 0.0975, 0.09375, 0.0...","[0.0028437956, 0.0053675612, 0.029717987, 0.05...",,2,1
9,/nas/home/cborrelli/speech_forensics/rai/singl...,rai,,"[[[-247.53518032487443, -246.71451964627036, -...","[0.006694663, 0.0046725525, 0.0015785894, 0.00...","[1288.9308386330276, 1350.0247869593932, 1381....","[2220.0, 2420.0, 2600.0, 2560.0, 2580.0, 2380....","[0.045, 0.09125, 0.08875, 0.07875, 0.08, 0.09,...","[0.073135644, 0.08359779, 0.097744584, 0.07556...",,2,1
