In [None]:
# Mount Google Drive at /content/drive; the data paths used later in this
# notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
pip install librosa #output of this cell was cleaned

In [None]:
import pandas as pd
import numpy as np
import os
import librosa
import random
import matplotlib.pyplot as plt
import pickle as pkl

# Seed Python's and NumPy's global random generators with the same fixed value.
SEED = 21
random.seed(SEED)
np.random.seed(SEED)


In [None]:
# Directory holding the audio recordings; the extraction loop loads '<ID>.wav' from here.
voice_path = '/content/drive/MyDrive/ML/FP_data_cleaned/Voice_dn'
# CSV table read with pandas in the next cell; its last column supplies the IDs.
csv_path = '/content/drive/MyDrive/ML/FP_data_cleaned/data_cln.csv'

In [None]:
# Load the metadata table; its last column holds the IDs used to locate the wav files.
df = pd.read_csv(csv_path)
df_arr = df.to_numpy()            # the whole table as a NumPy array
IDs = df.iloc[:, -1].to_numpy()   # last column only
list_IDs = list(range(len(IDs)))  # positional indices 0..N-1, one per ID

In [None]:
def feat_extract(audio,sr):
  """Summarise one audio signal as a fixed-length vector of statistics.

  Frame-level librosa descriptors are collapsed over time into mean/variance
  values. Layout of the returned vector (sizes assume librosa's defaults of
  6 tonnetz dimensions and 20 MFCCs, plus the explicit n_mels=20):

    [0:2]     zero-crossing rate   (mean, var)
    [2:4]     RMS energy           (mean, var)
    [4:6]     spectral centroid    (mean, var)
    [6:8]     spectral bandwidth   (mean, var)
    [8:10]    spectral roll-off    (mean, var)
    [10:12]   chroma STFT pooled over all pitch classes and frames (mean, var)
    [12:18]   tonnetz per-dimension mean
    [18:24]   tonnetz per-dimension variance
    [24:44]   MFCC per-coefficient mean
    [44:64]   MFCC per-coefficient variance
    [64:84]   mel-spectrogram per-band mean
    [84:104]  mel-spectrogram per-band variance

  Args:
    audio: audio time series, as returned by librosa.load.
    sr: sampling rate of `audio`.

  Returns:
    1-D numpy array holding the statistics in the order listed above.
  """
  zcr = librosa.feature.zero_crossing_rate(y=audio)
  rms = librosa.feature.rms(y=audio)
  spec_cent = librosa.feature.spectral_centroid(y=audio, sr=sr)
  spec_bw = librosa.feature.spectral_bandwidth(y=audio, sr=sr)
  rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
  chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr)
  tonality = librosa.feature.tonnetz(y=audio, sr=sr)
  mfcc = librosa.feature.mfcc(y=audio, sr=sr)
  mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=20)

  # One (mean, variance) pair per descriptor, pooled over every value it holds.
  # The chroma pair previously stored the mean twice; the second entry is now
  # the variance, consistent with all the other pairs.
  pooled_stats = []
  for descriptor in (zcr, rms, spec_cent, spec_bw, rolloff, chroma_stft):
    pooled_stats.extend((np.mean(descriptor), np.var(descriptor)))

  # Multi-row descriptors keep one mean and one variance per row (axis=1 = time).
  return np.concatenate((np.array(pooled_stats),
                         np.mean(tonality, axis=1), np.var(tonality, axis=1),
                         np.mean(mfcc, axis=1), np.var(mfcc, axis=1),
                         np.mean(mel, axis=1), np.var(mel, axis=1)), axis=0)


In [None]:
# Feature matrix: one row per recording, 104 feature columns
# (the number of features extracted by feat_extract).
X = np.zeros((IDs.size, 104))
i = 0
for i, ID in enumerate(IDs, start=1):
  # Each recording is stored as '<ID>.wav' inside voice_path
  data, rate = librosa.load(os.path.join(voice_path, str(ID) + '.wav'))
  X[i - 1] = feat_extract(data, rate)
  print(i)  # progress: number of recordings processed so far

# Persist the extracted features as a NumPy array
X_saving_path = '/content/drive/MyDrive/ML/FP_data_cleaned/X.npy'
np.save(X_saving_path, X)

# Persist the full metadata table (every CSV column) as the labels array
labels_saving_path = '/content/drive/MyDrive/ML/FP_data_cleaned/labels_.npy'
np.save(labels_saving_path, df_arr)

# the output of this cell has been cleaned