# Import Libraries

In [None]:
#General
import numpy as np
import pandas as pd
import itertools
from tqdm import tqdm

# System
import os, fnmatch
import time
import warnings
warnings.simplefilter("ignore")

# Random Seed
from numpy.random import seed
seed(1)

# Audio
import librosa.display, librosa

!pip install -q noisereduce
import noisereduce as nr

In [None]:
# Root directory of the project. Every path built in this notebook is relative
# to it: the input `clean_data.csv` and the `Features/` output folder.
# NOTE(review): looks like a Google Drive mount path (Colab) — confirm the drive
# is mounted before running the cells below.
root = "/content/drive/MyDrive/ML_Final_Project"

# Load Data

In [None]:
# Read the cleaned metadata table (one row per recording) from the project root.
clean_data_csv = os.path.join(root, 'clean_data.csv')
data = pd.read_csv(clean_data_csv)
data

Unnamed: 0,voice_id,emotion_id,text_id,gender,age,file_path
0,15997,1,1,1,21,/content/drive/MyDrive/ML_Final_Project/Voice/...
1,16001,1,2,1,21,/content/drive/MyDrive/ML_Final_Project/Voice/...
2,16005,1,3,1,21,/content/drive/MyDrive/ML_Final_Project/Voice/...
3,16009,1,4,1,21,/content/drive/MyDrive/ML_Final_Project/Voice/...
4,16013,1,5,1,21,/content/drive/MyDrive/ML_Final_Project/Voice/...
...,...,...,...,...,...,...
16435,10563,4,6,0,54,/content/drive/MyDrive/ML_Final_Project/Voice/...
16436,10567,4,7,0,54,/content/drive/MyDrive/ML_Final_Project/Voice/...
16437,10571,4,8,0,54,/content/drive/MyDrive/ML_Final_Project/Voice/...
16438,10575,4,9,0,54,/content/drive/MyDrive/ML_Final_Project/Voice/...


# Final Check on Unique and Null Values

In [None]:
# Per-column summary of `data`: dtype, number of distinct values, number of nulls.
dtypes = pd.DataFrame(
    {
        "Data Type": data.dtypes,
        "Unique Values": data.nunique(),
        "Null Values": data.isnull().sum(),
    }
)
# Colour the summary column-wise so outliers stand out at a glance.
dtypes.style.background_gradient(cmap='Set3', axis=0)

Unnamed: 0,Data Type,Unique Values,Null Values
voice_id,int64,16440,0
emotion_id,int64,4,0
text_id,int64,10,0
gender,int64,2,0
age,int64,57,0
file_path,object,16440,0


# Signal Processing Parameters

In [None]:
# Shared analysis settings for the librosa-based feature extraction below.
fs = 22050         # Sampling rate in Hz; audio is resampled to this rate on load
n_fft = 2048       # FFT window length in samples
hop_length = 512   # Number of samples between successive analysis frames
n_mels = 128       # Number of Mel bands in the Mel spectrogram
n_mfcc = 13        # Number of MFCC coefficients kept per frame

# Define a Function to Calculate MFCC, Mel, and Chroma Features


In [None]:


def get_features(y, sr=fs):
    """Compute time-averaged MFCC, chroma and Mel feature vectors for one signal.

    Parameters
    ----------
    y : np.ndarray
        Audio time series to analyse.
    sr : int, optional
        Sampling rate of ``y`` in Hz. Defaults to the notebook-level ``fs``.

    Returns
    -------
    tuple of np.ndarray
        ``(mfcc_feature_vector, chroma_feature_vector, mel_feature_vector)``.
        Each is the mean along axis 1 (librosa's frame axis) of the
        corresponding feature matrix, i.e. one value per coefficient / bin.
    """
    # Mel power spectrogram. The audio is passed by keyword because recent
    # librosa releases (>= 0.10) reject positional audio arguments, and the
    # caller's `sr` is honoured instead of the global `fs`.
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )

    # MFCCs are derived from the log-scaled (dB) Mel spectrogram.
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(mel_spec), n_mfcc=n_mfcc)
    mfcc_feature_vector = np.mean(mfcc, axis=1)
    mel_feature_vector = np.mean(mel_spec, axis=1)

    # Chroma is computed from the STFT magnitude, using the same framing
    # parameters as the Mel spectrogram so all features share one time grid.
    magnitude = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    chroma = librosa.feature.chroma_stft(S=magnitude, sr=sr)
    chroma_feature_vector = np.mean(chroma, axis=1)

    return mfcc_feature_vector, chroma_feature_vector, mel_feature_vector



# Load audio files, calculate features and create feature vectors


In [None]:


files = data['file_path']

# Clips longer than this many seconds are excluded from the feature set.
max_duration = 3

mfcc_feature_vectors = []
mel_feature_vectors = []
chroma_feature_vectors = []
sound_paths = []
# Positional row numbers of EVERY clip that produced no feature vector
# (too long, unreadable, empty or failed during processing). The next cell
# drops these rows from `data`, so this list must stay in sync with the
# feature lists or metadata and features end up misaligned.
# NOTE(review): `data.drop(errors)` interprets these as index labels, which
# matches only if `data` has the default 0..n-1 index — confirm.
errors = []

for i, f in enumerate(tqdm(files)):
    try:
        # Load audio file (resampled to fs)
        y, sr = librosa.load(f, sr=fs)

        # Remove voices with length more than `max_duration` seconds. Checked
        # on the raw signal first so long clips skip the costly denoising.
        dur = librosa.get_duration(y=y, sr=sr)
        if dur > max_duration:
            print(f'Deleted row #{i} with Duration:{dur}')
            errors.append(i)
            continue

        # Trim the beginning and ending silence
        yt, _trim_interval = librosa.effects.trim(y)

        # Perform noise reduction
        ytr = nr.reduce_noise(y=yt, sr=sr)

        if len(ytr) < 2:
            print("Error loading %s" % f)
            errors.append(i)
            continue

        # Peak-normalise by the largest absolute sample. A silent or
        # non-finite signal cannot be normalised and is rejected instead of
        # producing NaN/inf features.
        peak = np.max(np.abs(ytr))
        if not np.isfinite(peak) or peak == 0:
            print("Error loading %s (silent or invalid after preprocessing)" % f)
            errors.append(i)
            continue
        ytr = ytr / peak

        # Get Features
        mfcc_feat, chroma_feat, mel_feat = get_features(ytr, sr)

    except Exception as e:
        # Best-effort: keep going, but report the failure and record the row
        # so it is removed from the metadata as well.
        print(f'Skipped row #{i} ({f}): {e!r}')
        errors.append(i)
        continue

    # Only reached when all three feature vectors were computed.
    mfcc_feature_vectors.append(mfcc_feat)
    mel_feature_vectors.append(mel_feat)
    chroma_feature_vectors.append(chroma_feat)
    sound_paths.append(f)

# Report how many clips were processed (list lengths, not vector dimensions).
print("\n\nCalculated %d MFCC feature vectors and labels" % len(mfcc_feature_vectors))
print("\n\nCalculated %d Mel feature vectors and labels" % len(mel_feature_vectors))
print("\nCalculated %d CHROMA feature vectors and labels" % len(chroma_feature_vectors))
print("\nDeleted %d Row feature vectors" % len(errors))


In [None]:
# Metadata for the clips that were kept: remove the rows recorded in `errors`.
new = data.drop(index=errors)
new

# Save Feature CSVs

In [None]:
# Sanity check: the MFCC matrix and the filtered metadata should have the
# same number of rows before they are concatenated side by side.
mfcc_df = pd.DataFrame(mfcc_feature_vectors)
print(mfcc_df.shape, new.shape, sep="\n")

In [None]:
# MFCC matrix: one row per processed clip, one column per coefficient.
mfcc_df = pd.DataFrame(mfcc_feature_vectors)

# Metadata columns of the kept clips, re-indexed from 0 so rows pair up by position.
mfcc_meta = new[['voice_id', 'emotion_id', 'text_id', 'gender', 'age']].reset_index(drop=True)

# NOTE(review): ignore_index=True along axis=1 replaces every column label with
# 0..N-1, so the metadata column names are not written to this CSV (unlike the
# Mel and Chroma cells) — confirm that downstream readers expect this.
result_mfcc = pd.concat(
    [mfcc_meta, mfcc_df.reset_index(drop=True)],
    axis=1,
    ignore_index=True,
)
result_mfcc.to_csv(f'{root}/Features/MFCC.csv')
result_mfcc

In [None]:
# Mel matrix: one row per processed clip, one column per Mel band.
mel_df = pd.DataFrame(mel_feature_vectors)

# Metadata columns of the kept clips, re-indexed from 0 so rows pair up by position.
mel_meta = new[['voice_id', 'emotion_id', 'text_id', 'gender', 'age']].reset_index(drop=True)

result_mel = pd.concat([mel_meta, mel_df.reset_index(drop=True)], axis=1)
# NOTE(review): the file is named MFCC_flat.csv although it holds the Mel
# features — presumably a legacy name; confirm before renaming.
result_mel.to_csv(f'{root}/Features/MFCC_flat.csv')
result_mel

In [None]:
# Chroma matrix: one row per processed clip, one column per chroma bin.
chroma_df = pd.DataFrame(chroma_feature_vectors)

# Metadata columns of the kept clips, re-indexed from 0 so rows pair up by position.
chroma_meta = new[['voice_id', 'emotion_id', 'text_id', 'gender', 'age']].reset_index(drop=True)

result_chroma = pd.concat([chroma_meta, chroma_df.reset_index(drop=True)], axis=1)
result_chroma.to_csv(f'{root}/Features/CHROMA.csv')
result_chroma

In [None]:
# Feature Extraction
!pip install -q surfboard
from surfboard.sound import Waveform
from surfboard.feature_extraction import extract_features
# from surfboard.feature_extraction import extract_features_from_paths
from surfboard.feature_extraction_multiprocessing import extract_features_from_paths

In [None]:
files = list(data['file_path'])


def _surfboard_features_to_csv(components, csv_path):
    """Extract surfboard features for every file, attach metadata and save to CSV.

    Runs `extract_features_from_paths` on `files` for the given component
    names (mean and std statistics, 44100 Hz, 2 worker processes), places the
    metadata columns of `data` to the left of the result and writes the
    combined table to `csv_path`. Returns the combined table.

    NOTE(review): the concat pairs rows by index, so this presumably relies on
    the extractor returning one row per path in input order — confirm.
    """
    extracted = extract_features_from_paths(
        paths=files,
        components_list=components,
        statistics_list=['mean', 'std'],
        sample_rate=44100,
        num_proc=2,
    )
    combined = pd.concat(
        [data[['voice_id', 'emotion_id', 'text_id', 'gender', 'age']], extracted],
        axis=1,
    )
    combined.to_csv(csv_path)
    return combined


# Each call is one extraction pass followed by one CSV, in the original order.
main1 = _surfboard_features_to_csv(
    ['mfcc', 'log_melspec'],
    f'{root}/Features/melMFCC.csv',
)
main2 = _surfboard_features_to_csv(
    ['rms',  'chroma_stft' ,'zerocrossing', 'ppe'],
    f'{root}/Features/zcrossRMS.csv',
)
main3 = _surfboard_features_to_csv(
    ['magnitude_spectrum'],
    f'{root}/Features/spectrum.csv',
)
# NOTE(review): sliding-window Shannon entropy is written to a file called
# cwt.csv — presumably a leftover name; confirm before renaming.
main4 = _surfboard_features_to_csv(
    ['shannon_entropy_slidingwindow'],
    f'{root}/Features/cwt.csv',
)
main1