In [None]:
# Importing required libraries 
# Keras
import keras
from keras import regularizers
from keras.preprocessing import sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
# sklearn
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
# Other
import librosa
import librosa.display
import json
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import pandas as pd
import seaborn as sns
import glob
import os
import pickle
import IPython.display as ipd  # To play sound in the notebook
import numpy as np

In [None]:
# Root folders of the TESS, RAVDESS, SAVEE and CREMA-D datasets,
# given relative to the notebook's working directory.
TESS, RAV = "./Data/TESS/", "./Data/RAVDESS/"
SAVEE, CREMA = "./Data/SAVEE/", "./Data/CREMA-D/"

In [None]:
# Load the pre-computed training feature table. Every column except
# 'label' and 'name' is treated as a numeric feature.
train_data = pd.read_csv("./train_features.csv")
train_feature_frame = train_data.drop(['label', 'name'], axis=1)

# Standardise the training features (zero mean, unit variance per column).
scaler = StandardScaler()
x_train = scaler.fit_transform(train_feature_frame)

# Fit a 75-component PCA on the standardised features and project them.
# The fitted `pca` object is reused by later cells to project new samples.
pca = PCA(n_components=75)
train_pca = pca.fit_transform(x_train)

In [None]:
# Empty placeholder frames; no cell visible here reads them.
features = pd.DataFrame(columns=['feature'])
labels = pd.DataFrame(columns=['label'])
names = pd.DataFrame(columns=['name'])

audio_sample_path = "./output.wav"
# librosa.load returns the waveform and the sampling rate it was loaded at.
x, sample_rate = librosa.load(audio_sample_path)
print(sample_rate)

# Each feature family below is averaged over the time axis, so the clip is
# summarised by one fixed-length vector regardless of its duration.

# MFCC feature extraction
# No. of MFCC Features = 40 (Default = 20)
mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
print(mfccs.shape)

# Chroma feature extraction
# No. of Chroma Features = 12 (Always)
stft = np.abs(librosa.stft(x))
chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
print(chroma.shape)

# Mel feature extraction
# No. of Mel Features = 128 (Default = 128)
mel = np.mean(librosa.feature.melspectrogram(y=x, sr=sample_rate).T, axis=0)
print(mel.shape)

# feature_set stores all features of the audio file, in the order
# MFCC -> chroma -> mel.
# NOTE(review): this order is assumed to match the column order of
# train_features.csv -- confirm against the notebook that produced the CSV.
feature_set = np.hstack((mfccs, chroma, mel)).astype(np.float64)
print(feature_set.shape)

# sklearn transformers expect a 2-D (n_samples, n_features) array.
feature_set = np.reshape(feature_set, (1, -1))
print(feature_set.shape)

# `pca` was fitted on features standardised by `scaler`, so the new sample
# must go through the same scaler first; projecting raw features would put
# it in a different space from the training data.
feature_set_scaled = scaler.transform(feature_set)
feature_set_pca = pca.transform(feature_set_scaled)
print("After PCA shape:", feature_set_pca.shape)

# Add a trailing channel axis: (samples, 75 components, 1).
feature_set_reshaped = feature_set_pca.reshape(-1, 75, 1)
print("Reshaped for model input:", feature_set_reshaped.shape)

In [None]:
def dataset_generator(train_path, test_path):
    """Build PCA-reduced, CNN-shaped train and test arrays from feature CSVs.

    The scaler and the PCA are fitted on the training CSV only and then
    applied to the test CSV, so both sets share one feature space and no
    test statistics leak into the fit.

    Parameters
    ----------
    train_path : str or path-like
        CSV of training features. 'label' and 'name' columns, if present,
        are dropped before fitting.
    test_path : str or path-like
        CSV of test features with the same feature columns as the training
        CSV. 'label' and 'name' columns, if present, are dropped.

    Returns
    -------
    x_train, x_test : numpy.ndarray
        Arrays of shape (n_samples, 75, 1).
    """
    # Read the data from the saved CSV files
    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)

    # Keep only the feature columns; errors='ignore' lets this work whether
    # or not the CSV carries the metadata columns.
    metadata_columns = ['label', 'name']
    train_features = train_data.drop(columns=metadata_columns, errors='ignore')
    test_features = test_data.drop(columns=metadata_columns, errors='ignore')

    # Perform standardization for better performance.
    # Fit on train only, then reuse the same statistics for test.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_features)
    test_scaled = scaler.transform(test_features)

    # Perform Principal Component Analysis (PCA) to reduce dimensionality
    # and remove correlation between features.
    # No of PCA components = 75
    pca = PCA(n_components=75)
    train_pca = pca.fit_transform(train_scaled)
    test_pca = pca.transform(test_scaled)
    print(test_pca.shape)

    # Expanding dimensions for CNN: add a trailing channel axis
    x_train = np.expand_dims(train_pca, axis=2)
    x_test = np.expand_dims(test_pca, axis=2)

    # Print train and test shapes
    print(x_train.shape)
    print(x_test.shape)

    return x_train, x_test
    

In [None]:
# Restore the saved Keras model from disk; later cells call model.predict on it.
MODEL_PATH = "./saved_models/CNN with Feature Array.keras"
model = keras.models.load_model(MODEL_PATH)

In [None]:
# Index of the highest-scoring class for the extracted sample
# (argmax over the flattened prediction array).
model.predict(feature_set_reshaped).argmax()

In [None]:
import sounddevice as sd
from scipy.io.wavfile import write

# Capture settings for the microphone sample
fs = 22050   # Sample rate, used for both the recording and the WAV file
seconds = 3  # Duration of recording

# Start a two-channel recording of `seconds` seconds.
myrecording = sd.rec(frames=int(seconds * fs), samplerate=fs, channels=2)
sd.wait()  # Wait until recording is finished

# Save as WAV file
write('output.wav', fs, myrecording)

In [None]:
def _extract_audio_features(audio_path):
    """Return a (1, n_features) array of time-averaged MFCC, chroma and mel features."""
    x, sample_rate = librosa.load(audio_path)
    print(sample_rate)

    # MFCC feature extraction
    # No. of MFCC Features = 40 (Default = 20)
    mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
    print(mfccs.shape)

    # Chroma feature extraction
    # No. of Chroma Features = 12 (Always)
    stft = np.abs(librosa.stft(x))
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    print(chroma.shape)

    # Mel feature extraction
    # No. of Mel Features = 128 (Default = 128)
    mel = np.mean(librosa.feature.melspectrogram(y=x, sr=sample_rate).T, axis=0)
    print(mel.shape)

    # Concatenate in the order MFCC -> chroma -> mel.
    # NOTE(review): assumed to match the column order of the training CSV -- confirm.
    feature_set = np.hstack((mfccs, chroma, mel)).astype(np.float64)
    print(feature_set.shape)

    # sklearn transformers expect a 2-D (n_samples, n_features) array.
    feature_set = np.reshape(feature_set, (1, -1))
    print(feature_set.shape)
    return feature_set


def record_and_predict():
    """Record 3 s from the microphone, classify the clip and print the label.

    The recording is written to 'output.wav', features are extracted from
    that file, standardised with the notebook-level `scaler`, projected with
    the notebook-level `pca`, and passed to the notebook-level `model`.
    The predicted class name is printed; nothing is returned.
    """
    # Recording the audio from the user's microphone
    sample_rate = 22050  # Sample rate
    seconds = 3  # Duration of recording
    output_path = 'output.wav'

    myrecording = sd.rec(int(seconds * sample_rate), samplerate=sample_rate, channels=2)
    sd.wait()  # Wait until recording is finished
    # Write with the rate actually used for the recording. This previously
    # relied on a global `fs` defined in another cell.
    write(output_path, sample_rate, myrecording)

    feature_set = _extract_audio_features(output_path)

    # `pca` was fitted on standardised training features, so the sample must
    # be standardised with the same scaler before it is projected.
    feature_set_scaled = scaler.transform(feature_set)
    feature_set_pca = pca.transform(feature_set_scaled)
    print("After PCA shape:", feature_set_pca.shape)

    # Add a trailing channel axis: (samples, 75 components, 1).
    feature_set_reshaped = feature_set_pca.reshape(-1, 75, 1)
    print("Reshaped for model input:", feature_set_reshaped.shape)

    # Class names indexed by the model's output position.
    # NOTE(review): order assumed to match the label encoding used at
    # training time -- confirm against the training notebook.
    match_array = ["female_angry", "female_calm", "female_disgust", "female_fearful", "female_happy", "female_neutral", "female_sad", "female_suprised", "male_angry", "male_calm", "male_disgust", "male_fearful", "male_happy", "male_neutral", "male_sad", "male_suprised"]
    print(match_array[np.argmax(model.predict(feature_set_reshaped))])
    

In [None]:
# Record a fresh sample from the microphone and print its predicted label.
record_and_predict()