<a href="https://colab.research.google.com/github/nikhil20499/speech-text-sentiment-analysis/blob/master/working.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install SpeechRecognition
!pip install pyttsx3
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install PyAudio


In [None]:
pip install librosa soundfile numpy sklearn pyaudio

In [None]:
!pip install librosa==0.6.3 numpy soundfile==0.9.0 sklearn pyaudio==0.2.11
!ls '/content/drive/MyDrive/DATA/speech-emotion-recognition-ravdess-data.zip'
!unzip '/content/drive/MyDrive/DATA/speech-emotion-recognition-ravdess-data.zip'
!ls

In [None]:
# NOTE(review): presumably pinned for compatibility with the librosa==0.6.3 pin used earlier — confirm.
!pip install numba==0.48

In [49]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

#function for extracting mfcc, chroma, and mel features from sound file
def extract_feature(file_name, mfcc, chroma, mel):
  """Build a 1-D feature vector for one audio file.

  Args:
    file_name: path of an audio file readable by soundfile.
    mfcc: if truthy, append the time-averaged 40 MFCC coefficients.
    chroma: if truthy, append the time-averaged chroma of the STFT magnitude.
    mel: if truthy, append the time-averaged mel spectrogram.

  Returns:
    1-D float array with the selected features concatenated in the order
    mfcc, chroma, mel (empty when all three flags are falsy).
  """
  with soundfile.SoundFile(file_name) as sound_file:
    X = sound_file.read(dtype="float32")
    sample_rate = sound_file.samplerate
  # Multi-channel files are read as a 2-D (frames, channels) array; mix them
  # down to mono so every feature call below receives a 1-D signal.
  if X.ndim > 1:
    X = X.mean(axis=1)
  # Seed with an empty float64 array so the result dtype matches the previous
  # hstack-onto-np.array([]) behaviour.
  parts = [np.array([])]
  if mfcc:
    mfcc_mean = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    parts.append(mfcc_mean)
  if chroma:
    stft = np.abs(librosa.stft(X))
    chroma_mean = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    parts.append(chroma_mean)
  if mel:
    # Audio must be passed as keyword `y`: recent librosa releases reject it positionally.
    mel_mean = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    parts.append(mel_mean)
  return np.hstack(parts)

In [None]:
#Define the emotions dictionary

In [50]:
# Maps the two-digit emotion code found in the third dash-separated field of a
# file name to a human-readable label.
emotions = {
  '01': 'neutral',
  '02': 'calm',
  '03': 'happy',
  '04': 'sad',
  '05': 'angry',
  '06': 'fearful',
  '07': 'disgust',
  '08': 'surprised',
}
# Emotions we want to observe; files with any other label are skipped when loading.
observed_emotions = [
  'calm',
  'happy',
  'fearful',
  'disgust',
]

In [51]:
#Load the data and extract features for each sound file
def load_data(test_size = 0.30, data_dir = '/content'):
  """Extract features for every observed-emotion clip and split train/test.

  Args:
    test_size: fraction of samples held out for the test set.
    data_dir: directory that contains the `Actor_*` folders of .wav files.

  Returns:
    The train_test_split result: x_train, x_test, y_train, y_test.

  Raises:
    ValueError: if no matching .wav file is found under data_dir.
  """
  x, y = [], []
  # glob returns files in filesystem order; sorting keeps the sample order, and
  # therefore the seeded split below, identical from run to run and machine to machine.
  for folder in sorted(glob.glob(os.path.join(data_dir, 'Actor_*'))):
    print(folder)
    for file in sorted(glob.glob(os.path.join(folder, '*.wav'))):
      file_name = os.path.basename(file)
      # The third dash-separated field of the file name is the emotion code.
      emotion = emotions[file_name.split('-')[2]]
      if emotion not in observed_emotions:
        continue
      feature = extract_feature(file, mfcc = True, chroma = True, mel = True)
      x.append(feature)
      y.append(emotion)
  if not x:
    # Fail with an actionable message instead of an error about zero samples from the splitter.
    raise ValueError("No .wav files for the observed emotions found under " + os.path.join(data_dir, 'Actor_*'))
  return train_test_split(np.array(x), y, test_size = test_size, random_state = 9)

In [None]:
# Extract features for the whole dataset and split it, holding out 20% of the samples for testing
x_train,x_test,y_train,y_test=load_data(test_size=0.2)

In [None]:
# Report the split shapes, the sample counts, the first feature vector of each
# split, and the number of features per sample.
shapes = [np.shape(part) for part in (x_train, x_test, y_train, y_test)]
print(*shapes)
n_train, n_features = x_train.shape
n_test = x_test.shape[0]
print((n_train, n_test))
print((x_train[0], x_test[0]))
print(f'Features extracted: {n_features}')

In [None]:
# Create a Multi-Layer Perceptron (MLP) classifier object 'model' and train it.
model = MLPClassifier(
  alpha = 0.01,
  batch_size = 256,
  epsilon = 1e-08,
  hidden_layer_sizes = (300,),
  learning_rate = 'adaptive',
  max_iter = 500,
  # Fixed seed (same value as the train/test split) so weight initialisation
  # and batch shuffling, and therefore the reported accuracy, are repeatable.
  random_state = 9,
)
model.fit(x_train, y_train) # here we fit the model

In [None]:
#Predict emotion labels for the held-out test set
y_pred = model.predict(x_test)
# Bare expression so the notebook displays the predicted labels
y_pred

In [None]:
# Fraction of test samples whose predicted label matches the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = accuracy * 100
print("Accuracy:{:.2f}%".format(accuracy_pct))

In [58]:
#SAVING THE MODEL
import pickle
# Persist the trained model to a file in the current working directory so it
# can be reloaded later to classify recordings that are not part of the dataset.
Pkl_Filename = "Emotion_Voice_Detection_Model.pkl"
with open(Pkl_Filename, mode='wb') as model_file:
  pickle.dump(model, model_file)

In [None]:
# Read the pickled classifier back from disk.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
with open(Pkl_Filename, mode='rb') as model_file:
  Emotion_Voice_Detection_Model = pickle.load(model_file)
# Bare expression so the notebook displays the restored estimator
Emotion_Voice_Detection_Model

In [None]:
# Predict the test set again with the model reloaded from the pickle file;
# this overwrites y_pred from the in-memory model.
y_pred=Emotion_Voice_Detection_Model.predict(x_test)
# Bare expression so the notebook displays the predicted labels
y_pred

In [None]:
# Store the predicted labels (not probabilities) in a CSV file
import numpy as np
import pandas as pd

prediction_columns = ['predictions']
y_pred1 = pd.DataFrame(y_pred, columns=prediction_columns)
print(y_pred1)
# The DataFrame index is written as the first CSV column.
y_pred1.to_csv('predictionfinal.csv')

In [None]:
# Sound-recording step: install the ffmpeg-python bindings imported as `ffmpeg` by the recording cell
!pip install ffmpeg-python

In [None]:
!pip install ffmpeg-python
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg

# HTML/JavaScript snippet rendered in the cell output: it shows a button,
# records from the microphone with MediaRecorder, and exposes a global promise
# `data` that resolves to the recording as a base64 data URL (or to "" when
# the microphone cannot be opened).
AUDIO_HTML = """

<script>

var my_div = document.createElement("DIV");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");
my_btn.appendChild(t);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;

// `data` settles only when the recording has actually been read into a data
// URL, instead of after a fixed delay that may elapse before any data exists.
var resolveData;
var data = new Promise(resolve => { resolveData = resolve; });

var handleSuccess = function(stream) {
    gumStream = stream;
    recorder = new MediaRecorder(stream);
    recorder.ondataavailable = function(e) {
        var url = URL.createObjectURL(e.data);
        var preview = document.createElement('audio');
        preview.controls = true;
        preview.src = url;
        document.body.appendChild(preview);
        reader = new FileReader();
        reader.onloadend = function() {
            base64data = reader.result;
            resolveData(base64data.toString());
        };
        reader.readAsDataURL(e.data);
    };
    recorder.start();
    // Only claim to be recording once the recorder has really started.
    recordButton.innerText = "Recording... press to stop";
};

navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess).catch(function(err) {
    // Without a microphone there will never be data; settle the promise so
    // the Python side does not wait forever.
    recordButton.innerText = "Microphone unavailable: " + err;
    resolveData("");
});

function toggleRecording() {
    // Clicks before the recorder exists (or after it stopped) are ignored.
    if (recorder && recorder.state == "recording") {
        recorder.stop();
        gumStream.getAudioTracks()[0].stop();
        recordButton.innerText = "Saving the recording... pls wait!";
    }
}

recordButton.onclick = toggleRecording;
</script>
"""
def get_audio():
  """Record audio in the browser and return it as samples plus sample rate.

  Renders AUDIO_HTML, waits for the JavaScript `data` value (a base64 data
  URL of the recording), converts the recording to WAV by piping it through
  ffmpeg, and parses the WAV bytes with scipy.

  Returns:
    (audio, sr): the sample array and the sample rate, as returned by
    scipy.io.wavfile.read.

  Raises:
    RuntimeError: if the browser delivered no recording, or ffmpeg produced
      no usable output.
  """
  display(HTML(AUDIO_HTML))
  data = eval_js("data")
  # The payload is everything after the first comma of the data URL; a value
  # without a comma means nothing was recorded.
  _, separator, payload = str(data).partition(',')
  if not separator or not payload:
    raise RuntimeError("No audio was captured from the browser recorder")
  binary = b64decode(payload)
  process = (ffmpeg
             .input('pipe:0')
             .output('pipe:1', format='wav')
             .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
             )
  output, err = process.communicate(input=binary)
  if len(output) < 8:
    # Too short to even hold the RIFF header; surface ffmpeg's own diagnostics.
    detail = err.decode(errors='replace') if err else ''
    raise RuntimeError("ffmpeg produced no WAV data: " + detail)
  # Replace bytes 4:8 of the ffmpeg output with the actual size of the RIFF
  # chunk (total length minus the 8-byte header), as 4 little-endian bytes.
  # NOTE(review): presumably needed because ffmpeg cannot rewrite the header
  # when writing to a pipe — confirm.
  riff_chunk_size = len(output) - 8
  size_bytes = (riff_chunk_size & 0xFFFFFFFF).to_bytes(4, 'little')
  riff = output[:4] + size_bytes + output[8:]
  sr, audio = wav_read(io.BytesIO(riff))
  return audio, sr

In [77]:
# Record a clip from the browser microphone; returns the samples and their sample rate
audio, sr= get_audio()

In [78]:
!sudo apt-get install libportaudio2
!pip install sounddevice

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libportaudio2 is already the newest version (19.6.0-1).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.


In [79]:
# NOTE(review): `sd` is not used in the cells shown here — confirm whether this import is still needed.
import sounddevice as sd
from scipy.io.wavfile import write
# Persist the recorded samples at their sample rate so extract_feature can read them from disk
write('output.wav', sr,audio)  # Save as WAV file 

In [80]:
# Extract the same three feature groups used for training; reshape(1,-1) turns
# the 1-D feature vector into a single-row 2-D array for predict().
m=extract_feature('output.wav', mfcc = True, chroma = True, mel = True).reshape(1,-1)

In [81]:
# Classify the recorded clip with the reloaded model and show the predicted label
v=Emotion_Voice_Detection_Model.predict(m)
print(v)

['happy']
