In [None]:
# Make Google Drive visible under /content/drive; force_remount re-mounts
# even when a previous mount is still present.
from google.colab import drive

drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
%%capture
# PyAudio is compiled from source on this runtime; the recorded output of the
# later install cells shows that wheel build failing. It needs the PortAudio
# development headers, so install them before asking pip for PyAudio.
!apt-get install -y -qq portaudio19-dev
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install pydub
%pip install noisereduce
%pip install pyaudio
%pip install json-tricks

In [None]:
%%capture
import os
from json_tricks import load

import numpy as np

import librosa
from pydub import AudioSegment, effects
import noisereduce as nr

import tensorflow as tf
import keras
from keras.models import model_from_json
from keras.models import load_model

import matplotlib.pyplot as plt

In [None]:
# Locations of the serialized architecture (JSON) and the trained weights (HDF5).
saved_model_path = '/content/drive/My Drive/Colab Notebooks/model8723.json'
saved_weights_path = '/content/drive/My Drive/Colab Notebooks/model8723_weights.h5'

# Read the model architecture from the JSON file.
with open(saved_model_path, 'r') as json_file:
    json_savedModel = json_file.read()

# Rebuild the architecture, then load the trained weights into it.
model = tf.keras.models.model_from_json(json_savedModel)
model.load_weights(saved_weights_path)

# Compile the model with similar parameters as the original model.
model.compile(loss='categorical_crossentropy',
                optimizer='RMSProp',
                metrics=['categorical_accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 64)           20480     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 8)                 520       
                                                                 
Total params: 54024 (211.03 KB)
Trainable params: 54024 (211.03 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [None]:
def preprocess(file_path, frame_length = 2048, hop_length = 512):
    """Convert an audio file into the feature tensor passed to the model.

    The file is decoded with pydub, peak-normalized, converted to float32
    samples and noise-reduced. Per-frame RMS energy, zero-crossing rate and
    13 MFCCs are then computed and stacked side by side.

    Args:
        file_path: Path of the audio file to read.
        frame_length: Analysis window length (samples) for RMS and ZCR.
        hop_length: Step (samples) between consecutive frames for all features.

    Returns:
        np.ndarray with a leading batch axis of size 1; the last axis holds
        the 15 features per frame (1 RMS + 1 ZCR + 13 MFCC).
    """
    # Decode the file once; pydub also exposes the file's own sample rate, so
    # there is no need to decode it a second time with librosa just for `sr`.
    rawsound = AudioSegment.from_file(file_path, duration = None)
    sr = rawsound.frame_rate
    # Peak-normalize, leaving 5 dB of headroom below full scale.
    normalizedsound = effects.normalize(rawsound, headroom = 5.0)
    # Transform the audio to an np.array of samples.
    # NOTE(review): get_array_of_samples() returns one flat sequence, so this
    # presumably expects mono input — confirm for multi-channel files.
    normal_x = np.array(normalizedsound.get_array_of_samples(), dtype = 'float32')
    # Noise reduction
    final_x = nr.reduce_noise(normal_x, sr=sr, use_tensorflow=True)

    # The signal is passed as `y=`: recent librosa releases make the arguments
    # of these feature functions keyword-only, and the keyword form also works
    # on older releases.
    f1 = librosa.feature.rms(y=final_x, frame_length=frame_length, hop_length=hop_length,
                             center=True, pad_mode='reflect').T  # Energy - Root Mean Square
    f2 = librosa.feature.zero_crossing_rate(y=final_x, frame_length=frame_length,
                                            hop_length=hop_length, center=True).T  # ZCR
    f3 = librosa.feature.mfcc(y=final_x, sr=sr, S=None, n_mfcc=13,
                              hop_length=hop_length).T  # MFCC
    X = np.concatenate((f1, f2, f3), axis = 1)

    # Add the batch axis expected by model.predict().
    X_3D = np.expand_dims(X, axis=0)

    return X_3D

In [None]:
# Human-readable labels for the model output: position in `emo_list` (and key
# in `emotions`) is the class index predicted by the model.
emo_list = [
    'neutral',
    'calm',
    'happy',
    'sad',
    'angry',
    'fearful',
    'disgust',
    'suprised',
]
emotions = dict(enumerate(emo_list))

def is_silent(data, threshold=100):
    """Tell whether a chunk of samples stays below the silence threshold.

    Args:
        data: Iterable of sample values.
        threshold: Largest sample value must be strictly below this for the
            chunk to count as silent. Defaults to 100.

    Returns:
        True when the maximum of `data` is below `threshold`. An empty chunk
        contains no sound and is reported as silent instead of raising.
    """
    # `default` avoids the ValueError that max() raises on an empty sequence.
    peak = max(data, default=None)
    if peak is None:
        return True
    return peak < threshold


In [None]:
def predict_emotion(audio_data, sample_rate):
    """Predict the emotion label for an in-memory recording.

    The notebook's feature extractor, `preprocess`, works on audio files, so
    the samples are written to a temporary 16-bit WAV file, run through
    `preprocess`, and the file is removed again.

    Args:
        audio_data: Array of samples, shaped (n_samples,) or
            (n_samples, n_channels). Floating-point data is treated as being
            in [-1, 1]; integer data is expected to fit in int16.
        sample_rate: Sampling rate of `audio_data` in Hz.

    Returns:
        The label from `emotions` for the highest-scoring class, or -1 when
        the class index is not in `emotions`.
    """
    import os
    import tempfile
    import wave

    samples = np.asarray(audio_data)
    if samples.ndim == 1:
        samples = samples[:, np.newaxis]
    if np.issubdtype(samples.dtype, np.floating):
        # Scale float samples to the 16-bit PCM range.
        samples = np.clip(samples, -1.0, 1.0) * 32767
    pcm = samples.astype('<i2')  # WAV stores little-endian samples

    # Reserve a file name; the handle is closed so `wave` can reopen the path.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
        wav_path = tmp.name
    try:
        with wave.open(wav_path, 'wb') as wav_file:
            wav_file.setnchannels(pcm.shape[1])
            wav_file.setsampwidth(2)
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())
        preprocessed_audio = preprocess(wav_path)
    finally:
        os.remove(wav_path)

    predictions = model.predict(preprocessed_audio)
    max_emotion = int(np.argmax(predictions))
    emotion_label = emotions.get(max_emotion, -1)
    return emotion_label

# Function to record audio from the microphone
def record_microphone_audio(duration=10):
    """Record a fixed-length mono clip from the default input device.

    Args:
        duration: Length of the recording in seconds. Defaults to 10.

    Returns:
        Tuple (audio_data, sample_rate): the recorded samples as returned by
        sounddevice, and the module-level `sample_rate` that was used.
    """
    # Imported here because this cell runs before the cell that imports
    # sounddevice at module level; without it `sd` is an undefined name.
    import sounddevice as sd

    # The recording has a fixed length and sd.wait() blocks until it is done,
    # so the message states the duration rather than promising a key press.
    print(f"Recording audio from the microphone for {duration} seconds...")
    audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sd.wait()
    return audio_data, sample_rate

# Sampling rate in Hz; record_microphone_audio() reads this module-level name.
sample_rate = 44100  # Set your desired sample rate
# Record from the microphone; the function returns the samples and the rate it used.
audio_data, sample_rate = record_microphone_audio()

# Classify the recording and show the predicted label.
emotion = predict_emotion(audio_data, sample_rate)
print("Predicted emotion:", emotion)

Recording audio from the microphone. Press Enter to stop...


NameError: ignored

In [None]:
# PyAudio is built from source here, and the recorded output shows that build
# failing. It compiles against PortAudio, so install the development headers
# before running pip.
!apt-get install -y -qq portaudio19-dev
!pip install pyaudio

Collecting pyaudio
  Using cached PyAudio-0.2.14.tar.gz (47 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyaudio
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for pyaudio [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for pyaudio (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for pyaudio[0m[31m
[0mFailed to build pyaudio
[31mERROR: Could not build wheels for pyaudio, which is required to install pyproject.toml-based projects[0m[31m
[0m

In [None]:
# Installs pipwin, which the next cell uses to try to fetch a prebuilt PyAudio wheel.
# NOTE(review): pipwin itself warns about non-Windows systems (see the next cell's output).
!pip install pipwin

Collecting pipwin
  Downloading pipwin-0.5.2.tar.gz (7.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting docopt (from pipwin)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyprind (from pipwin)
  Downloading PyPrind-2.11.3-py2.py3-none-any.whl (8.4 kB)
Collecting js2py (from pipwin)
  Downloading Js2Py-0.74-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting pySmartDL>=1.3.1 (from pipwin)
  Downloading pySmartDL-1.3.4-py3-none-any.whl (20 kB)
Collecting pyjsparser>=2.5.1 (from js2py->pipwin)
  Downloading pyjsparser-2.7.1.tar.gz (24 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pipwin, docopt, pyjsparser
  Building wheel for pipwin (setup.py) ... [?25l[?25hdone
  Created wheel for pipwin: filename=pipwin-0.5.2-py2.py3-none-any.whl size=8769 sha256=4a1220

In [None]:
# Attempts to install PyAudio through pipwin. The output shows a warning that this is
# not a Windows system, followed by the download of a win_amd64 wheel.
!pipwin install pyaudio

  warn("Found a non Windows system. Package installation might not work.")
Building cache. Hang on . . .
Done
Package `pyaudio` found in cache
Downloading package . . .
https://download.lfd.uci.edu/pythonlibs/archived/PyAudio-0.2.11-cp310-cp310-win_amd64.whl
PyAudio-0.2.11-cp310-cp310-win_amd64.whl
[*] 0 bytes / 111 kB @ 0 bytes/s [------------------] [0.0%, 0s left]    [*] 0 bytes / 111 kB @ 0 bytes/s [------------------] [0.0%, 0s left]    [*] 0 bytes / 111 kB @ 0 bytes/s [------------------] [0.0%, 0s left]    [*] 8 kB / 111 kB @ 20 kB/s [#-----------------] [7.2%, 0s left]    [*] 72 kB / 111 kB @ 144 kB/s [###########-------] [64.8%, 0s left]   

In [None]:
# Retries the PyAudio install through pip3; the recorded output shows the same
# "Failed building wheel for pyaudio" error as the earlier attempt.
!pip3 install pyaudio

Collecting pyaudio
  Using cached PyAudio-0.2.14.tar.gz (47 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyaudio
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for pyaudio [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for pyaudio (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for pyaudio[0m[31m
[0mFailed to build pyaudio
[31mERROR: Could not build wheels for pyaudio, which is required to install pyproject.toml-based projects[0m[31m
[0m

In [None]:
# NOTE(review): "pyproject.toml-based projects" is wording taken from pip's error message
# above, not a package name; pip reports that no matching distribution exists.
!pip install pyproject.toml-based projects

[31mERROR: Could not find a version that satisfies the requirement pyproject.toml-based (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pyproject.toml-based[0m[31m
[0m

In [None]:
# A win_amd64 wheel cannot be installed on this Linux runtime (pip rejects it as
# an unsupported wheel), so install the PortAudio headers and let pip build
# PyAudio from source instead. %pip is written explicitly so the cell does not
# depend on automagic.
!apt-get install -y -qq portaudio19-dev
%pip install pyaudio


[0m[31mERROR: PyAudio-0.2.14-cp310-cp310-win_amd64.whl is not a supported wheel on this platform.[0m[31m
[0m

In [None]:
import pyaudio
import wave
from array import array
import struct
import time

# Initialize variables
RATE = 24414
CHUNK = 512
RECORD_SECONDS = 7.1

FORMAT = pyaudio.paInt32
CHANNELS = 1
WAVE_OUTPUT_FILE = "/content/drive/My Drive/Colab Notebooks/output.wav"

# Number of trailing chunks inspected for silence after each sequence.
# NOTE(review): 24 chunks of 512 samples at 24414 Hz is about 0.5 s, although the
# original comment spoke of "the last 2 seconds" — confirm the intended window.
TAIL_CHUNKS = 24

# Open an input channel
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)


# Initialize a non-silent signals array so the first 'while' test passes.
data = array('h', np.random.randint(size = 512, low = 0, high = 500))

# SESSION START
print("** session started")
total_predictions = [] # A list for all predictions in the session.
tic = time.perf_counter()

# Chunks read per sequence; loop-invariant, so computed once.
timesteps = int(RATE / CHUNK * RECORD_SECONDS) # => 338

try:
    while not is_silent(data):
        print("* recording...")
        frames = []

        for _ in range(timesteps):
            # paInt32 delivers 4-byte samples. An explicit int32 view avoids the
            # platform-dependent width of array typecode 'l' (8 bytes on Linux).
            data = np.frombuffer(stream.read(CHUNK), dtype=np.int32)
            frames.append(data)

        # Write 'output.wav' once per sequence. The context manager closes the
        # file, so the header is finalized before preprocess() reads it back.
        with wave.open(WAVE_OUTPUT_FILE, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))

        print("* done recording")

        x = preprocess(WAVE_OUTPUT_FILE) # 'output.wav' file preprocessing.
        # Model's prediction => an 8 emotion probabilities array.
        predictions = model.predict(x, use_multiprocessing=True)
        pred_list = list(predictions)
        pred_np = np.squeeze(np.array(pred_list).tolist(), axis=0) # Drop the batch axis.
        total_predictions.append(pred_np)

        # Present emotion distribution for a sequence (7.1 secs).
        fig = plt.figure(figsize = (10, 2))
        plt.bar(emo_list, pred_np, color = 'darkturquoise')
        plt.ylabel("Probabilty (%)")
        plt.show()

        max_emo = np.argmax(predictions)
        print('max emotion:', emotions.get(max_emo,-1))

        print(100*'-')

        # Check the samples of the trailing chunks. The earlier version unpacked
        # the raw bytes as unsigned values and cast them to int8, which tests
        # byte patterns rather than sample amplitudes.
        last_frames = np.concatenate(frames[-TAIL_CHUNKS:])
        if is_silent(last_frames): # If the tail of the sequence is silent, end the session.
            break

    toc = time.perf_counter()
finally:
    # Release the audio device even when recording or prediction fails.
    stream.stop_stream()
    stream.close()
    p.terminate()

# SESSION END
print('** session ended')

# Present emotion distribution for the whole session.
total_predictions_np =  np.mean(np.array(total_predictions).tolist(), axis=0)
fig = plt.figure(figsize = (10, 5))
plt.bar(emo_list, total_predictions_np, color = 'indigo')
plt.ylabel("Mean probabilty (%)")
plt.title("Session Summary")
plt.show()

print(f"Emotions analyzed for: {(toc - tic):0.4f} seconds")

ModuleNotFoundError: ignored

In [None]:
# sounddevice loads the PortAudio shared library when it is imported; install it
# first so that `import sounddevice` does not fail on this runtime.
!apt-get install -y -qq libportaudio2
%pip install sounddevice



In [None]:
import sounddevice as sd
import numpy as np
import wave
from array import array
import struct
import time

# Initialize variables
# NOTE(review): 204288 Hz is not a common audio sampling rate and the input
# device may reject it — confirm the intended value.
RATE = 204288
CHUNK = 512
RECORD_SECONDS = 7.1

FORMAT = np.int32
CHANNELS = 1
WAVE_OUTPUT_FILE = "output.wav"

# Number of trailing CHUNK-sized blocks inspected for silence after each sequence.
# NOTE(review): the original comment spoke of "the last 2 seconds"; 24 blocks of
# 512 samples is far shorter at this rate — confirm the intended window.
TAIL_CHUNKS = 24

# Initialize a non-silent signals array so the first 'while' test passes.
data = np.random.randint(low=0, high=500, size=512, dtype=np.int32)

# SESSION START
print("** session started")
total_predictions = []  # A list for all predictions in the session.
tic = time.perf_counter()

# Samples per recorded sequence; loop-invariant, so computed once.
samples_per_sequence = int(RATE * RECORD_SECONDS)

# One iteration = record a sequence, classify it, then stop if its tail is silent.
# (The earlier version had a recording loop that could never terminate and a
# `break` outside of any loop, which is a SyntaxError.)
while not is_silent(data):
    print("* recording...")
    # sd.rec() captures a fixed number of frames; sd.wait() blocks until done.
    recording = sd.rec(samples_per_sequence, samplerate=RATE,
                       channels=CHANNELS, dtype=np.dtype(FORMAT).name)
    sd.wait()
    samples = recording.reshape(-1)
    data = samples[-CHUNK:]  # last block, re-tested by the loop condition

    # Write the sequence to 'output.wav'; the context manager closes the file
    # before preprocess() reads it back.
    with wave.open(WAVE_OUTPUT_FILE, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(4)  # 4 bytes for int32
        wf.setframerate(RATE)
        wf.writeframes(samples.astype('<i4').tobytes())  # WAV data is little-endian

    print("* done recording")

    x = preprocess(WAVE_OUTPUT_FILE)  # 'output.wav' file preprocessing.
    # Model's prediction => an 8 emotion probabilities array.
    predictions = model.predict(x, use_multiprocessing=True)
    pred_list = list(predictions)
    pred_np = np.squeeze(np.array(pred_list).tolist(), axis=0)  # Drop the batch axis.
    total_predictions.append(pred_np)

    # Present emotion distribution for a sequence (7.1 secs).
    fig = plt.figure(figsize=(10, 2))
    plt.bar(emo_list, pred_np, color='darkturquoise')
    plt.ylabel("Probability (%)")
    plt.show()

    max_emo = np.argmax(predictions)
    print('max emotion:', emotions.get(max_emo, -1))

    print(100 * '-')

    # End the session when the tail of the sequence is silent.
    last_frames = samples[-TAIL_CHUNKS * CHUNK:]
    if is_silent(last_frames):
        break

# SESSION END
toc = time.perf_counter()
print('** session ended')

# Present emotion distribution for the whole session.
total_predictions_np = np.mean(np.array(total_predictions).tolist(), axis=0)
fig = plt.figure(figsize=(10, 5))
plt.bar(emo_list, total_predictions_np, color='indigo')
plt.ylabel("Mean probability (%)")
plt.title("Session Summary")
plt.show()

print(f"Emotions analyzed for: {(toc - tic):0.4f} seconds")


OSError: ignored

In [None]:
pip install SpeechRecognition


Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.0-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.0


In [None]:
pip install pyttsx3

Collecting pyttsx3
  Downloading pyttsx3-2.90-py3-none-any.whl (39 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.90


In [None]:
import wave
import audioop
import time
import requests
import matplotlib.pyplot as plt
import numpy as np

# Initialize variables
RATE = 204288
CHANNELS = 1
WIDTH = 2
RECORD_SECONDS = 7.1
OUTPUT_FILENAME = "output.wav"

# Initialize a non-silent signals flag to start "True" in the first iteration.
non_silent = True
total_audio_data = b''  # To store the complete audio data
tic = time.perf_counter()

# Emoji for voice output
emoji_recording = "🎤"
emoji_done = "🎵"

print(f"{emoji_recording} Recording...")

# Bytes per frame; audioop and np.frombuffer only accept whole frames.
frame_bytes = WIDTH * CHANNELS

# NOTE(review): input() returns typed text, not microphone samples, so the
# "audio" handled below is the UTF-8 encoding of whatever is typed — confirm
# this placeholder is intended.
# The context manager closes the wave file even if the loop raises.
with wave.open(OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(WIDTH)
    wf.setframerate(RATE)

    while non_silent:
        audio_data = input("Press Enter to stop recording...")
        audio_data = audio_data.encode()  # Convert to bytes
        # Drop a trailing partial frame; an odd number of bytes previously made
        # audioop.rms() fail with "not a whole number of frames".
        audio_data = audio_data[:len(audio_data) - len(audio_data) % frame_bytes]

        # Check if the audio data is silent
        rms_value = audioop.rms(audio_data, WIDTH)
        non_silent = rms_value > 1000

        # Write the audio data to the wave file
        wf.writeframes(audio_data)
        total_audio_data += audio_data

# Calculate the time elapsed
toc = time.perf_counter()
print(f"{emoji_done} Session ended")
print(f"🕒 Audio recorded for: {toc - tic:0.4f} seconds")

# Visualize the recorded audio
audio_data = np.frombuffer(total_audio_data, dtype=np.int16)
print(audio_data)  # was print(audio), an undefined name
plt.figure(figsize=(10, 4))
plt.plot(audio_data)
plt.title("Recorded Audio")
plt.xlabel("Time (samples)")
plt.ylabel("Amplitude")
plt.show()

# Example emotion analysis API URL (replace with your API URL)
emotion_api_url = "https://api.textrics.ai/emotion_analysis_api.php"


# Prepare audio data to send to the emotion analysis API
audio_data_to_send = total_audio_data  # You may need to format it according to the API requirements

# You can use different emotion analysis APIs here. Example with Text-based API:
# A timeout keeps the cell from hanging forever if the service does not answer.
response = requests.post(emotion_api_url, data=audio_data_to_send, timeout=30)

# Visualize the emotion analysis result
if response.status_code == 200:
    emotion_data = response.json()  # Replace with the actual way the API provides emotion data
    print("Emotion Analysis Result:", emotion_data)
    # You can visualize or interpret the emotion analysis result here
else:
    print("Error in emotion analysis API request:", response.status_code)


🎤 Recording...
Press Enter to stop recording...asdadfsdgdf


error: ignored

In [None]:
pip install pyAudioAnalysis

Collecting pyAudioAnalysis
  Downloading pyAudioAnalysis-0.3.14.tar.gz (41.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyAudioAnalysis
  Building wheel for pyAudioAnalysis (setup.py) ... [?25l[?25hdone
  Created wheel for pyAudioAnalysis: filename=pyAudioAnalysis-0.3.14-py3-none-any.whl size=41264372 sha256=4230825633b6c01a00188b82cce7ec810c8a9ff2ce041b24b1fca46503078ea3
  Stored in directory: /root/.cache/pip/wheels/a7/54/73/fa830689c2440d2c81ff175c60e374930ad1607a8881e0f43f
Successfully built pyAudioAnalysis
Installing collected packages: pyAudioAnalysis
Successfully installed pyAudioAnalysis-0.3.14


In [None]:
pip install eyed3


Collecting eyed3
  Downloading eyed3-0.9.7-py3-none-any.whl (246 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m246.1/246.1 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting coverage[toml]<6.0.0,>=5.3.1 (from eyed3)
  Downloading coverage-5.5-cp310-cp310-manylinux1_x86_64.whl (238 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.0/239.0 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting deprecation<3.0.0,>=2.1.0 (from eyed3)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting filetype<2.0.0,>=1.0.7 (from eyed3)
  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Installing collected packages: filetype, deprecation, coverage, eyed3
Successfully installed coverage-5.5 deprecation-2.1.0 eyed3-0.9.7 filetype-1.2.0


In [None]:
pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
