In [None]:
pip install --upgrade paramiko


In [None]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Paths to the datasets.
# Set the TESS_DIR / RAVDESS_DIR environment variables to point at the data on
# another machine; the literals below are only the fallback defaults.
tess_dir = os.environ.get(
    "TESS_DIR", r"E:\Downloads\TESS Toronto emotional speech set data"
)
ravdess_dir = os.environ.get("RAVDESS_DIR", r"E:\Downloads\archive\RAVDESS")

# Emotion mapping: raw label token taken from a filename -> emotion name.
# Word keys are matched against the last underscore-separated token of TESS
# filenames; two-digit keys are matched against the third dash-separated field
# of RAVDESS filenames (see load_dataset).
emotion_map = {
    # TESS filename suffixes
    'angry': 'angry', 'disgust': 'disgust', 'fear': 'fear', 'happy': 'happy',
    'neutral': 'neutral', 'ps': 'surprise', 'sad': 'sad',
    # RAVDESS emotion codes
    '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
    '05': 'angry', '06': 'fear', '07': 'disgust', '08': 'surprise'
}

# Feature extraction function
def extract_audio_features(filename):
    """Summarise one audio clip as a fixed-length 1-D feature vector.

    Loads at most 3 seconds of audio starting 0.5 s into the file and
    concatenates, in this order, the time-averaged MFCCs (40 coefficients),
    chroma, mel spectrogram, mean fundamental frequency (one value),
    zero-crossing rate, RMS energy, spectral centroid and spectral bandwidth.

    Parameters
    ----------
    filename : str or path-like
        Audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D feature vector. The pitch entry is ``0.0`` when no voiced frame
        is detected, so the vector never contains NaN because of silence or
        unvoiced audio.
    """
    y, sr = librosa.load(filename, duration=3, offset=0.5)

    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    stft = np.abs(librosa.stft(y))
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)

    # Pass the real sample rate so pyin does not fall back to its own default.
    f0, _, _ = librosa.pyin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
    )
    # pyin reports unvoiced frames as NaN rather than returning None, so a
    # clip with no voiced frame would make np.nanmean yield NaN. Average only
    # the voiced frames and fall back to 0 when there are none.
    if f0 is None:
        voiced_f0 = np.array([])
    else:
        f0 = np.asarray(f0, dtype=float)
        voiced_f0 = f0[~np.isnan(f0)]
    pitch = float(voiced_f0.mean()) if voiced_f0.size else 0.0

    zcr = np.mean(librosa.feature.zero_crossing_rate(y).T, axis=0)
    rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
    cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)
    bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr).T, axis=0)
    return np.hstack([mfcc, chroma, mel, pitch, zcr, rms, cent, bw])

# Load dataset paths and labels
def _iter_wav_files(root_dir):
    """Yield ``(full_path, file_name)`` for every .wav file under ``root_dir`` in sorted order."""
    for root, dirnames, filenames in os.walk(root_dir):
        # os.walk yields entries in arbitrary filesystem order; sorting keeps
        # the row order (and therefore any seeded split) reproducible.
        dirnames.sort()
        for file_name in sorted(filenames):
            if file_name.lower().endswith(".wav"):
                yield os.path.join(root, file_name), file_name


def load_dataset(tess_dir, ravdess_dir, label_map=None):
    """Collect audio paths and emotion labels from the TESS and RAVDESS trees.

    Parameters
    ----------
    tess_dir : str
        Root folder walked for TESS files. The label token is the last
        underscore-separated part of the file name (extension removed,
        lower-cased).
    ravdess_dir : str
        Root folder walked for RAVDESS files. The label token is the third
        dash-separated field of the file name.
    label_map : dict, optional
        Maps a raw label token to an emotion name. Defaults to the
        module-level ``emotion_map``.

    Returns
    -------
    pandas.DataFrame
        Columns ``path`` and ``emotion``, one row per recognised file, TESS
        rows first. Files whose token is not in ``label_map`` are skipped,
        as are RAVDESS-root files that do not have at least three
        dash-separated fields. The frame is empty when nothing matches.
    """
    if label_map is None:
        label_map = emotion_map

    records = []

    # TESS dataset
    for path, file_name in _iter_wav_files(tess_dir):
        label = file_name.split('_')[-1].split('.')[0].lower()
        if label in label_map:
            records.append([path, label_map[label]])

    # RAVDESS dataset
    for path, file_name in _iter_wav_files(ravdess_dir):
        fields = file_name.split('-')
        if len(fields) < 3:
            # Not a RAVDESS-style name; skip it instead of raising IndexError.
            continue
        label_code = fields[2]
        if label_code in label_map:
            records.append([path, label_map[label_code]])

    return pd.DataFrame(records, columns=["path", "emotion"])

# Load data
df = load_dataset(tess_dir, ravdess_dir)
if df.empty:
    # Fail early with a clear message instead of an obscure shape error below.
    raise FileNotFoundError(
        f"No labelled .wav files found under {tess_dir!r} or {ravdess_dir!r}"
    )

# Extract features (one fixed-length vector per clip)
X_features = df['path'].apply(extract_audio_features)
X = np.array(X_features.tolist())

# Encode labels
le = LabelEncoder()
y = le.fit_transform(df['emotion'])

# Split data BEFORE scaling so that test-set statistics never leak into the
# scaler that the model is trained with.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Standardize features: fit on the training rows only, then apply everywhere
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so later cells that reference X_scaled keep working.
X_scaled = scaler.transform(X)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(
    y_test,
    y_pred,
    # Pin the label set so target_names always lines up with the encoder.
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
)

# The report is a multi-line string; print it so it renders as a table
# rather than as a repr full of "\n" escapes.
print(f"Accuracy: {accuracy:.4f}")
print(report)


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


In [None]:
import librosa
import numpy as np
import matplotlib.pyplot as plt

# Function to extract features
def extract_audio_features(filename):
    """Summarise one audio clip as a fixed-length 1-D feature vector.

    This must produce the same feature layout the model was trained on:
    at most 3 seconds of audio starting 0.5 s into the file, reduced to the
    time-averaged MFCCs (40 coefficients), chroma, mel spectrogram, mean
    fundamental frequency (one value), zero-crossing rate, RMS energy,
    spectral centroid and spectral bandwidth, concatenated in that order.

    Parameters
    ----------
    filename : str or path-like
        Audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D feature vector. The pitch entry is ``0.0`` when no voiced frame
        is detected, so the vector never contains NaN because of silence or
        unvoiced audio.
    """
    y, sr = librosa.load(filename, duration=3, offset=0.5)

    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
    stft = np.abs(librosa.stft(y))
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)

    # Pass the real sample rate so pyin does not fall back to its own default.
    f0, _, _ = librosa.pyin(
        y,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sr,
    )
    # pyin reports unvoiced frames as NaN rather than returning None, so a
    # clip with no voiced frame would make np.nanmean yield NaN. Average only
    # the voiced frames and fall back to 0 when there are none.
    if f0 is None:
        voiced_f0 = np.array([])
    else:
        f0 = np.asarray(f0, dtype=float)
        voiced_f0 = f0[~np.isnan(f0)]
    pitch = float(voiced_f0.mean()) if voiced_f0.size else 0.0

    zcr = np.mean(librosa.feature.zero_crossing_rate(y).T, axis=0)
    rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
    cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)
    bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr).T, axis=0)
    return np.hstack([mfcc, chroma, mel, pitch, zcr, rms, cent, bw])

# Replace this with your audio file path
external_audio_path = r"E:\Downloads\thank-you-locutora-virtual-joanna-rubio-136226 (1).mp3"

# Emotion labels dictionary.
# Column i of predict_proba corresponds to model.classes_[i]; decode those
# encoded ids with the fitted LabelEncoder instead of a hand-written table so
# the mapping cannot drift from the labels the model was trained on.
# display_names keeps the wording this cell has always shown for two classes.
display_names = {'fear': 'fearful', 'surprise': 'surprised'}
emotion_labels = {
    idx: display_names.get(name, name)
    for idx, name in enumerate(le.inverse_transform(model.classes_))
}

# Extract features
external_features = extract_audio_features(external_audio_path).reshape(1, -1)
# The model was trained on standardized features, so apply the same fitted
# scaler here; feeding raw features would be compared against thresholds
# learned in the scaled space.
external_features_scaled = scaler.transform(external_features)

# Predict
predicted_probs = model.predict_proba(external_features_scaled)[0]
predicted_idx = int(np.argmax(predicted_probs))
predicted_emotion = emotion_labels[predicted_idx]

# Emotion embeddings (class name -> predicted probability)
emotion_embeddings = {emotion_labels[i]: prob for i, prob in enumerate(predicted_probs)}

# --------------- Final Display ----------------
print(f"Predicted Emotion: {predicted_emotion}\n")
print("Emotion Embeddings (Probabilities):")
for emotion, prob in emotion_embeddings.items():
    print(f"{emotion}: {prob:.4f}")

# --------------- Plotting ----------------
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(list(emotion_embeddings.keys()), list(emotion_embeddings.values()), color='skyblue')
ax.set_title(f'Emotion Embeddings for Audio File\nPredicted: {predicted_emotion}')
ax.set_xlabel('Emotion')
ax.set_ylabel('Probability')
ax.tick_params(axis='x', rotation=45)
ax.grid(axis='y')
fig.tight_layout()
plt.show()







