In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.utils import to_categorical
import warnings
warnings.filterwarnings("ignore")


In [None]:

def load_ravdess(path="/kaggle/input/ravdess-emotional-speech-audio"):
    """Collect ``(file path, emotion)`` pairs for the RAVDESS ``.wav`` files under ``path``.

    The emotion is decoded from the third dash-separated field of the file
    name (e.g. ``03-01-05-01-01-01-01.wav`` -> code ``05`` -> ``angry``).

    Args:
        path: Root directory; it is searched recursively.

    Returns:
        list[tuple[str, str]]: ``(full_path, emotion)`` pairs. Directories and
        files are visited in sorted order so repeated runs produce the same
        list. ``.wav`` files whose name has no third field, or whose code is
        not in the emotion table, are reported and skipped instead of raising.
    """
    emotion_map = {
        '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
        '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
    }
    data = []
    for dirname, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for file in sorted(filenames):
            if not file.endswith(".wav"):
                continue
            fields = file.split("-")
            emotion = emotion_map.get(fields[2]) if len(fields) > 2 else None
            if emotion is None:
                # A stray file should not abort loading the whole dataset.
                print(f"Skipping unrecognised RAVDESS file name: {file}")
                continue
            data.append((os.path.join(dirname, file), emotion))
    return data

def load_tess(path="/kaggle/input/toronto-emotional-speech-set-tess"):
    """Collect ``(file path, emotion)`` pairs for the TESS ``.wav`` files under ``path``.

    The emotion is the third underscore-separated field of the file name,
    lower-cased and with the ``.wav`` suffix removed.

    Labels are normalised to the vocabulary the other loaders in this file
    produce: ``ps`` becomes ``surprised`` and ``fear`` becomes ``fearful``.
    Without the second mapping, ``fear`` and ``fearful`` would be encoded as
    two different classes once the datasets are combined.
    (Assumes TESS spells the label ``fear`` -- confirm against the dataset.)

    Args:
        path: Root directory; it is searched recursively.

    Returns:
        list[tuple[str, str]]: ``(full_path, emotion)`` pairs in sorted
        traversal order. ``.wav`` files with fewer than three
        underscore-separated fields are reported and skipped.
    """
    label_aliases = {'ps': 'surprised', 'fear': 'fearful'}
    data = []
    for dirname, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for file in sorted(filenames):
            if not file.endswith(".wav"):
                continue
            fields = file.split("_")
            if len(fields) < 3:
                print(f"Skipping unrecognised TESS file name: {file}")
                continue
            emotion = fields[2].replace(".wav", "").lower()
            emotion = label_aliases.get(emotion, emotion)
            data.append((os.path.join(dirname, file), emotion))
    return data

def load_crema(path="/kaggle/input/cremad"):
    """Collect ``(file path, emotion)`` pairs for the CREMA-D ``.wav`` files under ``path``.

    The emotion code (``ANG``, ``DIS``, ``FEA``, ``HAP``, ``NEU`` or ``SAD``)
    is read from the third underscore-separated field of the file name.

    Args:
        path: Root directory; it is searched recursively.

    Returns:
        list[tuple[str, str]]: ``(full_path, emotion)`` pairs in sorted
        traversal order. ``.wav`` files without a third field, or with a code
        missing from the emotion table, are reported and skipped instead of
        raising.
    """
    emotion_map = {
        'ANG': 'angry', 'DIS': 'disgust', 'FEA': 'fearful',
        'HAP': 'happy', 'NEU': 'neutral', 'SAD': 'sad'
    }
    data = []
    for dirname, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for file in sorted(filenames):
            if not file.endswith(".wav"):
                continue
            fields = file.split("_")
            emotion = emotion_map.get(fields[2]) if len(fields) > 2 else None
            if emotion is None:
                # A stray file should not abort loading the whole dataset.
                print(f"Skipping unrecognised CREMA-D file name: {file}")
                continue
            data.append((os.path.join(dirname, file), emotion))
    return data

def load_savee(path="/kaggle/input/surrey-audiovisual-expressed-emotion-savee"):
    """Collect ``(file path, emotion)`` pairs for the SAVEE ``.wav`` files under ``path``.

    The emotion code is the one- or two-letter prefix (``a``, ``d``, ``f``,
    ``h``, ``n``, ``sa``, ``su``) of the file name. Names that carry a leading
    ``<something>_`` part (e.g. ``DC_a01.wav``) are supported by looking at
    the text after the last underscore first; if that yields no known code
    the start of the full file name is tried, as the original implementation
    did. (Whether the dataset copy in use is speaker-prefixed and/or nested
    in a sub-folder is an assumption -- confirm against the actual layout.)

    Unlike the original, sub-directories are searched too, matching the other
    loaders in this file.

    Args:
        path: Root directory of the dataset.

    Returns:
        list[tuple[str, str]]: ``(full_path, emotion)`` pairs in sorted
        traversal order. Files whose code cannot be resolved keep the
        ``'unknown'`` label.

    Raises:
        FileNotFoundError: If ``path`` is not an existing directory.
    """
    emotion_map = {
        'a': 'angry', 'd': 'disgust', 'f': 'fearful',
        'h': 'happy', 'n': 'neutral', 'sa': 'sad', 'su': 'surprised'
    }

    def _emotion_from(name):
        # Prefer the two-letter codes ('sa', 'su') over the one-letter ones.
        prefix = name[:2] if name[:2] in emotion_map else name[:1]
        return emotion_map.get(prefix)

    # os.walk silently yields nothing for a missing directory; keep failing fast.
    if not os.path.isdir(path):
        raise FileNotFoundError(f"SAVEE directory not found: {path}")

    data = []
    for dirname, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for file in sorted(filenames):
            if not file.endswith(".wav"):
                continue
            code_part = file.rsplit("_", 1)[-1]
            emotion = _emotion_from(code_part) or _emotion_from(file) or 'unknown'
            data.append((os.path.join(dirname, file), emotion))
    return data

In [None]:
# === Combine and Visualize ===
# Concatenate the four corpora (in the same order as before) into one table
# with one row per audio file.
all_data = [*load_ravdess(), *load_tess(), *load_crema(), *load_savee()]
df = pd.DataFrame(all_data, columns=["path", "emotion"])

# Bar chart of files per emotion, most frequent label first.
emotion_order = df['emotion'].value_counts().index
plt.figure(figsize=(10, 5))
sns.countplot(data=df, x='emotion', order=emotion_order)
plt.title("Emotion Count")
plt.xticks(rotation=45)
plt.show()

In [None]:
# === Visualization ===
def visualize_audio(file_path):
    """Show the waveform and the dB-scaled STFT spectrogram of one audio file.

    The file is read with ``librosa.load`` using its default arguments and two
    separate 14x4 figures are displayed; nothing is returned.

    Args:
        file_path: Path of the audio file to plot.
    """
    signal, rate = librosa.load(file_path)

    # Time-domain view.
    plt.figure(figsize=(14, 4))
    librosa.display.waveshow(signal, sr=rate)
    plt.title("Waveplot")
    plt.show()

    # Frequency-domain view: STFT magnitude converted to decibels.
    magnitude_db = librosa.amplitude_to_db(abs(librosa.stft(signal)))
    plt.figure(figsize=(14, 4))
    librosa.display.specshow(magnitude_db, sr=rate, x_axis='time', y_axis='hz')
    plt.colorbar()
    plt.title("Spectrogram")
    plt.show()

In [None]:
# Plot one example clip (the first one listed) for every emotion label:
# a waveform followed by a dB-scaled spectrogram.
unique_emotions = df['emotion'].unique()
for emotion in unique_emotions:
    sample_path = df.loc[df['emotion'] == emotion, 'path'].iloc[0]
    # Only a 3-second window starting 0.5 s into the clip is loaded.
    data, sr = librosa.load(sample_path, duration=3, offset=0.5)

    plt.figure(figsize=(14, 4))
    librosa.display.waveshow(data, sr=sr)
    plt.title(f"Waveplot - {emotion}")
    plt.show()

    spectrum_db = librosa.amplitude_to_db(abs(librosa.stft(data)))
    plt.figure(figsize=(14, 4))
    librosa.display.specshow(spectrum_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
    plt.title(f"Spectrogram - {emotion}")
    plt.show()

In [None]:
# === Data Augmentation ===
def noise(data):
    """Return ``data`` with Gaussian noise added.

    The noise standard deviation is ``0.035 * u * max(data)`` where ``u`` is a
    single draw from ``np.random.uniform()``; one normal sample is then drawn
    per element along the first axis of ``data``.
    """
    amplitude = 0.035 * np.random.uniform() * np.max(data)
    jitter = np.random.normal(size=data.shape[0])
    return data + amplitude * jitter

def stretch(data, rate=0.8):
    """Time-stretch ``data`` by ``rate`` using ``librosa.effects.time_stretch``.

    Args:
        data: Audio samples to stretch.
        rate: Stretch factor forwarded to librosa (default ``0.8``).

    Returns:
        The stretched signal as returned by librosa.
    """
    # ``rate`` is passed by keyword, as in ``augment_and_extract`` in this
    # file: recent librosa releases accept it as a keyword argument only, so
    # the positional form raises TypeError there.
    return librosa.effects.time_stretch(data, rate=rate)

def shift(data):
    """Circularly roll ``data`` by a random whole number of positions.

    The offset is ``int(u * 1000)`` with ``u`` drawn uniformly from
    ``[-5, 5)``; elements pushed past one end wrap around to the other
    (``np.roll``).
    """
    offset = int(np.random.uniform(low=-5, high=5) * 1000)
    rolled = np.roll(data, offset)
    return rolled

def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift ``data`` using ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples to shift.
        sampling_rate: Sampling rate of ``data``, forwarded as ``sr``.
        pitch_factor: Shift amount forwarded as ``n_steps`` (default ``0.7``).

    Returns:
        The pitch-shifted signal as returned by librosa.
    """
    # ``sr`` and ``n_steps`` are passed by keyword, as in
    # ``augment_and_extract`` in this file: recent librosa releases accept
    # them as keyword arguments only, so the positional form raises TypeError.
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

In [None]:
def extract_features(data, sample_rate):
    """Summarise an audio signal as one flat feature vector.

    Each librosa feature matrix is averaged (transpose, then mean over axis 0)
    and the averages are concatenated in this order: zero-crossing rate,
    chroma (from the STFT magnitude), MFCC, MFCC delta, MFCC second-order
    delta, RMS energy, mel spectrogram.

    Args:
        data: Audio samples.
        sample_rate: Sampling rate of ``data``.

    Returns:
        1-D ``numpy`` array of the concatenated averages. Its length depends
        on librosa's default feature sizes; the prediction cell of this
        notebook expects 202 values.
    """
    def _averaged(matrix):
        return np.mean(matrix.T, axis=0)

    zcr = _averaged(librosa.feature.zero_crossing_rate(y=data))

    magnitude = np.abs(librosa.stft(data))
    chroma = _averaged(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate))

    mfcc = librosa.feature.mfcc(y=data, sr=sample_rate)
    mfcc_summaries = [
        _averaged(mfcc),
        _averaged(librosa.feature.delta(mfcc)),
        _averaged(librosa.feature.delta(mfcc, order=2)),
    ]

    rms = _averaged(librosa.feature.rms(y=data))
    mel = _averaged(librosa.feature.melspectrogram(y=data, sr=sample_rate))

    # The leading empty float array keeps the result dtype identical to
    # building the vector up from ``np.array([])`` step by step.
    return np.hstack((np.array([]), zcr, chroma, *mfcc_summaries, rms, mel))

def augment_and_extract(path):
    """Load one clip and return feature vectors for it and three augmented copies.

    A 2.5-second window starting 0.6 s into the file is loaded, then features
    are extracted, in this order, from: the unmodified signal, the signal plus
    Gaussian noise (standard deviation 0.035), the signal pitch-shifted by
    0.7 steps, and the signal time-stretched with rate 1.1.

    Args:
        path: Path of the audio file.

    Returns:
        list: Four feature vectors from ``extract_features``, or an empty list
        if any step raised (the error is printed, not re-raised).
    """
    try:
        data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)

        # Each variant is built lazily so work happens in the same sequence as
        # a straight-line implementation: build variant, extract, next variant.
        variant_builders = (
            lambda: data,
            lambda: data + 0.035 * np.random.normal(0, 1, len(data)),
            lambda: librosa.effects.pitch_shift(data, sr=sample_rate, n_steps=0.7),
            lambda: librosa.effects.time_stretch(data, rate=1.1),
        )
        return [extract_features(build(), sample_rate) for build in variant_builders]
    except Exception as e:
        print(f"Error processing {path}: {e}")
        return []

# === Process Dataset ===
from sklearn.preprocessing import StandardScaler


def _extract_split(frame, augment):
    """Turn the clips listed in ``frame`` into parallel feature / label lists.

    ``augment_and_extract`` returns the unmodified clip's features first,
    followed by the augmented variants. With ``augment=False`` only that first
    entry is kept (the extra variants are computed and discarded, which costs
    time but keeps a single code path for loading and windowing).
    """
    features, labels = [], []
    for path, emotion in zip(frame["path"], frame["emotion"]):
        variants = augment_and_extract(path)
        if not augment:
            variants = variants[:1]
        features.extend(variants)
        labels.extend([emotion] * len(variants))
    return features, labels


if df.empty:
    raise ValueError("Feature extraction failed for all files. Check dataset integrity.")

# The noise augmentation draws from NumPy's global RNG; seed it so the
# extracted features are the same on every run.
np.random.seed(42)

# Split by FILE before augmenting. Splitting after augmentation puts variants
# of the same recording in both train and test, which inflates test accuracy.
# Stratify when every class has at least two files (sklearn requires that).
emotion_counts = df["emotion"].value_counts()
stratify_labels = df["emotion"] if emotion_counts.min() >= 2 else None
train_df, test_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Augment the training clips only; the test set contains unmodified clips.
X_train, y_train_labels = _extract_split(train_df, augment=True)
X_test, y_test_labels = _extract_split(test_df, augment=False)

if not X_train or not X_test:
    raise ValueError("Feature extraction failed for all files. Check dataset integrity.")

X_train = np.array(X_train)
X_test = np.array(X_test)

# Scale features: statistics come from the training split only and are then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Encode labels: fit on every label seen so a class that only occurs in one
# split still gets an id, and force a fixed one-hot width for both splits.
le = LabelEncoder()
le.fit(y_train_labels + y_test_labels)
num_classes = len(le.classes_)
y_train_ids = le.transform(y_train_labels)
y_test_ids = le.transform(y_test_labels)
y_train = to_categorical(y_train_ids, num_classes=num_classes)
y_test = to_categorical(y_test_ids, num_classes=num_classes)

# Keep the combined arrays available under their original names
# (2-D scaled features, integer labels, one-hot labels).
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train_ids, y_test_ids])
y_cat = np.concatenate([y_train, y_test])

# Conv1D expects a trailing channel axis: (samples, features, 1).
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# === CNN Model ===
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Baseline 1-D CNN: three Conv1D -> MaxPooling -> Dropout stages (batch
# normalisation after the first two), then a dense head with a softmax over
# the emotion classes. Note that the following cell rebinds ``model`` and
# ``history`` with a larger network.
model = Sequential([
    Conv1D(64, 5, activation='relu', input_shape=(X_train.shape[1], 1)),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.3),

    Conv1D(128, 5, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.3),

    # No batch normalisation after this stage in the baseline.
    Conv1D(256, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.3),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(y_cat.shape[1], activation='softmax')
])


model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

# Stop when validation loss has not improved for 10 epochs (restoring the best
# weights) and halve the learning rate after 5 stagnant epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5)

# Validation data is held out from the TRAINING set. Using the test set here
# would let early stopping / LR scheduling pick the epoch that happens to fit
# the test data best, so later test metrics would be optimistically biased.
history = model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stop, reduce_lr]
)

In [None]:
from tensorflow.keras.layers import BatchNormalization, Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers

# L2 regularization strength applied to every conv / dense kernel below.
# NOTE(review): 1e-8 is small enough that the penalty is close to a no-op;
# confirm this value is intentional.
l2_lambda = 1e-8

# Deeper variant of the baseline CNN: four conv stages (all batch-normalised),
# lighter dropout and a wider dense layer. This is the model that the later
# cells save, plot and evaluate.
model = Sequential([
    Conv1D(64, 5, activation='relu', input_shape=(X_train.shape[1], 1),
           kernel_regularizer=regularizers.l2(l2_lambda)),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.2),  # Reduced from 0.3

    Conv1D(128, 5, activation='relu',
           kernel_regularizer=regularizers.l2(l2_lambda)),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.2),  # Reduced from 0.3

    Conv1D(256, 3, activation='relu',
           kernel_regularizer=regularizers.l2(l2_lambda)),
    BatchNormalization(),  # Reintroduced
    MaxPooling1D(2),
    Dropout(0.2),  # Reduced from 0.3

    Conv1D(256, 3, activation='relu',  # Added another Conv1D layer
           kernel_regularizer=regularizers.l2(l2_lambda)),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.2),

    Flatten(),
    Dense(512, activation='relu',  # Increased units from 256 to 512
          kernel_regularizer=regularizers.l2(l2_lambda)),
    Dropout(0.25),  # Reduced from 0.4
    Dense(y_cat.shape[1], activation='softmax')
])

# Compile with AdamW
model.compile(loss='categorical_crossentropy', optimizer=AdamW(learning_rate=0.001), metrics=['accuracy'])

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)  # Increased patience
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)  # Lowered min_lr

# Train. Validation data is held out from the TRAINING set: monitoring the
# test set would make early stopping (with restore_best_weights) select the
# weights that fit the test data best, biasing the evaluation cell's metrics.
history = model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stop, reduce_lr]
)

In [None]:
import pickle

# Persist the trained network.
model.save("emotion_model.h5")

# The network alone is not enough for inference: inputs must be scaled with
# the fitted ``scaler`` and predictions decoded with the fitted ``le`` (see
# the prediction cell), so store both next to the model.
# NOTE: only unpickle this file from a trusted source.
with open("emotion_preprocessing.pkl", "wb") as preprocessing_file:
    pickle.dump({"scaler": scaler, "label_encoder": le}, preprocessing_file)

In [None]:
# === Plot Training History ===
# Two side-by-side panels (loss, accuracy), each with the training curve and
# the validation curve recorded by ``model.fit``.
panels = [("loss", "Loss"), ("accuracy", "Accuracy")]

plt.figure(figsize=(12, 4))
for position, (metric, pretty) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"Train {pretty}")
    plt.plot(history.history[f"val_{metric}"], label=f"Val {pretty}")
    plt.title(f"{pretty} Over Epochs")
    plt.legend()
plt.show()

In [None]:
# === Evaluation ===
# Convert softmax outputs / one-hot targets back to integer class ids.
y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Pass the full list of class ids explicitly. Otherwise scikit-learn infers
# the labels from the data, and if any class is missing from both y_true and
# y_pred the report rejects ``target_names`` and the heat-map tick labels no
# longer line up with the matrix rows/columns.
class_ids = np.arange(len(le.classes_))

print(classification_report(y_true, y_pred, labels=class_ids, target_names=le.classes_))

conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, xticklabels=le.classes_, yticklabels=le.classes_, fmt='d')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

In [None]:
import numpy as np
import librosa
from IPython.display import Audio


# === Load a sample audio and predict ===
file_path = "/kaggle/input/ravdess-emotional-speech-audio/Actor_01/03-01-05-01-01-01-01.wav"

# === Load and preprocess audio ===
# Use the same window as ``augment_and_extract`` (2.5 s starting at 0.6 s):
# the averaged features depend on which part of the clip is analysed, so a
# different window at inference time would not match the training inputs.
data, sample_rate = librosa.load(file_path, duration=2.5, offset=0.6)

# === Extract features using the same method used in training ===
feature = extract_features(data, sample_rate)


# === Scale and reshape ===
feature = scaler.transform([feature])  # shape: (1, n_features)
# Add the channel axis the Conv1D input expects, without hard-coding the
# feature count: (batch_size, n_features, 1).
feature = feature[..., np.newaxis]

# === Predict ===
prediction = model.predict(feature)
predicted_class = np.argmax(prediction, axis=1)[0]
predicted_emotion = le.inverse_transform([predicted_class])[0]

print(f"Predicted Emotion: {predicted_emotion}")
Audio(file_path, rate=sample_rate)


In [None]:
# Completion marker: printed once every cell above has run.
print("done")