In [3]:
# === Imports ===
import os, sys, warnings
import numpy as np
import pandas as pd
import librosa, librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dropout, Dense
from keras.callbacks import ReduceLROnPlateau

warnings.simplefilter("ignore")

# === Emotion Mapping ===
emotion_map = {
    1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
    5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'
}
emotion_list = list(emotion_map.values())

# === Load RAVDESS ===
ravdess_dir = '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24'  # ✅ Update with your actual RAVDESS path
ravdess_dirs = os.listdir(ravdess_dir)

file_emotion, file_path = [], []
for actor in ravdess_dirs:
    actor_path = os.path.join(ravdess_dir, actor)
    if not os.path.isdir(actor_path):
        continue
    for file in os.listdir(actor_path):
        try:
            emotion = int(file.split('-')[2])
            file_emotion.append(emotion_map[emotion])
            file_path.append(os.path.join(actor_path, file))
        except:
            continue

ravdess_df = pd.DataFrame({'Emotions': file_emotion, 'Path': file_path})

# === Load Song Dataset ===
song_dir = '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Song_Actors_01-24'  # ✅ Update with your actual song dataset path
song_paths, song_emotions = [], []
for root, _, files in os.walk(song_dir):
    for file in files:
        if file.endswith(('.wav', '.mp3')):
            for emo in emotion_list:
                if emo in file.lower():
                    full_path = os.path.join(root, file)
                    if os.path.exists(full_path):
                        song_paths.append(full_path)
                        song_emotions.append(emo)
                    break

song_df = pd.DataFrame({'Path': song_paths, 'Emotions': song_emotions})

# === Combine + Clean ===
data_path = pd.concat([ravdess_df, song_df], ignore_index=True)
data_path.dropna(subset=['Path', 'Emotions'], inplace=True)
data_path = data_path[data_path['Emotions'].isin(emotion_list)]
data_path = data_path[data_path['Path'].apply(os.path.exists)].reset_index(drop=True)

print("Total audio files after merge:", len(data_path))

# === Augmentation Functions ===
def noise(data):
    noise_amp = 0.035 * np.random.uniform() * np.amax(data)
    return data + noise_amp * np.random.normal(size=data.shape[0])

def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate)

def pitch(data, sr, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sr, pitch_factor)

# === Feature Extraction ===
def extract_features(data, sr):
    result = np.array([])
    result = np.hstack((result, np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)))
    stft = np.abs(librosa.stft(data))
    result = np.hstack((result, np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)))
    result = np.hstack((result, np.mean(librosa.feature.mfcc(y=data, sr=sr).T, axis=0)))
    result = np.hstack((result, np.mean(librosa.feature.rms(y=data).T, axis=0)))
    result = np.hstack((result, np.mean(librosa.feature.melspectrogram(y=data, sr=sr).T, axis=0)))
    return result

def get_features(path):
    data, sr = librosa.load(path, duration=2.5, offset=0.6)
    res1 = extract_features(data, sr)
    res2 = extract_features(noise(data), sr)
    res3 = extract_features(pitch(stretch(data), sr), sr)
    return np.vstack([res1, res2, res3])

# === Process All Audio Files ===
X, Y = [], []
success_count = 0

for path, emotion in zip(data_path.Path, data_path.Emotions):
    try:
        features = get_features(path)
        for f in features:
            X.append(f)
            Y.append(emotion)
        success_count += 1
    except Exception as e:
        print(f"[WARN] Failed on {path}: {e}")

print("Successfully processed:", success_count)

if len(Y) == 0:
    raise ValueError("No features extracted. Check dataset or feature extraction.")

# === Preprocessing ===
X = np.array(X)
Y = np.array(Y)
encoder = OneHotEncoder()
Y = encoder.fit_transform(Y.reshape(-1, 1)).toarray()

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0, shuffle=True)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)

# === CNN Model ===
model = Sequential([
    Conv1D(256, 5, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)),
    MaxPooling1D(5, strides=2, padding='same'),
    Conv1D(256, 5, padding='same', activation='relu'),
    MaxPooling1D(5, strides=2, padding='same'),
    Conv1D(128, 5, padding='same', activation='relu'),
    MaxPooling1D(5, strides=2, padding='same'),
    Dropout(0.2),
    Conv1D(64, 5, padding='same', activation='relu'),
    MaxPooling1D(5, strides=2, padding='same'),
    Flatten(),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(8, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# === Train ===
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, patience=2, min_lr=1e-7)
history = model.fit(x_train, y_train, batch_size=64, epochs=50, validation_data=(x_test, y_test), callbacks=[rlrp])

# === Evaluate ===
print(f"Test Accuracy: {model.evaluate(x_test, y_test)[1]*100:.2f}%")

# === Plot ===
epochs = range(50)
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
ax[0].plot(epochs, history.history['loss'], label='Train Loss')
ax[0].plot(epochs, history.history['val_loss'], label='Test Loss')
ax[0].legend(); ax[0].set_title("Loss")

ax[1].plot(epochs, history.history['accuracy'], label='Train Accuracy')
ax[1].plot(epochs, history.history['val_accuracy'], label='Test Accuracy')
ax[1].legend(); ax[1].set_title("Accuracy")
plt.show()

# === Confusion Matrix ===
y_pred = encoder.inverse_transform(model.predict(x_test))
y_true = encoder.inverse_transform(y_test)

cm = confusion_matrix(y_true, y_pred)
cm_df = pd.DataFrame(cm, index=encoder.categories_[0], columns=encoder.categories_[0])
plt.figure(figsize=(12, 10))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.ylabel("Actual")
plt.xlabel("Predicted")
plt.show()

print(classification_report(y_true, y_pred))


Total audio files after merge: 1440
[WARN] Failed on /Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24/Actor_16/03-01-05-01-02-01-16.wav: time_stretch() takes 1 positional argument but 2 were given
[WARN] Failed on /Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24/Actor_16/03-01-06-01-02-02-16.wav: time_stretch() takes 1 positional argument but 2 were given
[WARN] Failed on /Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24/Actor_16/03-01-06-02-01-02-16.wav: time_stretch() takes 1 positional argument but 2 were given
[WARN] Failed on /Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24/Actor_16/03-01-05-02-01-01-16.wav: time_stretch() takes 1 positional argument but 2 were given
[WARN] Failed on /Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24/Actor_16/03-01-07-01-01-01-16.wav: time_stretch() takes 1 positional argum

ValueError: No features extracted. Check dataset or feature extraction.