In [17]:
import os
import torch
import librosa
import numpy as np
from transformers import Wav2Vec2Processor, Wav2Vec2Model
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load pretrained Wav2Vec2
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base")
model.eval()

# Load data
DATA_PATH = [
     '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Speech_Actors_01-24',
     '/Users/yathamlohithreddy/Desktop/vscodefloder /marsproject/Audio_Song_Actors_01-24'
]

emotion_map = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

X, y = [], []

def extract_wav2vec_embedding(file_path):
    waveform, sr = librosa.load(file_path, sr=16000)  # Wav2Vec2 needs 16kHz
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling of last hidden state
    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

for path in DATA_PATH:
    for actor in os.listdir(path):
        actor_folder = os.path.join(path, actor)
        if not os.path.isdir(actor_folder):
            continue
        for file in os.listdir(actor_folder):
            if file.endswith(".wav"):
                emotion_id = file.split('-')[2]
                emotion = emotion_map.get(emotion_id)
                if emotion:
                    file_path = os.path.join(actor_folder, file)
                    try:
                        emb = extract_wav2vec_embedding(file_path)
                        X.append(emb)
                        y.append(emotion)
                    except Exception as e:
                        print(f"Failed on {file_path}: {e}")

X = np.array(X)
y = np.array(y)

# Encode emotions
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split
x_train, x_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

#



In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# One-hot encode the labels
y_categorical = to_categorical(y, num_classes=8)

# Split
x_train, x_val, y_train, y_val = train_test_split(X, y_categorical, test_size=0.2, stratify=y, random_state=42)

# Build MLP
model = Sequential([
    Dense(512, activation='relu', input_shape=(X.shape[1],)),
    Dropout(0.4),
    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(8, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=8, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(patience=4, factor=0.5)
    ]
)


ValueError: invalid literal for int() with base 10: np.str_('angry')

In [None]:
from sklearn.metrics import accuracy_score, classification_report, log_loss
import matplotlib.pyplot as plt

# Predictions
y_train_pred = clf.predict(x_train)
y_val_pred = clf.predict(x_val)

# Accuracy
train_acc = accuracy_score(y_train, y_train_pred)
val_acc = accuracy_score(y_val, y_val_pred)

# Optional: Loss (for probabilistic models)
y_train_prob = clf.predict_proba(x_train)
y_val_prob = clf.predict_proba(x_val)
train_loss = log_loss(y_train, y_train_prob)
val_loss = log_loss(y_val, y_val_prob)

# Print classification report
print(classification_report(y_val, y_val_pred))

# Plotting
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.bar(['Train', 'Validation'], [train_acc, val_acc], color=['skyblue', 'orange'])
plt.title("Accuracy")
plt.ylim(0, 1)

plt.subplot(1, 2, 2)
plt.bar(['Train', 'Validation'], [train_loss, val_loss], color=['skyblue', 'orange'])
plt.title("Log Loss")

plt.tight_layout()
plt.show()
