In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Load the feature table: the "arousal" column is the regression target and
# every remaining column is used as a model input.
data = pd.read_csv('audio_features_with_arousal.csv')
y = data['arousal'].values
X = data.drop(columns=['arousal']).values

# Hold out 20% of the rows for testing; the fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardise using statistics learned from the training split only, then
# apply that same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Inputs become float32 tensors; targets are reshaped into a single column so
# they line up with the network's one-unit output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

# Pair features with targets and batch them; only the training loader shuffles.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)


class FeedforwardNN(nn.Module):
    """Three-layer fully connected network producing one value per input row.

    Architecture: Linear(input_size, hidden1) -> ReLU -> Dropout ->
    Linear(hidden1, hidden2) -> ReLU -> Dropout -> Linear(hidden2, 1).

    Args:
        input_size: Number of features in each input row.
        hidden1: Width of the first hidden layer (default 64).
        hidden2: Width of the second hidden layer (default 32).
        dropout_p: Dropout probability applied after each hidden layer (default 0.5).
    """

    def __init__(self, input_size, hidden1=64, hidden2=32, dropout_p=0.5):
        super().__init__()
        # Attribute names are kept as fc1/fc2/fc3/relu/dropout so state-dict
        # keys stay the same for any checkpoint saved from this class.
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Run a forward pass; returns a tensor with a trailing dimension of size 1."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        # No activation on the last layer: the output is an unbounded regression value.
        x = self.fc3(x)
        return x


# Initialize the model
# Seed PyTorch's global RNG so weight initialisation, dropout masks and the
# training loader's shuffling order repeat from run to run (the train/test
# split above is already seeded with random_state=42).
torch.manual_seed(42)
input_size = X_train.shape[1]
print(input_size)
model = FeedforwardNN(input_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 1000
for epoch in range(num_epochs):
    model.train()  # enable dropout for the optimisation passes
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # MSELoss returns the mean over the batch. Weight it by the batch size
        # so a smaller final batch does not count as much as a full one.
        running_loss += loss.item() * X_batch.size(0)

    # Per-sample mean over the whole epoch (not a mean of per-batch means).
    avg_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

model.eval()  # disable dropout before measuring held-out error
test_loss = 0.0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        test_loss += loss.item() * X_batch.size(0)

# Print average test loss (per-sample mean squared error over the test set)
test_loss /= len(test_loader.dataset)
print(f"Test Loss: {test_loss:.4f}")


24
Epoch [1/1000], Loss: 0.1009
Epoch [2/1000], Loss: 0.0641
Epoch [3/1000], Loss: 0.0588
Epoch [4/1000], Loss: 0.0538
Epoch [5/1000], Loss: 0.0506
Epoch [6/1000], Loss: 0.0458
Epoch [7/1000], Loss: 0.0450
Epoch [8/1000], Loss: 0.0443
Epoch [9/1000], Loss: 0.0470
Epoch [10/1000], Loss: 0.0410
Epoch [11/1000], Loss: 0.0408
Epoch [12/1000], Loss: 0.0411
Epoch [13/1000], Loss: 0.0387
Epoch [14/1000], Loss: 0.0429
Epoch [15/1000], Loss: 0.0388
Epoch [16/1000], Loss: 0.0384
Epoch [17/1000], Loss: 0.0375
Epoch [18/1000], Loss: 0.0366
Epoch [19/1000], Loss: 0.0391
Epoch [20/1000], Loss: 0.0374
Epoch [21/1000], Loss: 0.0371
Epoch [22/1000], Loss: 0.0353
Epoch [23/1000], Loss: 0.0352
Epoch [24/1000], Loss: 0.0364
Epoch [25/1000], Loss: 0.0416
Epoch [26/1000], Loss: 0.0350
Epoch [27/1000], Loss: 0.0354
Epoch [28/1000], Loss: 0.0353
Epoch [29/1000], Loss: 0.0356
Epoch [30/1000], Loss: 0.0359
Epoch [31/1000], Loss: 0.0355
Epoch [32/1000], Loss: 0.0336
Epoch [33/1000], Loss: 0.0342
Epoch [34/1000],

In [18]:
# X_train was overwritten with its standardised values by scaler.fit_transform,
# while final_nn() re-fits a StandardScaler on this file. Saving the scaled
# matrix would make that re-fit close to an identity transform, so undo the
# scaling and store the features in their original units instead.
np.save("X_train.npy", scaler.inverse_transform(X_train))


In [15]:
import torch
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Collect predictions for the whole test set with dropout disabled.
model.eval()

all_predictions = []
all_actuals = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        predictions = model(X_batch)

        # Flatten each (batch, 1) tensor into plain per-sample values.
        all_predictions.extend(predictions.cpu().numpy().flatten())
        all_actuals.extend(y_batch.cpu().numpy().flatten())

mse = mean_squared_error(all_actuals, all_predictions)
mae = mean_absolute_error(all_actuals, all_predictions)

print(f"Test MSE: {mse:.4f}")
print(f"Test MAE: {mae:.4f}")

# Preview at most the first 100 samples. Bounding by the real sample count
# avoids an IndexError (and a wrong divisor) when the test set is smaller.
n_preview = min(100, len(all_predictions))
average_diff = 0
max_diff = 0
for i in range(n_preview):
    print(f"Predicted: {all_predictions[i]:.4f}, Actual: {all_actuals[i]:.4f}")
    abs_diff = abs(all_predictions[i] - all_actuals[i])
    if abs_diff > max_diff:
        max_diff = abs_diff
    average_diff += abs_diff
if n_preview:
    average_diff /= n_preview
print("Average mistake", average_diff)
print("Max mistake", max_diff)


Test MSE: 0.0304
Test MAE: 0.1365
Predicted: -0.1354, Actual: -0.2620
Predicted: 0.3609, Actual: 0.3810
Predicted: 0.3328, Actual: 0.5460
Predicted: -0.2111, Actual: -0.3977
Predicted: 0.2658, Actual: 0.2120
Predicted: 0.1216, Actual: 0.3698
Predicted: -0.0193, Actual: -0.3601
Predicted: -0.1209, Actual: -0.3054
Predicted: -0.0946, Actual: -0.1040
Predicted: 0.3659, Actual: 0.3870
Predicted: 0.4553, Actual: 0.5320
Predicted: -0.0843, Actual: 0.1200
Predicted: 0.1241, Actual: 0.3040
Predicted: 0.4134, Actual: 0.4260
Predicted: -0.1184, Actual: 0.1230
Predicted: 0.1995, Actual: 0.4133
Predicted: 0.1138, Actual: 0.0650
Predicted: 0.2220, Actual: 0.2308
Predicted: -0.0141, Actual: -0.0980
Predicted: 0.3338, Actual: 0.4650
Predicted: -0.0583, Actual: 0.0500
Predicted: 0.3881, Actual: 0.3670
Predicted: 0.0989, Actual: 0.1500
Predicted: 0.4354, Actual: 0.4559
Predicted: 0.3561, Actual: 0.4360
Predicted: 0.2754, Actual: 0.2874
Predicted: 0.5180, Actual: 0.5049
Predicted: 0.2845, Actual: 0.3770

In [16]:
# Destination file for the serialized network.
model_path = "entire_model.pth"

# Pickle the complete module object (architecture and weights together),
# rather than only its state dictionary.
torch.save(obj=model, f=model_path)
print("Entire model saved")

Entire model saved


In [12]:
# import librosa
# import numpy as np
# import torch
# from sklearn.preprocessing import StandardScaler
# 
# def extract_features(file_path, duration=1):
#     y, sr = librosa.load(file_path, sr=44100, duration=duration)
#     tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
#     rms = np.mean(librosa.feature.rms(y=y))
#     spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
#     zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))
#     mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)
#     features = np.hstack([tempo, rms, spectral_centroid, zero_crossing_rate, mfcc])
#     return features
# 
# # Load and scale the features
# file_path = "path/to/your/new_audio_file.wav"
# new_features = extract_features(file_path).reshape(1, -1)
# 
# scaler = StandardScaler()
# scaler.fit(X_train)
# new_features = scaler.transform(new_features)
# 
# # Convert to a PyTorch tensor
# new_features_tensor = torch.tensor(new_features, dtype=torch.float32)
# 
# # Load model and make a prediction
# model.eval()
# with torch.no_grad():
#     prediction = model(new_features_tensor)
# print(f"Predicted Arousal Value: {prediction.item()}")


ModuleNotFoundError: No module named 'librosa'

In [17]:
def final_nn(features):
    """Predict with the saved network for a single feature vector.

    Loads the training matrix from "X_train.npy", fits a StandardScaler on it,
    scales `features` with that scaler, loads the pickled model from
    "entire_model.pth" and runs one forward pass in eval mode with gradient
    tracking disabled.

    Args:
        features: Array-like holding one sample; it is reshaped to a single row.

    Returns:
        The tensor produced by the loaded model for that row.
    """
    features = np.asarray(features).reshape(1, -1)

    # NOTE(review): this re-fit only reproduces the training-time scaling if
    # X_train.npy holds the *unscaled* training features - confirm what was saved.
    scaler = StandardScaler()
    entire_X_train = np.load("X_train.npy")
    scaler.fit(entire_X_train)
    new_features_model = scaler.transform(features)
    new_features_tensor_model = torch.tensor(new_features_model, dtype=torch.float32)

    # NOTE(review): torch.load unpickles arbitrary Python objects, so only load
    # files you created yourself. Recent PyTorch releases default to
    # weights_only=True, which rejects a fully pickled module; pass
    # weights_only=False there if this call fails.
    entire_model = torch.load("entire_model.pth")
    entire_model.eval()

    # `with`, not `while`: the no_grad object is always truthy, so the original
    # `while torch.no_grad():` never terminated.
    with torch.no_grad():
        model_prediction = entire_model(new_features_tensor_model)
    print("Predicted Intensity value is, pred", model_prediction)
    return model_prediction