<a href="https://colab.research.google.com/github/sidpromo/data_mining/blob/main/data_mining_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import torch.nn as nn
import torch
from sklearn.preprocessing import StandardScaler
import torch.optim as optim
import torch.nn.functional as F


url = 'https://raw.githubusercontent.com/sidpromo/data_mining/refs/heads/main/features_30_sec.csv'
data = pd.read_csv(url)
data = data.drop(columns=['filename'])

selected_columns = [
    'chroma_stft_mean', 'chroma_stft_var',  # Chroma features
    'spectral_centroid_mean', 'spectral_bandwidth_mean', 'rolloff_mean', 'zero_crossing_rate_mean',  # Spectral features
    'tempo',  # Tempo
    'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean', 'mfcc4_mean', 'mfcc5_mean',  # MFCCs (first 5)
    'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean', 'mfcc9_mean', 'mfcc10_mean',  # MFCCs (next 5)
    'mfcc11_mean', 'mfcc12_mean', 'mfcc13_mean', 'mfcc14_mean', 'mfcc15_mean',  # MFCCs (next 5)
    'mfcc16_mean', 'mfcc17_mean', 'mfcc18_mean', 'mfcc19_mean', 'mfcc20_mean'  # MFCCs (last 5)
]
# Shuffle the data
shuffled_data = data.sample(frac=1, random_state=42).reset_index(drop=True)  # frac=1 means shuffle all rows
output_count = len(set(shuffled_data['label']))

# Split the dataset
train_data = shuffled_data.iloc[:850]
test_data = shuffled_data.iloc[850:1000]

# label conv
labels_train = train_data['label']
labels_test = test_data['label']

categories = list(set(shuffled_data['label']))

labels_train_numeric = pd.Categorical(labels_train, categories=categories).codes
labels_test_numeric = pd.Categorical(labels_test, categories=categories).codes

# features
features_train = train_data[selected_columns]
features_test = test_data[selected_columns]

# normalize features
scaler = StandardScaler()
features_train = pd.DataFrame(scaler.fit_transform(features_train), columns=features_train.columns)
features_test = pd.DataFrame(scaler.transform(features_test), columns=features_test.columns)

print(f"Training set size: {len(features_train)} samples")
print(f"Test set size: {len(features_test)} samples")
print(f"Input layer size: {features_train.shape[1]}, output layer size: {output_count}")

class GenreRecognizer(nn.Module):
    def __init__(self):
        super(GenreRecognizer, self).__init__()
        input_size = features_train.shape[1]
        nr_first_hidden_layer = 256
        nr_second_hidden_layer = 128
        self.fc1 = nn.Linear(input_size, nr_first_hidden_layer)
        self.fc2 = nn.Linear(nr_first_hidden_layer, nr_second_hidden_layer)
        self.out = nn.Linear(nr_second_hidden_layer, 10)
        self.dropout = nn.Dropout(p=0.3)  # Drop 30% of the neurons during training

    def forward(self, x):
        x = F.relu(self.fc1(x)) # ReLU activation for first hidden layer
        x = self.dropout(x)
        x = F.relu(self.fc2(x)) # ReLU activation for second hidden layer
        x = self.dropout(x)
        x = self.out(x) # Output layer (logits)
        return x # CrossEntropyLoss will handle softmax internally


X_train_tensor = torch.tensor(features_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(labels_train_numeric, dtype=torch.long)
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)


Training set size: 850 samples
Test set size: 150 samples
Input layer size: 27, output layer size: 10


In [5]:
trainloader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=32)

net = GenreRecognizer()
criterion = nn.CrossEntropyLoss()  # CrossEntropyLoss for multi-class classification
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=2e-4)

# Training loop
for epoch in range(1000):
    net.train()
    running_loss = 0.0
    running_n = 0

    # training acc
    correct_predictions = 0
    total_samples = 0

    for inputs, targets in trainloader:
        optimizer.zero_grad()  # Reset gradients
        outputs = net(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()
        running_n += 1

        # Calculate predictions and accuracy
        _, predicted = torch.max(outputs, 1)  # Get class with highest score
        correct_predictions += (predicted == targets).sum().item()  # Count correct predictions
        total_samples += targets.size(0)  # Count total samples


    print(f'Epoch {epoch + 1}, Loss: {running_loss / running_n:.4f}')
    # training_accuracy = correct_predictions / total_samples * 100  # Training accuracy as a percentage
    # print(f'Epoch {epoch + 1}, Loss: {running_loss / running_n:.4f}, Training Accuracy: {training_accuracy:.2f}%')


Epoch 1, Loss: 2.2951
Epoch 2, Loss: 2.2658
Epoch 3, Loss: 2.2191
Epoch 4, Loss: 2.1777
Epoch 5, Loss: 2.1375
Epoch 6, Loss: 2.0842
Epoch 7, Loss: 2.0381
Epoch 8, Loss: 1.9850
Epoch 9, Loss: 1.9572
Epoch 10, Loss: 1.9110
Epoch 11, Loss: 1.8694
Epoch 12, Loss: 1.8395
Epoch 13, Loss: 1.7999
Epoch 14, Loss: 1.7550
Epoch 15, Loss: 1.7324
Epoch 16, Loss: 1.7101
Epoch 17, Loss: 1.6915
Epoch 18, Loss: 1.6777
Epoch 19, Loss: 1.6523
Epoch 20, Loss: 1.6470
Epoch 21, Loss: 1.6081
Epoch 22, Loss: 1.5939
Epoch 23, Loss: 1.5770
Epoch 24, Loss: 1.5678
Epoch 25, Loss: 1.5703
Epoch 26, Loss: 1.5440
Epoch 27, Loss: 1.5470
Epoch 28, Loss: 1.5207
Epoch 29, Loss: 1.5254
Epoch 30, Loss: 1.4862
Epoch 31, Loss: 1.4703
Epoch 32, Loss: 1.4867
Epoch 33, Loss: 1.4687
Epoch 34, Loss: 1.4515
Epoch 35, Loss: 1.4509
Epoch 36, Loss: 1.4502
Epoch 37, Loss: 1.4285
Epoch 38, Loss: 1.4224
Epoch 39, Loss: 1.3994
Epoch 40, Loss: 1.3884
Epoch 41, Loss: 1.3879
Epoch 42, Loss: 1.3723
Epoch 43, Loss: 1.3599
Epoch 44, Loss: 1.35

In [6]:

net.eval()
test_inputs = torch.tensor(features_test.values, dtype=torch.float32)
test_targets = torch.tensor(labels_test_numeric, dtype=torch.short)

test_dataset = torch.utils.data.TensorDataset(test_inputs, test_targets)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=True)

correct = 0
total = 0

with torch.no_grad():
  for inputs, targets in testloader:
    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += targets.size(0)
    correct += (predicted == targets).sum().item()

print(f"Correct: {correct}, total: {total}, ration: {correct/total}")

Correct: 107, total: 150, ration: 0.7133333333333334
