### Model preparation

In [1]:
import torch
import torch.nn as nn
from torchvision.models import densenet121
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os
import pandas as pd

num_classes = 8

model = densenet121()
model.classifier = nn.Linear(model.classifier.in_features, num_classes)

### Dataset preparation

In [2]:
class SpectrogramDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None, label_map=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.label_map = label_map or self._build_label_map()

    def _build_label_map(self):
        labels = self.annotations['label'].unique()
        return {label: idx for idx, label in enumerate(sorted(labels))}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        filename = self.annotations.iloc[idx, 0]
        subfolder = filename[:3]
        img_name = os.path.join(self.root_dir, subfolder, filename)
        print(img_name)
        image = Image.open(img_name).convert('RGB')
        label_str = self.annotations.iloc[idx, 1]
        label = self.label_map[label_str]

        if self.transform:
            image = self.transform(image)

        return image, label


In [3]:
'''

df = pd.read_csv("project_data/tracks.csv")
df['filename'] = df['track_id'].apply(lambda x: f"{int(x):06d}.png")
df = df.dropna(subset=['genre'])
labels_df = df[['filename', 'genre']].rename(columns={'genre': 'label'})
train_df, val_df = train_test_split(
    labels_df,
    test_size=0.2,
    stratify=labels_df['label'],
    random_state=42
)
train_df.to_csv("project_data/train_set.csv", index=False)
val_df.to_csv("project_data/val_set.csv", index=False)

'''

'\n\ndf = pd.read_csv("project_data/tracks.csv")\ndf[\'filename\'] = df[\'track_id\'].apply(lambda x: f"{int(x):06d}.png")\ndf = df.dropna(subset=[\'genre\'])\nlabels_df = df[[\'filename\', \'genre\']].rename(columns={\'genre\': \'label\'})\ntrain_df, val_df = train_test_split(\n    labels_df,\n    test_size=0.2,\n    stratify=labels_df[\'label\'],\n    random_state=42\n)\ntrain_df.to_csv("project_data/train_set.csv", index=False)\nval_df.to_csv("project_data/val_set.csv", index=False)\n\n'

In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = SpectrogramDataset(
    csv_file='project_data/train_set.csv',
    root_dir='project_data/spectrograms',
    transform=transform
)

val_dataset = SpectrogramDataset(
    csv_file='project_data/val_set.csv',
    root_dir='project_data/spectrograms',
    transform=transform
)

In [5]:
#validation
image, label = train_dataset[1]
print(image.shape)
print(label)
print(train_dataset.label_map)


project_data/spectrograms\006\006330.png
torch.Size([3, 224, 224])
5
{'Electronic': 0, 'Experimental': 1, 'Folk': 2, 'Hip-Hop': 3, 'Instrumental': 4, 'International': 5, 'Pop': 6, 'Rock': 7}


### Model training

In [6]:
import torch.optim as optim
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

checkpoint = torch.load('checkpoint.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch']
num_epochs = 10

print(start_epoch)

1


In [7]:
for epoch in range(start_epoch, num_epochs):

    #Training
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)

    #Validation
    model.eval()
    correct = 0
    total = 0
    val_loss = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    accuracy = correct / total

    print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Accuracy: {accuracy:.4f}")

project_data/spectrograms\127\127280.png
project_data/spectrograms\116\116383.png
project_data/spectrograms\127\127330.png
project_data/spectrograms\022\022000.png
project_data/spectrograms\122\122627.png
project_data/spectrograms\070\070403.png
project_data/spectrograms\117\117944.png
project_data/spectrograms\012\012052.png
project_data/spectrograms\055\055232.png
project_data/spectrograms\058\058162.png
project_data/spectrograms\120\120206.png
project_data/spectrograms\061\061734.png
project_data/spectrograms\004\004070.png
project_data/spectrograms\115\115268.png
project_data/spectrograms\141\141901.png
project_data/spectrograms\145\145464.png
project_data/spectrograms\007\007528.png
project_data/spectrograms\132\132589.png
project_data/spectrograms\056\056799.png
project_data/spectrograms\038\038879.png
project_data/spectrograms\098\098585.png
project_data/spectrograms\071\071695.png
project_data/spectrograms\075\075375.png
project_data/spectrograms\120\120296.png
project_data/spe

KeyboardInterrupt: 

In [None]:
'''
torch.save({
    'epoch': 1,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}, 'checkpoint.pth')
'''

In [None]:
'''
torch.save(model.state_dict(), 'model.pth')
'''

In [None]:
'''

num_classes = 8
model = densenet121()
model.classifier = nn.Linear(model.classifier.in_features, num_classes)
model.load_state_dict(torch.load('model.pth'))
model.eval()
success = 0
val_size = 1
for elem in val_dataset:
    image, label = elem
    with torch.no_grad():
        output = model(image.unsqueeze(0))
    _, predicted = torch.max(output, 1)

    if predicted == label:
        success += 1
    val_size += 1
print(success/val_size)

'''