# Polonaise vs. Waltz

In [12]:
labels  = ['Polonaise','Waltz']

audio_dir = 'data/samples_by_fold/'
data_dir = 'data/imageNet_data/'
spectrogram_dir = 'data/spectrograms/'

## Creating spectrograms

In [2]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram # Convert data to Spectogram
import librosa # https://librosa.org/doc/latest/index.html
import numpy as np
from pathlib import Path
import os
import librosa.display
import gc

gc.enable()

def create_fold_spectrograms(fold):
    gc.collect()
    spectrogram_path = Path(spectrogram_dir)  
    audio_path = Path(audio_dir)  
    print(f'Processing fold {fold}')
    os.mkdir(spectrogram_path/fold)
    for audio_file in list(Path(audio_path/f'Fold_{fold}').glob('*.wav')):
        samples, sample_rate = librosa.load(audio_file) #loads and decodes the audio to time series samples
        fig = plt.figure(figsize=[0.72,0.72])
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        filename  = spectrogram_path/fold/Path(audio_file).name.replace('.wav','.png')
        S = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
        plt.savefig(filename, dpi=400, bbox_inches='tight',pad_inches=0)
        plt.close('all')

In [3]:
for i in range(6, 11):
    create_fold_spectrograms(str(i))

Processing fold 6
Processing fold 7
Processing fold 8
Processing fold 9
Processing fold 10


Creating ImageNet style directory structure for each of the folds for easy data loading

In [31]:
import os
import shutil
from pathlib import Path

data_path = Path(data_dir)
spectrogram_path = Path(spectrogram_dir)


def create_fold_directory(fold):
    gc.collect()
    png_files = list(Path(spectrogram_path / fold).glob('*.png'))
    os.mkdir(data_path / fold)
    os.mkdir(data_path / fold / 'train')
    os.mkdir(data_path / fold / 'valid')
    print(f'Processing fold {fold}')

    for label in labels:
        os.mkdir(data_path / fold / 'train' / label)
        os.mkdir(data_path / fold / 'valid' / label)

    for i, file in enumerate(png_files, start=1):
        if file.name.__contains__('Polonaise'):
            label = 0
        else:  # Waltz
            label = 1

        new_file_name = f"{i}.png"
        shutil.copy(file, data_path / fold / 'valid' / labels[label] / new_file_name)

    for i in range(1, 11):
        if str(i) == fold:
            continue
        png_files = list(Path(spectrogram_path / str(i)).glob('*.png'))
        for j, file in enumerate(png_files, start=1):
            if file.name.__contains__('Polonaise'):
                label = 0
            else:  # Waltz
                label = 1
            new_file_name = f"{j}.png"
            shutil.copy(file, data_path / fold / 'train' / labels[label] / new_file_name)


In [32]:
for i in range(1, 11):
    create_fold_directory(str(i))

Processing fold 1
Processing fold 2
Processing fold 3
Processing fold 4
Processing fold 5
Processing fold 6
Processing fold 7
Processing fold 8
Processing fold 9
Processing fold 10


### Initial look at the data

In [7]:
from pathlib import Path
from fastai.vision.augment import Resize
from fastai.vision.data import ImageDataLoaders
import torch

data_path = Path(data_dir + '/1' )
device = torch.device('mps')
# Assuming you have 'train' and 'valid' directories inside each fold directory
data = ImageDataLoaders.from_folder( data_path)

# Display some information about the loaded data
#data.show_batch()


Could not do one pass in your dataloader, there is something wrong in it. Please see the stack trace below:


RuntimeError: The MPS backend is supported on MacOS 12.3+.Current OS version can be queried using `sw_vers`

In [1]:
data.show_batch(rows=6, figsize=(12,12))

NameError: name 'data' is not defined

## Training and testing 

In [39]:
def process_fold(fold):
    gc.collect()
    data_directory = Path(spectrogram_dir)
    data = ImageDataLoaders.from_folder(data_directory/fold, ds_tfms=[], size=224)
    data.normalize(imagenet_stats)
    learn = cnn_learner(data, models.resnet34, metrics=error_rate)
    learn.fit_one_cycle(3)
    learn.unfreeze()
    learn.fit_one_cycle(1)
    learn.fit_one_cycle(3, max_lr=slice(1e-6,1e-4))
    return learn

In [40]:
final_scores = []
for fold in range(1,11):
  print(f'Processing Fold {fold}')
  learn = process_fold(str(fold))
  final_scores.append(learn.recorder.metrics[2][0].tolist())
  print('-'*50)

Processing Fold 1


TypeError: 'NoneType' object is not iterable

In [None]:
print(f'Final Accuracy : {round(1-np.mean(final_scores),4)*100}%')

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import numpy as np
from pathlib import Path

# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
}

data_path = Path(data_dir + '/1')

# Load the data
image_datasets = {x: datasets.ImageFolder(data_path / x, data_transforms[x]) for x in ['train', 'valid']}
dataloaders = {x: DataLoader(image_datasets[x], batch_size=64, shuffle=True, num_workers=4) for x in ['train', 'valid']}

# Define the model
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # Assuming there are 2 classes
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(3):  # Adjust the number of epochs as needed
    for phase in ['train', 'valid']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(image_datasets[phase])
        epoch_acc = corrects.double() / len(image_datasets[phase])

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

print('Training complete')


  torch.set_num_threads(1)
  torch.set_num_threads(1)
  torch.set_num_threads(1)
  torch.set_num_threads(1)


KeyboardInterrupt: 