In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchaudio
from PlaylistDatasetClass import PlaylistDataset
from CNNClass import CNNNetwork


def create_data_loader(train_data, batch_size):
    train_dataloader = DataLoader(train_data, batch_size=batch_size)
    return train_dataloader

def train_single_epoch(model, data_loader, loss_fn, optimizer, device):
    for _input, target in data_loader:
        print(len(data_loader))
        _input, target = _input.to(device), target.to(device)
        
        prediction = model(_input)
        loss = loss_fn(prediction, target)
        
        optimizer.zero_grad()
        print('zero grad')
        loss.backward()
        print('backward')
        optimizer.step()
        print('step')
    print(f"loss: {loss.item()}")

def train(model, data_loader, loss_fn, optimizer, device, epochs):
    for i in range(epochs):
        print(f"Epochs {i+1}")
        train_single_epoch(model, data_loader, loss_fn, optimizer, device)
        print("---------------------------")
    print('Finished training.')    

In [None]:
if __name__ == "__main__":
    
    BATCH_SIZE = 128
    EPOCHS = 10
    LEARNING_RATE = 0.001
    ANNOTATIONS_FILE = 'data/annotations_file.csv'
    AUDIO_DIR = 'mp3'
    SAMPLE_RATE = 44100
    NUM_SAMPLES = 44100*120 # 2 minutes of audio
    
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Using Device: ', device)

    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
            sample_rate=SAMPLE_RATE,
            n_fft=1024,
            hop_length=512,
            n_mels=64
        )

    playlist_dataset = PlaylistDataset(ANNOTATIONS_FILE, 
                                       AUDIO_DIR, 
                                       mel_spectrogram, 
                                       SAMPLE_RATE, 
                                       NUM_SAMPLES,
                                       device)

    train_dataloader = create_data_loader(playlist_dataset, BATCH_SIZE)
    
    cnn = CNNNetwork().to(device)
    print(cnn)
    
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LEARNING_RATE)
    
    train(cnn, train_dataloader, loss_fn, optimizer, device, EPOCHS)
    
    torch.save(cnn.state_dict(), "feedforwardnet.pth")
    print("Trained feed forward net save at feedforwardnet.pth")

Using Device:  cpu
CNNNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=414080, out_features=85, bias=True)
  (softmax): Softmax(dim=1)
)
Epochs 1
82
