In [None]:
import os
import torch
import torchvision as tv
from torch.utils.data import DataLoader,random_split,Dataset
import torch.nn as nn
from torchvision import transforms,datasets
import torch.optim as optim
from torchmetrics import ConfusionMatrix
import matplotlib.pyplot as plt
import seaborn as sb
from torchmetrics.classification import MulticlassAccuracy
from torchvision.transforms import Resize,ToTensor,Compose

**Import dataset**

In [1]:
# Root folder of the spectrogram images (read later with ImageFolder).
# When running on Kaggle, use this path instead:
#   '/kaggle/input/gtzan-dataset-music-genre-classification/Data/images_original'
data_path = 'Data/images_original'

# Names of the ten music genres.
genres = [
    'blues', 'classical', 'country', 'disco', 'hiphop',
    'jazz', 'metal', 'pop', 'reggae', 'rock',
]

# Fractions of the dataset used for training / validation / testing.
training_size, validation_size, testing_size = 0.7, 0.2, 0.1

# Two alternative epoch budgets; the training cell picks one of them.
epoch_a, epoch_b = 50, 100

**Apply transformations**

In [None]:
# Bare-bones preprocessing (resize to 180x180, convert to tensor) used only
# while measuring the mean and standard deviation of the image dataset.
transform_all_a = Compose([Resize((180, 180)), ToTensor()])

# find mean and standard deviation of image dataset for normalisation
def find_norm_vals(loader):
    """Compute the per-channel mean and standard deviation of an image dataset.

    Args:
        loader: iterable yielding ``(images, labels)`` pairs, where ``images``
            is a 4-D tensor unpacked as (batch, channels, height, width).
            The labels are ignored.

    Returns:
        ``(mean, stnd)``: two float tensors with one entry per channel. The
        standard deviation is the population value taken over every pixel of
        every image, so it does not depend on how the data is batched.

    Raises:
        ValueError: if ``loader`` yields no pixels at all.
    """
    pixels = 0             # number of pixels seen per channel
    channel_sum = None     # running sum of pixel values, per channel
    channel_sq_sum = None  # running sum of squared pixel values, per channel

    for images, _ in loader:

        # get batch size, number of channels, height, and width of image
        batch_size, channel_num, height, width = images.shape
        pixels += batch_size * height * width

        # accumulate in float64 so that summing many pixels does not lose precision
        batch = images.to(torch.float64)
        batch_sum = batch.sum(dim=(0, 2, 3))
        batch_sq_sum = (batch * batch).sum(dim=(0, 2, 3))

        if channel_sum is None:
            channel_sum, channel_sq_sum = batch_sum, batch_sq_sum
        else:
            channel_sum = channel_sum + batch_sum
            channel_sq_sum = channel_sq_sum + batch_sq_sum

    if pixels == 0:
        raise ValueError("find_norm_vals: loader produced no images")

    # mean = E[x]; variance = E[x^2] - E[x]^2 (clamped: rounding can make it slightly negative)
    mean = channel_sum / pixels
    stnd = (channel_sq_sum / pixels - mean * mean).clamp(min=0).sqrt()

    return mean.float(), stnd.float()

# Load every image with the statistics-only transform.
gtzan_dataset = datasets.ImageFolder(root=data_path, transform=transform_all_a)

# Order is irrelevant for aggregate statistics, so do not shuffle: a fixed
# batch order keeps the printed values identical from run to run.
loader = DataLoader(gtzan_dataset, batch_size=64, shuffle=False)

mean, std = find_norm_vals(loader)

print(mean, std)

In [None]:
# Deterministic preprocessing for the validation and test sets: same resize and
# normalisation as the training pipeline, but no random augmentation.
transform_all = transforms.Compose([
    transforms.Resize((180, 180)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Data augmentation for the training set.
transform_augment = transforms.Compose([
    transforms.Resize((180, 180)),       # resize image as specified
    transforms.RandomHorizontalFlip(),   # flip image horizontally at random
    transforms.RandomRotation(50),       # rotate image by up to 50 degrees
    # the 0.5 values are jitter strengths (ranges), not probabilities
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),  # normalise with the measured statistics
])

# Untransformed dataset: used only for its length and to draw the split indices.
gtzan_dataset = datasets.ImageFolder(root=data_path)
sample_num = len(gtzan_dataset)

train_samples = int(training_size * sample_num)
val_samples = int(validation_size * sample_num)
test_samples = sample_num - train_samples - val_samples  # remainder, so the three sizes always add up

# define seed for reproducibility
seed = 2
torch.manual_seed(seed)

# Draw the random split once; only the resulting indices are used below.
train_split, val_split, test_split = random_split(
    gtzan_dataset, [train_samples, val_samples, test_samples]
)

# The subsets returned by random_split all wrap the SAME dataset object, so
# assigning `.dataset.transform` on each of them would leave every split with
# whichever transform was assigned last. Give each split its own ImageFolder
# (same files, same ordering) carrying the transform meant for it.
train_data = torch.utils.data.Subset(
    datasets.ImageFolder(root=data_path, transform=transform_augment),
    train_split.indices,
)
val_data = torch.utils.data.Subset(
    datasets.ImageFolder(root=data_path, transform=transform_all),
    val_split.indices,
)
test_data = torch.utils.data.Subset(
    datasets.ImageFolder(root=data_path, transform=transform_all),
    test_split.indices,
)

# load data using DataLoader (only the training data needs shuffling)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# flatten input
# itr=iter(train_loader)
# imgs,labels=next(itr)
# input=torch.flatten(imgs,start_dim=1)


# Net1

In [None]:

# Layer widths of the fully connected network.
input_size = 3 * 180 * 180            # number of values in one flattened 3x180x180 image
hidden_size1, hidden_size2 = 128, 64  # widths of the two hidden layers
output_size = 10                      # number of output scores

# define model
class Net1(nn.Module):
    """Fully connected network: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    The layer widths are read from the module-level ``input_size``,
    ``hidden_size1``, ``hidden_size2`` and ``output_size`` values.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Flatten(),  # default start_dim=1: keep the batch dimension, flatten the rest
            nn.Linear(input_size, hidden_size1),
            nn.ReLU(),
            nn.Linear(hidden_size1, hidden_size2),
            nn.ReLU(),
            nn.Linear(hidden_size2, output_size),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the final layer's output."""
        scores = self.net(x)
        return scores

# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell does not crash on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net1().to(device)

# define learning rate as 0.0001 and optimiser as Adam
lr = 0.0001
optimizer = optim.Adam(model.parameters(), lr=lr)

# Net2

In [None]:
# define model
class Net2(nn.Module):
    """Convolutional network: two conv-conv-pool stages followed by two linear layers.

    The shape comments assume a (3, 180, 180) input image. ``do1`` and ``bn``
    are created in ``__init__`` but are not used by ``forward``.
    """

    def __init__(self):
        super().__init__()
        # stage 1: (3,180,180) -> (64,180,180) -> (128,180,180) -> pool -> (128,90,90)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding="same")
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding="same")
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # stage 2: (128,90,90) -> (256,90,90) -> (256,90,90) -> pool -> (256,45,45)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding="same")
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, padding="same")
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # one ReLU module shared by every activation in forward()
        self.relu = nn.ReLU()
        # classifier head on the flattened (256,45,45) feature map
        self.fc1 = nn.Linear(256 * 45 * 45, 256)
        self.fc2 = nn.Linear(256, 10)
        self.do1 = nn.Dropout(p=0.8)
        self.bn = nn.BatchNorm2d(256)

    def forward(self, x):
        """Return the 10 output scores for each image in the batch ``x``."""
        x = self.relu(self.conv1(x))
        x = self.pool1(self.relu(self.conv2(x)))
        x = self.relu(self.conv3(x))
        x = self.pool2(self.relu(self.conv4(x)))
        # flatten everything except the batch dimension
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        # dropout (self.do1) is currently disabled
        return self.fc2(x)

# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell does not crash on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net2().to(device)

# define learning rate as 0.0001 and optimiser as Adam
lr = 0.0001
optimizer = optim.Adam(model.parameters(), lr=lr)

# Net3

In [None]:
# define model
class Net3(nn.Module):
    """Convolutional network with one batch-normalisation layer.

    Two conv-conv-pool stages feed two linear layers; ``bn`` is applied after
    the activation of ``conv3``. The shape comments assume a (3, 180, 180)
    input image. ``do1`` is created but not used by ``forward``.
    """

    def __init__(self):
        super().__init__()
        # stage 1: (3,180,180) -> (64,180,180) -> (128,180,180) -> pool -> (128,90,90)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding="same")
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding="same")
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # stage 2: (128,90,90) -> (256,90,90) -> (256,90,90) -> pool -> (256,45,45)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding="same")
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, padding="same")
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # one ReLU module shared by every activation in forward()
        self.relu = nn.ReLU()
        # classifier head on the flattened (256,45,45) feature map
        self.fc1 = nn.Linear(256 * 45 * 45, 256)
        self.fc2 = nn.Linear(256, 10)
        self.do1 = nn.Dropout(p=0.8)
        self.bn = nn.BatchNorm2d(256)

    def forward(self, x):
        """Return the 10 output scores for each image in the batch ``x``."""
        x = self.relu(self.conv1(x))
        x = self.pool1(self.relu(self.conv2(x)))
        # batch normalisation sits between the two convolutions of stage 2
        x = self.bn(self.relu(self.conv3(x)))
        x = self.pool2(self.relu(self.conv4(x)))
        # flatten everything except the batch dimension
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        # dropout (self.do1) is currently disabled
        return self.fc2(x)
    
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell does not crash on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net3().to(device)

# define learning rate as 0.0001 and optimiser as Adam
lr = 0.0001
optimizer = optim.Adam(model.parameters(), lr=lr)


# Net4

In [None]:
# define model
class Net4(nn.Module):
    """Convolutional network with one batch-normalisation layer.

    Layer layout: conv1 -> conv2 -> pool1 -> conv3 -> bn -> conv4 -> pool2 ->
    fc1 -> fc2, with a ReLU after every convolution and after fc1. The shape
    comments assume a (3, 180, 180) input image. ``do1`` is created but not
    used by ``forward``.
    """

    def __init__(self):
        super().__init__()
        # (3,180,180) -> (64,180,180)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding="same")
        # (64,180,180) -> (128,180,180)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding="same")
        # (128,180,180) -> (128,90,90)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # (128,90,90) -> (256,90,90)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding="same")
        # (256,90,90) -> (256,90,90)
        self.conv4 = nn.Conv2d(256, 256, kernel_size=3, padding="same")
        # (256,90,90) -> (256,45,45)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(256 * 45 * 45, 256)
        self.fc2 = nn.Linear(256, 10)
        self.do1 = nn.Dropout(p=0.8)
        self.bn = nn.BatchNorm2d(256)

    def forward(self, x):
        """Return the 10 output scores for each image in the batch ``x``."""
        # first convolutional stage
        features = self.relu(self.conv1(x))
        features = self.relu(self.conv2(features))
        features = self.pool1(features)
        # second convolutional stage, batch-normalised after conv3's activation
        features = self.relu(self.conv3(features))
        features = self.bn(features)
        features = self.relu(self.conv4(features))
        features = self.pool2(features)
        # classifier head on the flattened features (dropout self.do1 is disabled)
        flat = features.view(features.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)
    
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell does not crash on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net4().to(device)

# define learning rate as 0.0001 and optimiser as RMSprop
lr = 0.0001
optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha=0.99)


**Train model**

In [None]:
# track running model
print("Running model:", model.__class__.__name__)

# define loss function
loss_fn = nn.CrossEntropyLoss()

# switch in epoch variable as needed
epochs = epoch_a
# epochs = epoch_b

# Train on whichever device the model's parameters live on instead of assuming CUDA.
device = next(model.parameters()).device

# Put layers such as BatchNorm/Dropout in training mode explicitly, in case the
# model was switched to evaluation mode before this cell was (re-)run.
model.train()

# run training loop
for epoch in range(epochs):

    # sum of per-sample losses over this epoch
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # move data next to the model
        optimizer.zero_grad()  # reset gradients

        # complete forward pass
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # complete backward pass
        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean, so weight it by the batch size
        running_loss += loss.item() * images.size(0)

    # average loss per training sample
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}')



**Evaluate model performance**

In [None]:
# define confusion matrix with 10 classes, kept on the same device as the
# model (the predictions passed to it are produced there) rather than assuming CUDA
con_mat = ConfusionMatrix(task='multiclass', num_classes=10).to(next(model.parameters()).device)

# evaluate model accuracy
def evaluate(loader, model):
    """Measure classification accuracy of ``model`` on ``loader``.

    The model is switched to evaluation mode and run without gradient tracking
    for the duration of the call, then returned to whatever mode it was in.
    Every batch of predictions is also fed to the module-level ``con_mat``,
    which is not reset here, so repeated calls accumulate into it.

    Args:
        loader: iterable of ``(images, labels)`` batches that also exposes a
            ``dataset`` attribute supporting ``len()``.
        model: the network to evaluate; inputs are moved to the device of its
            parameters.

    Returns:
        ``(correct, accuracy)``: the number of correct predictions (int) and
        that number divided by ``len(loader.dataset)`` (float, 0.0 when the
        dataset is empty).
    """
    # run on whichever device the model lives on instead of assuming CUDA
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # evaluation mode: BatchNorm uses its running statistics, Dropout is disabled
    was_training = model.training
    model.eval()

    correct = 0
    try:
        with torch.no_grad():  # no gradients needed, saves memory and time
            for imgs, labels in loader:
                imgs = imgs.to(device)
                labels = labels.to(device)
                outputs = model(imgs)

                # predicted class = index of the largest output score
                pred = outputs.argmax(dim=1)

                # update total correct count and update confusion matrix
                correct += int((pred == labels).sum().item())
                con_mat.update(pred, labels)
    finally:
        # leave the model in the mode the caller had it in
        model.train(was_training)

    sample_count = len(loader.dataset)
    accuracy = correct / sample_count if sample_count else 0.0
    return correct, accuracy

nitems, accuracy = evaluate(test_loader, model)
print("Test accuracy={:.4f}".format(accuracy))

# output confusion matrix; con_mat was updated with (predictions, labels), so
# rows are the true classes and columns the predicted classes.
# NOTE(review): the tick labels assume the class indices follow the order of
# `genres` (ImageFolder numbers classes by sorted folder name) - confirm.
x = con_mat.compute().cpu().numpy()
fig, ax = plt.subplots(figsize=(10, 7))
sb.heatmap(x, annot=True, fmt=".0f", xticklabels=genres, yticklabels=genres, ax=ax)
ax.set_xlabel("Predicted genre")
ax.set_ylabel("True genre")
ax.set_title("Test-set confusion matrix ({})".format(model.__class__.__name__))
plt.show()