In [1]:
import os
import time
import glob
import torch
import numpy as np
import pandas as pd
import librosa
from torch.utils.data.dataset import random_split
from torch.utils.data import Dataset, DataLoader
import torch.utils.model_zoo as model_zoo
import torch.optim as optim
import torch.nn as nn

In [86]:
SAVEPATH = "save/"
SPLIT_RATE = 0.9
BATCH_SIZE = 20

SAVE_EVERY = 8
DEV_EVERY = 8
LOG_EVERY = 4

In [2]:
# filepath = "data/maestro-v3.0.0_modify.csv"
# data = pd.read_csv(filepath)
# # Subset for  composer identification
# counts = data["canonical_composer"].value_counts()
# selected_composers = counts[counts >= 5].index #24
# selected_composers = np.asarray(selected_composers)
# selection = pd.DataFrame(columns=data.columns)
# for i in selected_composers:
#     choices = data[data["canonical_composer"] == i].head()
#     selection = pd.concat([selection, choices], ignore_index=True)
# selection.to_csv("composer_selection.csv")
# # Subset for performer identificaion
# counts = data["performer"].value_counts()
# selected_performers = counts[counts >= 10].index #24
# selected_performers = np.asarray(selected_performers)
# selection = pd.DataFrame(columns=data.columns)
# for i in selected_performers:
#     choices = data[data["performer"] == i].head(10)
#     selection = pd.concat([selection, choices], ignore_index=True)
# selection.to_csv("performer_selection.csv")

In [40]:
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, 1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, input):
        residual = input
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        if self.downsample:
            residual = self.downsample(residual)
        x += residual
        x = self.relu(x)
        return x


class BottleNeck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BottleNeck, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv1 = nn.Conv2d(out_channels, out_channels*self.expansion, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels*self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
    def forward(self, input):
        residual = input
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)
        if self.downsample:
            residual = self.downsample(residual)
        x += residual
        x = self.relu(x)
        return x

class Resnet(nn.Module):
    # 224*224
    def __init__(self, block, num_layer, n_classes=24, input_channels=16, name="resnet18"):
        super(Resnet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 64, num_layer[0])
        self.layer2 = self._make_layer(block, 128, num_layer[1], 2)
        self.layer3 = self._make_layer(block, 256, num_layer[2], 2)
        self.layer4 = self._make_layer(block, 512, num_layer[3], 2)
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=1)
        self.fc = nn.Linear(9*512, n_classes)
        self.name = name

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0.0)


    def _make_layer(self, block, out_channels, num_block, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels*block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels*block.expansion, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels*block.expansion)
            )
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels*block.expansion
        for _ in range(1, num_block):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, input):
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        
        x = self.avgpool(x)
#         print(x.shape)
        x = x.view(x.size(0), -1)
#         print(x.shape)
        x = self.fc(x)
        return x
    
    def _class_name(self):
        return self.name


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Resnet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Resnet(BasicBlock, [3, 4, 6, 3], name="resnet34", **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Resnet(BottleNeck, [3, 4, 6, 3], name="resnet50", **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
        model_path = './initmodel/resnet50_v2.pth'
        model.load_state_dict(torch.load(model_path), strict=False)
    return model

def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Resnet(BottleNeck, [3, 4, 23, 3], name="resnet101", **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
        model_path = './initmodel/resnet101_v2.pth'
        model.load_state_dict(torch.load(model_path), strict=False)
    return model

def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Resnet(BottleNeck, [3, 8, 36, 3], name="resnet152", **kwargs)
    if pretrained:
        # model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
        model_path = './initmodel/resnet152_v2.pth'
        model.load_state_dict(torch.load(model_path), strict=False)
    return model

In [84]:
class DealDataset(Dataset):
    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data
        self.len = x_data.shape[0]
    
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

def data_loader(data_X, data_y):    
    data = DealDataset(data_X, data_y)
    size = data.len
    loader = DataLoader(dataset=data,           
                    batch_size=BATCH_SIZE, 
                    shuffle=True,
                    num_workers=2)
    return loader

def checkpoint(net, save_path, acc, loss, iterations):
    snapshot_prefix = os.path.join(save_path, 'snapshot_' + net._class_name())
    snapshot_path = snapshot_prefix + '_acc_{:.2f}_loss_{:.4f}_iter_{}_model.pt'.format(acc, loss, iterations)
    torch.save(net, snapshot_path)
    for f in glob.glob(snapshot_prefix + '*'):
        if f != snapshot_path:
            os.remove(f)

def train(optimizer, criterion, net, device, epoches, save_path=SAVEPATH):
    iterations = 0
    start = time.time()
    
    best_dev_acc = -1; best_snapshot_path = ''
    header = '  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy'
    dev_log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.1f}%,{:>7.4f},{:8.4f},{:12.4f}%,{:12.4f}%'.split(','))
    log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.1f}%,{:>7.4f},{},{:12.4f}%,{}'.split(','))
    
    if os.path.isdir(save_path) == False:
        os.makedirs(save_path)
    print(header)

    train_loader = data_loader(np.load("X_train.npy"), np.load("y_train.npy"))
    dev_loader = data_loader(np.load("X_val.npy"), np.load("y_val.npy"))

    for epoch in range(epoches):  # loop over the dataset multiple times
        correct, total, print_loss, n= 0, 0, 0, 0
        for i, data in enumerate(train_loader, 0):
            iterations += 1
            # get the inputs
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs.float()) 
            
            total += labels.size(0)
            labels = labels.argmax(dim=1)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            
            
            loss = criterion(outputs, labels)
            print_loss += loss.item()
            loss.backward()
            optimizer.step()  
        
            # compute accuracy 
            acc = correct / total * 100
        
            # checkpoint model periodically
            if iterations % SAVE_EVERY == 0:
                checkpoint(net, save_path, acc, print_loss/n, iterations)
            
            # validation model periodically
            if iterations % DEV_EVERY == 0:
                # calculate accuracy on validation set
                dev_correct, dev_total, dev_loss , m = 0, 0, 0, 0
                with torch.no_grad():
                    for dev_batch_idx, dev_batch in enumerate(dev_loader, 0):
                        signals, labels = dev_batch
                        signals = signals.to(device)
                        labels = labels.to(device)
                        
                        predicts = net(signals.float())
                        labels = labels.argmax(dim=1)
                        dev_loss += criterion(predicts, labels).item()
                        dev_correct += (torch.max(predicts, 1)[1].view(-1) == labels).sum().item()
                        dev_total += labels.size(0)
                        m += 1
                dev_acc = 100. * dev_correct / dev_total

                print(dev_log_template.format(time.time()-start,
                    epoch, iterations, 1+i, len(train_loader),
                    100. * (1+i) / len(train_loader), print_loss/n, dev_loss/m, acc, dev_acc))

                # update best valiation set accuracy
                if dev_acc > best_dev_acc:

                    # found a model with better validation set accuracy

                    best_dev_acc = dev_acc
                    snapshot_prefix = os.path.join(save_path, 'best_snapshot_' + net._class_name())
                    best_snapshot_path = snapshot_prefix + '_devacc_{:.2f}_devloss_{:.4f}__iter_{}_model.pt'.format(dev_acc, dev_loss/m, iterations)

                    # save model, delete previous 'best_snapshot' files
                    torch.save(net, best_snapshot_path)
                    for f in glob.glob(snapshot_prefix + '*'):
                        if f != best_snapshot_path:
                            os.remove(f)

            elif iterations % LOG_EVERY == 0:
                # print progress message
                print(log_template.format(time.time()-start,
                    epoch, iterations, 1+i, len(train_loader),
                    100. * (1+i) / len(train_loader), print_loss/n, ' '*8, acc, ' '*12))
            n += 1
    print('Finished Training')
    return net, best_snapshot_path

def test(net, fp, validation, device):
    correct = 0
    total = 0
    
    test_loader = data_loader("test", True)
    y_pred = []
    y_true = []

    if validation:
        net = torch.load(fp)

    with torch.no_grad():
        for data in test_loader:
            signals, labels = data
            signals = signals.to(device)
            labels = labels.to(device)
            for i in labels.view(-1):
                y_true.append(i.view(-1).tolist())
            
            outputs = net(signals.float())
            predicted = outputs.data.argmax(dim=1)
            for i in predicted.view(-1) + 1:
                y_pred.append(i.view(-1).tolist())
            
            labels = labels.view(-1) - 1
            correct += (predicted.view(-1) == labels).sum().item()
            total += labels.size(0)
    
    acc = 100 * correct / total
    print(total,correct)
    print('Accuracy: %.2f %%' % acc)
    return acc, np.ravel(y_true), np.ravel(y_pred)

In [87]:
if __name__ == "__main__":
    if torch.cuda.is_available():
        torch.cuda.set_device(1)
        device = torch.device('cuda:{}'.format(1))
        print("Using GPU for training")
    else:
        device = torch.device('cpu')

    print('\n----------------------------- EXPERIMENT -----------------------------')
    net = resnet34().to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.001)
    criterion = nn.CrossEntropyLoss()
    net, fp = train(optimizer, criterion, net, device, 50)

Using GPU for training

----------------------------- EXPERIMENT -----------------------------
  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy
     1     0         4     4/4       100.0%  9.4170                2.7778%             
     3     1         8     4/4       100.0%  6.8752   3.6328       2.7778%       4.3478%
     5     2        12     4/4       100.0%  5.4583                2.7778%             
     8     3        16     4/4       100.0%  4.3355   4.0349      11.1111%       4.3478%
     9     4        20     4/4       100.0%  4.3579                9.7222%             
    12     5        24     4/4       100.0%  4.7051   5.2820       9.7222%       4.3478%
    12     6        28     4/4       100.0%  4.2466               16.6667%             
    15     7        32     4/4       100.0%  3.9408   5.9665      16.6667%       4.3478%
    16     8        36     4/4       100.0%  3.4165               33.3333%             
    18     9        

In [91]:
pd.DataFrame(np.load("y_train.npy").argmax(axis=1)).value_counts()

23    3
22    3
1     3
2     3
3     3
4     3
5     3
6     3
7     3
8     3
9     3
10    3
11    3
12    3
13    3
14    3
15    3
16    3
17    3
18    3
19    3
20    3
21    3
0     3
dtype: int64