# 1,2,3 Classification
In this tutorial, I'll go throgh some implementations details you maybe use in your final project:
1. how to load the dataset
2. how to use pre-processing
3. how to train the model
4. how to save and load your well-trained model
5. how to test your performance
6. how to obtain predictions from a few images
7. how to visual your results



In [1]:
import librosa

In [2]:
import numpy as np
print(np.__version__)

1.17.4


In [3]:
import torch
import torchvision

In [4]:
# import some libraries you maybe use
import torchvision # an useful library to help I/O (highly recommend). To install this, just do "pip install torchvision"
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
print(torch.__version__)
print(torchvision.__version__)

1.0.1.post2
0.2.2


In [5]:
batch_size = 256

# Data loading and preprocessing
In order to train the model with training data, the first step is to read the data from your folder, database, etc. The below is just an example.

In [23]:
from torchvision.datasets import ImageFolder, DatasetFolder
from torchvision.transforms import Compose, ToTensor, Grayscale, Resize, Normalize
from torch.utils.data import DataLoader
import os
import librosa
print('load done')
# Define path to your dataset
dataset = "./data" # the root folder
trainpath = os.path.join(dataset,"train") # train set
valpath = os.path.join(dataset,"val") # validation set

cut = lambda x: x[:11025]
norm =  lambda x: x.astype(np.float32) / np.max(x)
spct = lambda x: [librosa.feature.melspectrogram(x, sr=44100),librosa.feature.melspectrogram(x, sr=44100),librosa.feature.melspectrogram(x, sr=44100)]
totensor = lambda x: torch.Tensor(x)

tsfm = Compose([
        cut, # rescale to -1 to 1
        norm, # rescale to -1 to 1
        spct, # MFCC 
        totensor
        ])

# Define some operations to preprocess your inputs.
#mytransforms = Compose([Grayscale(num_output_channels=1),Resize((32,32)),ToTensor()])
nploader = np.load
# The above line will work in this flow:
# (PIL_RGB_INPUT) => (PIL_GrayScale_INPUT) => (32x32_PIL_GrayScale_INPUT) => (32x32_Tensor_GrayScale_INPUT)

# Create imagefolder object.
# The ImageFolder(...) is a powerful class to load the data from the folders.
# The data should be arranged in this manner:
# root/dog/xxx.png
# root/dog/xxy.png
# root/dog/xxz.png
# ...
# root/cat/123.png
# root/cat/nsdf3.png
# root/cat/asd932_.png
# =============================================
# To learn more, please visit the website:
# 1. Official API LINK: https://pytorch.org/docs/stable/torchvision/datasets.html#imagefolder
# 2. Good Explaination LINK: https://discuss.pytorch.org/t/questions-about-imagefolder/774/6


load done


In [7]:
traindata = DatasetFolder(root=trainpath, loader=nploader, transform=tsfm, extensions=['npy'])
valdata = DatasetFolder(root=valpath, loader=nploader, transform=tsfm, extensions=['npy'])

# Create a loader
trainloader = DataLoader(traindata,batch_size=batch_size,shuffle=True)
valloader = DataLoader(valdata,batch_size=batch_size,shuffle=True)

In [8]:
print(traindata.classes) # show all classes
print(traindata.class_to_idx) # show the mapping from class to index.

['Frog1', 'Frog2', 'Frog3', 'Grylloidea1', 'Grylloidea2', 'Tettigonioidea1', 'Tettigonioidea2', 'drums_FloorTom', 'drums_HiHat', 'drums_Kick', 'drums_MidTom', 'drums_Ride', 'drums_Rim', 'drums_SmallTom', 'drums_Snare', 'guitar_3rd_fret', 'guitar_7th_fret', 'guitar_9th_fret', 'guitar_chord1', 'guitar_chord2']
{'Frog1': 0, 'Frog2': 1, 'Frog3': 2, 'Grylloidea1': 3, 'Grylloidea2': 4, 'Tettigonioidea1': 5, 'Tettigonioidea2': 6, 'drums_FloorTom': 7, 'drums_HiHat': 8, 'drums_Kick': 9, 'drums_MidTom': 10, 'drums_Ride': 11, 'drums_Rim': 12, 'drums_SmallTom': 13, 'drums_Snare': 14, 'guitar_3rd_fret': 15, 'guitar_7th_fret': 16, 'guitar_9th_fret': 17, 'guitar_chord1': 18, 'guitar_chord2': 19}


In [9]:
idx_to_class = {val: key for key, val in traindata.class_to_idx.items()} # build an inverse mapping for later use
print(idx_to_class)

{0: 'Frog1', 1: 'Frog2', 2: 'Frog3', 3: 'Grylloidea1', 4: 'Grylloidea2', 5: 'Tettigonioidea1', 6: 'Tettigonioidea2', 7: 'drums_FloorTom', 8: 'drums_HiHat', 9: 'drums_Kick', 10: 'drums_MidTom', 11: 'drums_Ride', 12: 'drums_Rim', 13: 'drums_SmallTom', 14: 'drums_Snare', 15: 'guitar_3rd_fret', 16: 'guitar_7th_fret', 17: 'guitar_9th_fret', 18: 'guitar_chord1', 19: 'guitar_chord2'}


In [91]:
correct_idx2class = {9: 'Frog1', 10: 'Frog2', 19: 'Frog3', 3: 'Grylloidea1', 14: 'Grylloidea2', 0: 'Tettigonioidea1', 1: 'Tettigonioidea2', 11: 'drums_FloorTom', 5: 'drums_HiHat', 6: 'drums_Kick', 4: 'drums_MidTom', 16: 'drums_Ride', 13: 'drums_Rim', 7: 'drums_SmallTom', 2: 'drums_Snare', 15: 'guitar_3rd_fret', 12: 'guitar_7th_fret', 18: 'guitar_9th_fret', 17: 'guitar_chord1', 8: 'guitar_chord2'}
print(correct_idx2class)

{9: 'Frog1', 10: 'Frog2', 19: 'Frog3', 3: 'Grylloidea1', 14: 'Grylloidea2', 0: 'Tettigonioidea1', 1: 'Tettigonioidea2', 11: 'drums_FloorTom', 5: 'drums_HiHat', 6: 'drums_Kick', 4: 'drums_MidTom', 16: 'drums_Ride', 13: 'drums_Rim', 7: 'drums_SmallTom', 2: 'drums_Snare', 15: 'guitar_3rd_fret', 12: 'guitar_7th_fret', 18: 'guitar_9th_fret', 17: 'guitar_chord1', 8: 'guitar_chord2'}


In [92]:
correct_class2idx = {val: key for key, val in correct_idx2class.items()}
print(correct_class2idx)

{'Frog1': 9, 'Frog2': 10, 'Frog3': 19, 'Grylloidea1': 3, 'Grylloidea2': 14, 'Tettigonioidea1': 0, 'Tettigonioidea2': 1, 'drums_FloorTom': 11, 'drums_HiHat': 5, 'drums_Kick': 6, 'drums_MidTom': 4, 'drums_Ride': 16, 'drums_Rim': 13, 'drums_SmallTom': 7, 'drums_Snare': 2, 'guitar_3rd_fret': 15, 'guitar_7th_fret': 12, 'guitar_9th_fret': 18, 'guitar_chord1': 17, 'guitar_chord2': 8}


In [94]:
corrected_idx2idx = {val: correct_class2idx[key] for key, val in traindata.class_to_idx.items()}
print(corrected_idx2idx)

{0: 9, 1: 10, 2: 19, 3: 3, 4: 14, 5: 0, 6: 1, 7: 11, 8: 5, 9: 6, 10: 4, 11: 16, 12: 13, 13: 7, 14: 2, 15: 15, 16: 12, 17: 18, 18: 17, 19: 8}


# Build an example network
If you're unfamiliar with this part, please see the HW1 tutorial.

In [10]:
import resnet
model =resnet.resnet18(num_classes= len(traindata.classes))

In [11]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [12]:
class Net(nn.Module):
    def __init__(self,num_classes):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1   = nn.Linear(16*5*5, 120)
        self.fc2   = nn.Linear(120, 84)
        self.fc3   = nn.Linear(84, num_classes)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        #out = out.view(out.size(0), -1)
        
        out  = F.interpolate(out, size=(5, 5), mode='bilinear')  # resize to the size expected by the linear unit
        out = out.view(out.size(0), 5 * 5 * 16)

        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [13]:
net = Net(num_classes=len(traindata.classes)) # initialize your network
net = model
# Whether to use GPU or not?
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
else: 
    device = 'cpu'
print("use",device,"now!")
net.to(device)
optimizer = optim.SGD(net.parameters(), lr=0.05) # setup your optimizer
criterion = nn.CrossEntropyLoss() # setup your criterion

use cuda now!


# Train the model with the data

In [14]:
def save_model(model,filename):
    state = model.state_dict()
    for key in state: state[key] = state[key].clone().cpu()
    torch.save(state, filename)
#save_model(net,"weight.pth")

In [15]:
net.train()
num_epoch = 50
best_loss = 1e8
for epoch in range(num_epoch):
    for batch_idx, (data, target) in enumerate(trainloader):
        #print(data.shape,target)
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 5==0:
            print('epoch %d, iter %d loss: %.3f' %(epoch+1, batch_idx+1, loss.item()))
            if loss.item() < best_loss:
                best_loss = loss.item()
                save_model(net, "weight.pth")

epoch 1, iter 1 loss: 3.284
epoch 1, iter 6 loss: 1.842
epoch 1, iter 11 loss: 1.351
epoch 1, iter 16 loss: 0.915
epoch 1, iter 21 loss: 0.709
epoch 1, iter 26 loss: 0.674
epoch 1, iter 31 loss: 0.518
epoch 1, iter 36 loss: 0.436
epoch 1, iter 41 loss: 0.341
epoch 2, iter 1 loss: 0.370
epoch 2, iter 6 loss: 0.315
epoch 2, iter 11 loss: 0.240
epoch 2, iter 16 loss: 0.180
epoch 2, iter 21 loss: 0.141
epoch 2, iter 26 loss: 0.157
epoch 2, iter 31 loss: 0.177
epoch 2, iter 36 loss: 0.109
epoch 2, iter 41 loss: 0.128
epoch 3, iter 1 loss: 0.291
epoch 3, iter 6 loss: 0.116
epoch 3, iter 11 loss: 0.091
epoch 3, iter 16 loss: 0.055
epoch 3, iter 21 loss: 0.061
epoch 3, iter 26 loss: 0.046
epoch 3, iter 31 loss: 0.090
epoch 3, iter 36 loss: 0.077
epoch 3, iter 41 loss: 0.072
epoch 4, iter 1 loss: 0.046
epoch 4, iter 6 loss: 0.058
epoch 4, iter 11 loss: 0.044
epoch 4, iter 16 loss: 0.050
epoch 4, iter 21 loss: 0.051
epoch 4, iter 26 loss: 0.038
epoch 4, iter 31 loss: 0.029
epoch 4, iter 36 loss:

epoch 31, iter 41 loss: 0.001
epoch 32, iter 1 loss: 0.000
epoch 32, iter 6 loss: 0.001
epoch 32, iter 11 loss: 0.001
epoch 32, iter 16 loss: 0.001
epoch 32, iter 21 loss: 0.001
epoch 32, iter 26 loss: 0.001
epoch 32, iter 31 loss: 0.001
epoch 32, iter 36 loss: 0.001
epoch 32, iter 41 loss: 0.001
epoch 33, iter 1 loss: 0.001
epoch 33, iter 6 loss: 0.001
epoch 33, iter 11 loss: 0.001
epoch 33, iter 16 loss: 0.001
epoch 33, iter 21 loss: 0.001
epoch 33, iter 26 loss: 0.001
epoch 33, iter 31 loss: 0.001
epoch 33, iter 36 loss: 0.001
epoch 33, iter 41 loss: 0.001
epoch 34, iter 1 loss: 0.001
epoch 34, iter 6 loss: 0.001
epoch 34, iter 11 loss: 0.001
epoch 34, iter 16 loss: 0.001
epoch 34, iter 21 loss: 0.001
epoch 34, iter 26 loss: 0.001
epoch 34, iter 31 loss: 0.001
epoch 34, iter 36 loss: 0.000
epoch 34, iter 41 loss: 0.001
epoch 35, iter 1 loss: 0.001
epoch 35, iter 6 loss: 0.001
epoch 35, iter 11 loss: 0.001
epoch 35, iter 16 loss: 0.001
epoch 35, iter 21 loss: 0.001
epoch 35, iter 26 

# Save model

# Load model

In [17]:
def load_model(model,filename):
    model.load_state_dict(torch.load(filename))
    return model
net = Net(num_classes=len(traindata.classes)) # initialize your network
net = model
net = load_model(net,"weight.pth")
# Whether to use GPU or not?
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
else: 
    device = 'cpu'
print("use",device,"now!")
net.to(device)

use cuda now!


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

# Evaluate on validation data

In [51]:
net.eval()
correct = 0
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(valloader):
        print(type(data))
        data = data.to(device)
        target = target.to(device)
        output = net(data)
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).sum()
    acc = correct.item() / len(valloader.dataset)
print("Validation Classification Accuracy: %f"%(acc))

<class 'torch.Tensor'>


KeyboardInterrupt: 

In [40]:
test_data = np.load('./data/test.npy', allow_pickle=True)
print(len(test_data))

2387


In [44]:
t_spec = []
for t in test_data:
    test = cut(t)
    test = norm(test)
    test = spct(test) 
    test = totensor(test)
    t_spec.append(test)

In [67]:
print(type(test))
print(type(test.data))

<class 'torch.Tensor'>
<class 'torch.Tensor'>


In [52]:
tensor_x = torch.stack(t_spec)

TypeError: expected Tensor as element 0 in argument 0, but got list

In [59]:
tensor_x[0][0][0][0]

tensor(63.4451)

In [47]:
test_dataset = torch.utils.data.TensorDataset(tensor_x) # create your datset
test_dataloader = torch.utils.data.DataLoader(test_dataset) # create your dataloader

In [80]:
net.eval()
result = []
with torch.no_grad():
    for batch_idx, (data, ) in enumerate(test_dataloader):  
        data = data.to(device)
        #target = target.to(device)
        output = net(data)
        pred = output.data.max(1, keepdim=True)[1]
        #correct += pred.eq(target.data.view_as(pred)).sum()
        result = result + list(pred.cpu().numpy().ravel())
    #acc = correct.item() / len(valloader.dataset)
#print("Validation Classification Accuracy: %f"%(acc))

In [95]:
result = [corrected_idx2idx[idx] for idx in result]

In [96]:
print(result)

[18, 2, 18, 8, 8, 12, 2, 15, 2, 8, 2, 2, 18, 18, 11, 17, 17, 15, 2, 0, 15, 8, 5, 0, 12, 0, 3, 16, 3, 2, 18, 13, 8, 5, 18, 11, 2, 0, 15, 13, 2, 6, 14, 11, 13, 12, 0, 14, 12, 0, 5, 4, 2, 18, 15, 0, 7, 4, 18, 6, 14, 14, 17, 0, 8, 7, 6, 12, 11, 13, 18, 13, 17, 3, 12, 2, 3, 12, 12, 17, 2, 15, 14, 15, 9, 15, 7, 2, 5, 6, 5, 2, 18, 8, 0, 2, 8, 5, 1, 3, 3, 2, 0, 8, 8, 12, 15, 0, 2, 2, 5, 2, 6, 18, 1, 2, 14, 15, 12, 14, 12, 4, 0, 8, 5, 11, 15, 4, 12, 15, 7, 8, 2, 12, 15, 0, 14, 4, 0, 2, 10, 18, 4, 15, 4, 4, 15, 7, 11, 18, 3, 4, 4, 5, 11, 1, 15, 0, 7, 4, 18, 3, 3, 0, 5, 18, 14, 8, 11, 16, 2, 14, 2, 12, 1, 2, 11, 1, 1, 11, 0, 16, 10, 6, 18, 15, 5, 5, 5, 12, 6, 15, 3, 7, 15, 15, 2, 17, 16, 2, 17, 11, 15, 2, 16, 15, 13, 18, 12, 11, 17, 12, 3, 18, 12, 0, 14, 18, 3, 13, 18, 3, 18, 7, 7, 17, 12, 11, 3, 7, 8, 7, 17, 8, 18, 8, 2, 6, 3, 8, 14, 3, 8, 2, 15, 4, 2, 3, 8, 5, 15, 2, 0, 0, 11, 18, 5, 15, 18, 5, 3, 6, 12, 2, 6, 3, 3, 18, 16, 1, 2, 14, 13, 18, 7, 6, 5, 3, 1, 8, 2, 8, 3, 18, 17, 4, 13, 14, 4, 5, 1

In [97]:
df_dict = {'id': list(range(0 ,len(test_data))), 'category':result}

In [98]:
import pandas as pd
df = pd.DataFrame(df_dict)

In [99]:
df.head()

Unnamed: 0,id,category
0,0,18
1,1,2
2,2,18
3,3,8
4,4,8


In [100]:
df.to_csv('result.csv', index=False)

# How to obtain predictions from a few images
Note. There are many ways to do the same things such as customized dataset*. The below is just a simple example how I make an inference on a few images.

* https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader

In [None]:
# fetch filepaths of the testing images
testpath = os.path.join(dataset,"test") # test set
testlist = [os.path.join(testpath,imgpath) for imgpath in os.listdir(testpath)]

In [None]:
# ImagePath => PIL_Image => Tensor
from PIL import Image
testdata = []
for imgpath in testlist:
    # In torchvision, we assume input images are all PIL types
    img = Image.open(imgpath).convert("RGB") # By default, torchvision read images in RGB-fashion
    transimg = mytransforms(img)
    testdata.append(transimg)
testdata = torch.stack(testdata)# list of tensors to tensor
testdata = torch.utils.data.TensorDataset(testdata)
# =========================================================================
# Don't shuffle the image list and set the batch_size = 1
# It's just a trick. You can still figure out another way to achieve the same thing.
testloader = torch.utils.data.DataLoader(testdata,batch_size=1,shuffle=False)

In [None]:
# Testing
net.eval()
result = {}
with torch.no_grad():
    for idx, (data,) in enumerate(testloader):
        data = data.to(device)
        target = target.to(device)
        output = net(data)
        pred_idx = output.data.max(1, keepdim=True)[1]
        pred_class = idx_to_class[pred_idx.cpu().numpy()[0][0]]
        print("predict",testlist[idx],"=>",pred_class)
        result[testlist[idx]] = pred_class

In [None]:
# Visualize your results
import matplotlib.pyplot as plt
size = 8
fig = plt.figure(figsize=(size, size))
columns = len(result)
rows = np.ceil(len(result))
for x, filepath in enumerate(result):
    img = plt.imread(filepath)
    ax = fig.add_subplot(rows, columns, x+1)
    ax.set_title("pred:%s"%(result[filepath]))
    plt.imshow(img,cmap='gray')
plt.show()