<a href="https://colab.research.google.com/github/shon-otmazgin/gcommand_recognition_cnn/blob/main/vgg11_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!rm -rf gcommands
!rm -rf gcommand_dataset.py

In [None]:
# Copy the data files and gcommand_dataset_cnn.py into the working directory.
# Data files:
# https://drive.google.com/file/d/1Gqo0pQxuRysNtKecDBk_jvk8zK4BrYbR/view
# gcommand_dataset_cnn.py:
# https://drive.google.com/file/d/1JyQWSOTfPtyCV8O9_XoE0Q5W5dqtMaav/view?usp=sharing
# Notebook:
# https://drive.google.com/file/d/1VElIp4e4JcB9ksGASBh47ixN8hkUx8Y3/view?usp=sharing
# NOTE(review): both commands read from ./drive/MyDrive, so Google Drive presumably has to be
# mounted at ./drive beforehand; no mount cell is visible here -- confirm.

!cp drive/MyDrive/Colab\ Notebooks/gcommand_dataset_cnn.py gcommand_dataset_cnn.py
!unzip -qq drive/MyDrive/ex5_data.zip -d gcommands

In [None]:
# create test dir under test dir. this is the fix Yossi mentioned in the piazza

!mkdir gcommands/test/test/
!mv gcommands/test/* gcommands/test/test/

mv: cannot move 'gcommands/test/test' to a subdirectory of itself, 'gcommands/test/test/test'


In [None]:
import os
import sys

import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.functional import F

from gcommand_dataset_cnn import GCommandLoader

In [None]:
def train(model, optimizer, train_loader, val_loader, epochs=10):
    """Train ``model`` with NLL loss and print train/validation metrics after every epoch.

    Args:
        model: module whose forward pass returns per-class log-probabilities
            (its output is fed directly to ``F.nll_loss``).
        optimizer: optimizer already bound to ``model``'s parameters.
        train_loader: iterable yielding ``(data, labels)`` batches.
        val_loader: loader passed to ``test`` after each epoch; when falsy
            (e.g. ``None``) validation is skipped and reported as ``n/a``.
        epochs: number of full passes over ``train_loader``.

    Returns:
        None. Metrics are only printed.
    """
    # Run on the device the model already lives on rather than on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    def _fmt(value):
        # Validation metrics are None when no validation loader was supplied.
        return 'n/a' if value is None else f'{value:.3f}'

    for e in range(epochs):
        model.train()
        # Reset the accumulators every epoch so one epoch's totals never leak into the next.
        train_loss = 0.0
        train_correct = 0
        seen = 0
        for data, labels in train_loader:
            data, labels = data.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            output = model(data)

            loss = F.nll_loss(input=output, target=labels)
            loss.backward()
            optimizer.step()

            batch_n = labels.size(0)
            # nll_loss returns the batch mean; weight it by the batch size so that the
            # final division yields a true per-sample average (same scale as `test`).
            train_loss += loss.item() * batch_n
            train_correct += (output.argmax(dim=1) == labels).sum().item()
            seen += batch_n

        # Normalise by the samples actually processed; max() guards an empty loader.
        train_loss /= max(seen, 1)
        train_acc = train_correct / max(seen, 1)
        if val_loader:
            val_loss, val_acc = test(model=model, loader=val_loader)
        else:
            val_loss, val_acc = None, None

        print(f'Epoch: {e + 1} [{(e + 1)}/{epochs}] Train Loss: {train_loss:.3f}, Val Loss: {_fmt(val_loss)}')
        print(f'Epoch: {e + 1} [{(e + 1)}/{epochs}] Train ACC:  {train_acc:.3f},  Val ACC:  {_fmt(val_acc)}')

In [None]:
def test(model, loader):
    global device
    model.eval()
    loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            
            output = model(data)
            loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.max(dim=1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).cpu().sum().item()

    loss /= len(loader.dataset)
    return loss, correct / len(loader.dataset)

In [None]:
def _make_layers(cfg):
    layers = []
    in_channels = 1
    for x in cfg:
        if x == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                       nn.BatchNorm2d(x),
                       nn.ReLU(inplace=True)]
            in_channels = x
    layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
    return nn.Sequential(*layers)

In [None]:
class CNN(nn.Module):
    """VGG11-style convolutional classifier that outputs per-class log-probabilities.

    Args:
        num_classes: size of the output layer. Defaults to 30, the value that was
            previously hard-coded.
        flat_features: number of features produced by the convolutional stack once
            flattened. Defaults to 7680, the value that was previously hard-coded.
            NOTE(review): 7680 = 512 channels * 15 spatial positions after five 2x2
            max-pools; presumably this matches the spectrogram size produced by
            GCommandLoader -- confirm against the loader.
    """

    def __init__(self, num_classes=30, flat_features=7680):
        super(CNN, self).__init__()

        # Integers are conv output channels, 'M' is a 2x2 max-pool (see _make_layers).
        arch = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

        self.conv = _make_layers(arch)
        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return log-softmax class scores of shape ``(batch, num_classes)`` for ``x``."""
        x = self.conv(x)

        # Flatten everything except the batch dimension; unlike .view this also
        # works when the tensor is not contiguous.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

In [None]:
# Datasets for the three splits extracted under gcommands/.
train_set = GCommandLoader('gcommands/train')
val_set = GCommandLoader('gcommands/valid')
test_set = GCommandLoader('gcommands/test')

batch_size = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Compare the device *type*: a torch.device object compared against a plain string
# evaluates to False, so `device == "cuda"` never enabled the GPU loader settings.
if device.type == "cuda":
    num_workers = 1
    pin_memory = True
else:
    num_workers = 0
    pin_memory = False

# Only the training split is shuffled; validation and test keep the dataset order,
# which the submission-writing cell relies on when pairing file names with predictions.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

In [None]:
# Sanity check: number of samples in the train, validation and test splits (one per line).
print(*(len(dl.dataset) for dl in (train_loader, val_loader, test_loader)), sep='\n')

30000
6798
6835


In [None]:
# Train a fresh CNN with Adam for a fixed number of epochs, validating after each one.
epochs = 7

model = CNN().to(device)
adam = optim.Adam(params=model.parameters(), lr=1e-4)

train(model, adam, train_loader, val_loader, epochs=epochs)

Epoch: 1 [1/7] Train Loss: 0.009, Val Loss: 0.383
Epoch: 1 [1/7] Train ACC:  0.761,  Val ACC:  0.884
Epoch: 2 [2/7] Train Loss: 0.002, Val Loss: 0.358
Epoch: 2 [2/7] Train ACC:  0.942,  Val ACC:  0.896
Epoch: 3 [3/7] Train Loss: 0.001, Val Loss: 0.309
Epoch: 3 [3/7] Train ACC:  0.963,  Val ACC:  0.913
Epoch: 4 [4/7] Train Loss: 0.001, Val Loss: 0.297
Epoch: 4 [4/7] Train ACC:  0.976,  Val ACC:  0.918
Epoch: 5 [5/7] Train Loss: 0.001, Val Loss: 0.267
Epoch: 5 [5/7] Train ACC:  0.983,  Val ACC:  0.925
Epoch: 6 [6/7] Train Loss: 0.000, Val Loss: 0.432
Epoch: 6 [6/7] Train ACC:  0.987,  Val ACC:  0.896
Epoch: 7 [7/7] Train Loss: 0.000, Val Loss: 0.346
Epoch: 7 [7/7] Train ACC:  0.992,  Val ACC:  0.915


In [None]:
def predict(model, loader):
    """Return the predicted class index for every sample yielded by ``loader``.

    Args:
        model: module whose forward pass returns one score per class along dim 1.
        loader: iterable yielding ``(data, labels)`` batches; the labels are ignored.

    Returns:
        Integer tensor of shape ``(num_samples, 1)`` on the model's device, in the
        order the loader yielded the samples. An empty loader gives shape ``(0, 1)``.
    """
    # Run on the device the model already lives on rather than on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    preds = []
    model.eval()
    with torch.no_grad():
        for data, _ in loader:
            # Labels are not needed for inference, so only the inputs are transferred.
            output = model(data.to(run_device))
            preds.append(output.argmax(dim=1, keepdim=True))

    # torch.cat rejects an empty list; return an explicitly empty result instead.
    if not preds:
        return torch.empty((0, 1), dtype=torch.long, device=run_device)
    return torch.cat(preds, dim=0)

In [None]:
# Predicted class indices for the test split, in the order test_loader yields the samples.
y_hat = predict(model=model, loader=test_loader)

In [None]:
index_2_classes = {i:c for c, i in train_loader.dataset.class_to_idx.items()}
if sys.platform == 'linux':
    X = [x.rsplit('/', 1)[1] for x, y in test_loader.dataset.spects]
else:
    X = [x.rsplit('\\', 1)[1] for x, y in test_loader.dataset.spects]
output = [f'{x},{index_2_classes[y.item()]}\n' for x, y in zip(X, y_hat)]
output = sorted(output, key=lambda x: int(x.split('.')[0]))
with open('test_y', 'w') as f:
    f.writelines(output)
!cp test_y drive/MyDrive/Colab\ Notebooks/test_y