In [11]:
import os
from os import walk
import copy
import pickle

import numpy as np
from tqdm import tqdm

import torch
import torchvision
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
from torchinfo import summary

['.git', '.gitignore', '.ipynb_checkpoints', 'model.ipynb', 'preprocess.ipynb', 'README.md', 'test1.wav', 'test2.wav', 'train.ipynb']
PyTorch Version:  2.1.0
Torchvision Version:  0.16.0
Using the GPU!


In [12]:
# Implementation of VGG-BN (VGG16 layout) for single-channel inputs such as 1*125*100.
class Net(nn.Module):
    """VGG16-style CNN with batch normalisation.

    Layout: thirteen 3x3 Conv -> BatchNorm -> ReLU blocks arranged in five
    stages (2-2-3-3-3 blocks, 64/128/256/512/512 channels), each stage closed
    by a 2x2 stride-2 max-pool.  The feature map is then adaptively
    average-pooled to 7x7, flattened and passed through a three-layer
    fully connected classifier.

    The adaptive pooling makes the network independent of the exact input
    resolution.  Each of the five max-pools halves height and width (rounding
    down), so both must be at least 32 for the feature map not to vanish.

    Args:
        num_classes: Width of the final linear layer, i.e. the number of
            logits returned per sample.
        in_channels: Number of channels of the input tensor.
        dropout: Drop probability used after each of the first two fully
            connected layers.

    Sub-module names (``conv1`` ... ``conv13``, ``pool1`` ... ``pool5``,
    ``adaptive``, ``fc1`` ... ``fc3``) are part of the ``state_dict`` keys and
    must stay stable for saved checkpoints to keep loading.
    """

    def __init__(self, num_classes: int = 10, in_channels: int = 1, dropout: float = 0.3):
        super().__init__()

        # Stage 1: 64 channels.
        self.conv1 = self._conv_block(in_channels, 64)
        self.conv2 = self._conv_block(64, 64)
        self.pool1 = self._max_pool()

        # Stage 2: 128 channels.
        self.conv3 = self._conv_block(64, 128)
        self.conv4 = self._conv_block(128, 128)
        self.pool2 = self._max_pool()

        # Stage 3: 256 channels.
        self.conv5 = self._conv_block(128, 256)
        self.conv6 = self._conv_block(256, 256)
        self.conv7 = self._conv_block(256, 256)
        self.pool3 = self._max_pool()

        # Stage 4: 512 channels.
        self.conv8 = self._conv_block(256, 512)
        self.conv9 = self._conv_block(512, 512)
        self.conv10 = self._conv_block(512, 512)
        self.pool4 = self._max_pool()

        # Stage 5: 512 channels.
        self.conv11 = self._conv_block(512, 512)
        self.conv12 = self._conv_block(512, 512)
        self.conv13 = self._conv_block(512, 512)
        self.pool5 = self._max_pool()

        # Fixes the classifier input at 512 x 7 x 7 regardless of input size.
        self.adaptive = nn.AdaptiveAvgPool2d(output_size=(7, 7))

        # Classifier head; 512 * 7 * 7 == 25088 flattened features.
        self.fc1 = self._fc_block(512 * 7 * 7, 4096, dropout)
        self.fc2 = self._fc_block(4096, 4096, dropout)
        self.fc3 = nn.Linear(in_features=4096, out_features=num_classes, bias=True)

    @staticmethod
    def _conv_block(in_ch: int, out_ch: int) -> nn.Sequential:
        """Build a 3x3 same-padding convolution -> BatchNorm -> in-place ReLU."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=(1, 1)),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _max_pool() -> nn.MaxPool2d:
        """Build the 2x2 stride-2 max-pool that closes every stage."""
        return nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

    @staticmethod
    def _fc_block(in_features: int, out_features: int, dropout: float) -> nn.Sequential:
        """Build a Linear -> in-place ReLU -> Dropout classifier layer."""
        return nn.Sequential(
            nn.Linear(in_features=in_features, out_features=out_features, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout, inplace=False),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (no softmax applied) for a batch ``x``.

        ``x`` is fed straight into the first convolution, so it needs
        ``in_channels`` channels; the result has ``num_classes`` values per
        sample.
        """
        # Stage 1
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        # Stage 2
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        # Stage 3
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.pool3(x)
        # Stage 4
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.pool4(x)
        # Stage 5
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.pool5(x)
        # Classifier head
        x = self.adaptive(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

Net(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (conv4): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (pool2): MaxPool2d(kernel_s

In [13]:
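# NOTE(review): the output recorded below reports a [32, 7] result, whereas Net.fc3 as
# defined in this notebook emits 10 logits by default; uncomment and re-run to refresh it.
# summary(net, input_size = (32, 1, 100, 100))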


Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [32, 7]                   --
├─Sequential: 1-1                        [32, 64, 100, 100]        --
│    └─Conv2d: 2-1                       [32, 64, 100, 100]        640
│    └─BatchNorm2d: 2-2                  [32, 64, 100, 100]        128
│    └─ReLU: 2-3                         [32, 64, 100, 100]        --
├─Sequential: 1-2                        [32, 64, 100, 100]        --
│    └─Conv2d: 2-4                       [32, 64, 100, 100]        36,928
│    └─BatchNorm2d: 2-5                  [32, 64, 100, 100]        128
│    └─ReLU: 2-6                         [32, 64, 100, 100]        --
├─MaxPool2d: 1-3                         [32, 64, 50, 50]          --
├─Sequential: 1-4                        [32, 128, 50, 50]         --
│    └─Conv2d: 2-7                       [32, 128, 50, 50]         73,856
│    └─BatchNorm2d: 2-8                  [32, 128, 50, 50]         256
│  