<a href="https://colab.research.google.com/github/neennera/AI_pitch/blob/main/PyTorchForAudioML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# Dowload Dataset

In [2]:
def download_mnist():
  train_dataset = datasets.MNIST(
      root="data", #ระบุที่เก็บ data ที่พึ่ง dowload มาที่ /data
      download = True, #ระบุให้โหลดมาหากยังไม่ได้โหลด
      train = True,
      transform=ToTensor()
  )
  test_dataset = datasets.MNIST(
      root="data",
      download = True,
      train =  False, # ให้โหลด test
      transform=ToTensor()
  )
  return train_dataset, test_dataset

In [3]:
class FeedForwardNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.flatten = nn.Flatten() #แปลง array 2D ให้เป็น 1D
    self.dense_layer = nn.Sequential(  #การ pack layer เข้าด้วยกัน
        nn.Linear(28*28,256),  #784 -> 256
        nn.ReLU(),
        nn.Linear(256,10)
    )
    self.softmax = nn.Softmax(dim=1) #จำแนกการทาย class เป็น % 10 class [รวมกันได้ 1]
  
  def forward(self, x):
    x = self.flatten(x)
    x = self.dense_layer(x)
    x = self.softmax(x)
    return x

In [4]:
batch_size = 128
epochs = 10
lr = 0.001
device = 'cuda' if torch.cuda.is_available() else 'cpu'

if __name__ == "__main__" :
  train_dataset, test_dataset = download_mnist()
  train_dataloader=DataLoader(train_dataset,batch_size=batch_size)
  model = FeedForwardNet().to(device)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



# Train model

In [5]:
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=lr)

In [6]:
def train_one_epoch(model, data_loader, loss_fn, optimiser, device):
  for inputs, targets in data_loader :
    inputs, targets = inputs.to(device), targets.to(device)
  
  #calculate loss
  pred = model(inputs)
  loss = loss_fn(pred, targets)
  #acc = Acc_fn(pred, targets)

  #backpropagate + update weight
  optimiser.zero_grad() #ลบ gradiun จาก batch ก่อนๆ
  loss.backward()
  optimiser.step()

  print(f"Loss : {loss.item()}") #\t Acc:{acc}")

def train(model, data_loader, loss_fn, optimiser, device, epoch):
  for i in range(epoch):
    print(f"Epoch : {i+1}")
    train_one_epoch(model, data_loader, loss_fn, optimiser, device)
    print("---------------------------")
  print("Finished training")

In [None]:
train(model, train_dataloader, loss_fn, optimiser, device, epochs)
torch.save(model.state_dict(), "model.pth")
print("Trained feed forward net saved at feedforwardnet.pth")

Epoch : 1
Loss : 2.3032054901123047
---------------------------
Epoch : 2
Loss : 2.2902708053588867
---------------------------
Epoch : 3
Loss : 2.276273012161255
---------------------------
Epoch : 4
Loss : 2.260176658630371
---------------------------
Epoch : 5
Loss : 2.241004467010498
---------------------------
Epoch : 6
Loss : 2.21809458732605
---------------------------
Epoch : 7
Loss : 2.1910974979400635
---------------------------
Epoch : 8
Loss : 2.160165309906006
---------------------------
Epoch : 9
Loss : 2.126034736633301
---------------------------
Epoch : 10
Loss : 2.0899498462677
---------------------------
Finished training
Trained feed forward net saved at feedforwardnet.pth


# Predict



In [None]:
def predict(model, input, target, class_mapping) :
  model.eval() #เป็นสวิตท์ model.eval() ใช่ในการทาย // model.train() ใช้ในการเทรน
  
  with torch.no_grad(): #ใช้ model แบบไม่คำนวน grad
    pred = model(input)
    #print(pred)
    #จะ map ไปเป็น class โดย pred อยู่ในรูป (batch, number , class) -> (1,1,10)
    pred_index = pred[0].argmax(0)
        # pred[0] เอามาแค่ตัวแรก
        # argmax(0)  เลือกเอาอากิวเมนต์ที่มีค่าสูงสุด [เพราะ softmax ทำนายออกมาเป็น % ความน่าจะเป็น]
    pred = class_mapping[pred_index]
    expected = class_mapping[target]
 
  return pred, expected

In [None]:
class_mapping = [
    "0","1","2","3","4","5","6","7","8","9"
]

if __name__ == "__main__" :
  _ , validation_data = download_mnist()
  #validation_data = ดาต้าที่ใช้ตรวจสอบภความถูกต้อง
  
  model = FeedForwardNet()
  state_dict = torch.load("model.pth")
  model.load_state_dict(state_dict)
  
  for i in range(10) :
    input, target = validation_data[i][0], validation_data[i][1]
    predicted, expected = predict(model, input, target, class_mapping)
    print(f"Predicted: '{predicted}', expected: '{expected}'")

Predicted: '7', expected: '7'
Predicted: '3', expected: '2'
Predicted: '3', expected: '1'
Predicted: '6', expected: '0'
Predicted: '3', expected: '4'
Predicted: '8', expected: '1'
Predicted: '3', expected: '4'
Predicted: '3', expected: '9'
Predicted: '6', expected: '5'
Predicted: '7', expected: '9'


# Conv Model

In [7]:
class CNNNetwork(nn.Module):

    def __init__(self):
        super().__init__()
        # 4 conv blocks / flatten / linear / softmax
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=16,
                out_channels=32,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(
                in_channels=64,
                out_channels=128,
                kernel_size=3,
                stride=1,
                padding=2
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(128 * 5 * 4, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_data):
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.softmax(logits)
        return 

In [9]:
from torchsummary import summary
if __name__ == "__main__":
    cnn = CNNNetwork()
    summary(cnn, (1, 64, 44))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 66, 46]             160
              ReLU-2           [-1, 16, 66, 46]               0
         MaxPool2d-3           [-1, 16, 33, 23]               0
            Conv2d-4           [-1, 32, 35, 25]           4,640
              ReLU-5           [-1, 32, 35, 25]               0
         MaxPool2d-6           [-1, 32, 17, 12]               0
            Conv2d-7           [-1, 64, 19, 14]          18,496
              ReLU-8           [-1, 64, 19, 14]               0
         MaxPool2d-9             [-1, 64, 9, 7]               0
           Conv2d-10           [-1, 128, 11, 9]          73,856
             ReLU-11           [-1, 128, 11, 9]               0
        MaxPool2d-12            [-1, 128, 5, 4]               0
          Flatten-13                 [-1, 2560]               0
           Linear-14                   