# Neural Network Project - Part 2 - summer 2024
### Mohammad Hossein Najafi - 97103938
---

### Libraries

In [67]:
import torch
import numpy as np
import pandas as pd
import torchvision
import torch.nn as nn # use nn functions like sigmoid, ReLu, softmax
import torchvision.transforms as transforms # use for transformerin on data
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
import torchvision.models as models
import os
from PIL import Image
import io
import zipfile
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
import time
from scipy.spatial.distance import pdist, squareform
from sklearn.decomposition import PCA
import json
from tqdm import tqdm

In [68]:
import io
! pip install gdown



In [69]:
! gdown 1hAfkobN0YBk2D2fqhhRht2yKmF1nNwWz

Downloading...
From: https://drive.google.com/uc?id=1hAfkobN0YBk2D2fqhhRht2yKmF1nNwWz
To: /kaggle/working/kaggle.json
100%|█████████████████████████████████████████| 64.0/64.0 [00:00<00:00, 354kB/s]


In [4]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d danielbacioiu/tig-aluminium-5083

Dataset URL: https://www.kaggle.com/datasets/danielbacioiu/tig-aluminium-5083
License(s): CC-BY-SA-4.0
Downloading tig-aluminium-5083.zip to /kaggle/working
100%|██████████████████████████████████████▉| 11.2G/11.2G [01:15<00:00, 172MB/s]
100%|███████████████████████████████████████| 11.2G/11.2G [01:15<00:00, 159MB/s]


### Dataset

In [70]:
class smdataset(Dataset):
    def __init__(self, zip_file_path, folder_name, json_file_path, transform=None):
        self.zip_file = zipfile.ZipFile(zip_file_path, 'r')
        self.folder_name = folder_name.rstrip('/') + '/'
        self.file_list = [name for name in self.zip_file.namelist() if name.startswith(self.folder_name) and name.endswith('.png')]
        self.transform = transform

        # Load labels from JSON file
        with self.zip_file.open(json_file_path) as json_file:
            self.labels_dict = json.load(json_file)


    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        try:
            with self.zip_file.open(self.file_list[idx]) as file:
                image = Image.open(io.BytesIO(file.read()))
                if self.transform:
                    image = self.transform(image)
            
            # Extract the relevant part of the filename for lookup
            file_name = self.file_list[idx].replace(self.folder_name, '')  # Remove folder_name prefix

          
            
            # Get the label for the current file
            label = self.labels_dict.get(file_name, -1) 

          

            return image, label
        except Exception as e:
            return None, None

# Define the necessary transforms (if any)
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Update these paths based on the output from the above listing
train_json_path = 'al5083/al5083/train/train.json'  # Correct path from listing
test_json_path = 'al5083/al5083/test/test.json'    # Correct path from listing

# Create the datasets
train_dataset = smdataset(
    zip_file_path='/kaggle/working/tig-aluminium-5083.zip',
    folder_name='al5083/train',
    json_file_path=train_json_path,
    transform=transform
)

test_dataset = smdataset(
    zip_file_path='/kaggle/working/tig-aluminium-5083.zip',
    folder_name='al5083/test',
    json_file_path=test_json_path,
    transform=transform
)



In [71]:
train_dataloader = DataLoader(train_dataset,batch_size=32,shuffle=True)

test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=True)

### Model

In [80]:
class CNN(nn.Module):
    def __init__(self, num_classes=6):
        super(CNN, self).__init__()
        # Define the convolutional layers
        self.conv1 = nn.Conv2d(1, 128, kernel_size=5, padding=0, stride=1)
        self.bn1 = nn.BatchNorm2d(128)
        
        self.pool1 = nn.AvgPool2d(kernel_size=5, stride=2)
        
        self.conv2 = nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1)
        self.bn2 = nn.BatchNorm2d(128)
        
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1)
        self.bn3 = nn.BatchNorm2d(128)
        
        self.conv4_1 = nn.Conv2d(128, 96, kernel_size=3, padding=0, stride=1)
        self.bn4_1 = nn.BatchNorm2d(96)
        self.conv4_2 = nn.Conv2d(96, 96, kernel_size=3, padding=0, stride=1)
        self.bn4_2 = nn.BatchNorm2d(96)
        
        self.conv5_1 = nn.Conv2d(96, 96, kernel_size=3, padding=0, stride=1)
        self.bn5_1 = nn.BatchNorm2d(96)
        self.conv5_2 = nn.Conv2d(96, 96, kernel_size=3, padding=0, stride=1)
        self.bn5_2 = nn.BatchNorm2d(96)
        
        self.pool2 = nn.AvgPool2d(kernel_size=5, stride=2)
        
        self.dropout = nn.Dropout(0.5)
        
        # Define the fully connected layers
        self.fc1 = nn.Linear(96 * 54 * 54, 128)  # Correct input size based on output of conv layers
        self.dropout_fc1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        # Apply convolutional layers followed by pooling
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)
        
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        
        x = F.relu(self.bn4_1(self.conv4_1(x)))
        x = F.relu(self.bn4_2(self.conv4_2(x)))
        
        x = F.relu(self.bn5_1(self.conv5_1(x)))
        x = F.relu(self.bn5_2(self.conv5_2(x)))
        
        x = self.pool2(x)
        
        x = self.dropout(x)
        
        # Flatten the output from the conv layers to feed into the fully connected layer
        x = x.view(x.size(0), -1)  # Flatten
        
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout_fc1(x)
        x = self.fc2(x)
        return x


In [81]:
model = CNN()
device = "cuda"
model.to(device)

CNN(
  (conv1): Conv2d(1, 128, kernel_size=(5, 5), stride=(1, 1))
  (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): AvgPool2d(kernel_size=5, stride=2, padding=0)
  (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4_1): Conv2d(128, 96, kernel_size=(3, 3), stride=(1, 1))
  (bn4_1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4_2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1))
  (bn4_2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5_1): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1))
  (bn5_1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5_2): Conv2

In [82]:
total_params = 0
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name}: {param.data.shape} ({param.numel()} parameters)")
        total_params += param.numel()

print(f"\nTotal number of parameters: {total_params}")

conv1.weight: torch.Size([128, 1, 5, 5]) (3200 parameters)
conv1.bias: torch.Size([128]) (128 parameters)
bn1.weight: torch.Size([128]) (128 parameters)
bn1.bias: torch.Size([128]) (128 parameters)
conv2.weight: torch.Size([128, 128, 3, 3]) (147456 parameters)
conv2.bias: torch.Size([128]) (128 parameters)
bn2.weight: torch.Size([128]) (128 parameters)
bn2.bias: torch.Size([128]) (128 parameters)
conv3.weight: torch.Size([128, 128, 3, 3]) (147456 parameters)
conv3.bias: torch.Size([128]) (128 parameters)
bn3.weight: torch.Size([128]) (128 parameters)
bn3.bias: torch.Size([128]) (128 parameters)
conv4_1.weight: torch.Size([96, 128, 3, 3]) (110592 parameters)
conv4_1.bias: torch.Size([96]) (96 parameters)
bn4_1.weight: torch.Size([96]) (96 parameters)
bn4_1.bias: torch.Size([96]) (96 parameters)
conv4_2.weight: torch.Size([96, 96, 3, 3]) (82944 parameters)
conv4_2.bias: torch.Size([96]) (96 parameters)
bn4_2.weight: torch.Size([96]) (96 parameters)
bn4_2.bias: torch.Size([96]) (96 parame

### Training and Testing


In [None]:
lossfn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=1e-2, weight_decay=1e-4)

In [None]:
num_epoch = 10

for epoch in range(num_epoch):
    # Training phase
    model.train()
    train_loss = 0.0
    train_batches = len(train_dataloader)
    
    with tqdm(total=train_batches, desc=f'Epoch {epoch+1}/{num_epoch}', unit='batch') as pbar:
        for i, (images, labels) in enumerate(train_dataloader):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = lossfn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            
            

            if i % 16 == 0:
                pbar.set_postfix({'loss': train_loss / (i + 1)})
                pbar.update(16)
    

Epoch 1/10: 848batch [10:48,  1.31batch/s, loss=0.144]                      
Epoch 2/10: 848batch [10:48,  1.31batch/s, loss=0.112]                      
Epoch 3/10: 848batch [10:50,  1.30batch/s, loss=0.0884]                      
Epoch 4/10: 848batch [10:54,  1.30batch/s, loss=0.071]                       
Epoch 5/10: 848batch [10:51,  1.30batch/s, loss=0.0543]                      
Epoch 6/10: 848batch [10:46,  1.31batch/s, loss=0.0543]                      
Epoch 7/10: 848batch [10:45,  1.31batch/s, loss=0.0443]                      
Epoch 8/10: 848batch [10:45,  1.31batch/s, loss=0.0375]                      
Epoch 9/10: 848batch [10:45,  1.31batch/s, loss=0.0336]                      
Epoch 10/10: 848batch [10:46,  1.31batch/s, loss=0.0317]                      


In [None]:
#Testing
correct = 0
total = 0

for images, labels in tqdm(test_dataloader, desc="Processing Batches"):
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    prediction = torch.argmax(outputs, dim=1)
    correct += (prediction == labels).sum().item()
    total += labels.size(0)

accuracy = correct / total * 100
print(f'Accuracy: {accuracy:.2f}%')

Processing Batches: 100%|██████████| 206/206 [01:28<00:00,  2.33it/s]

Accuracy: 58.01%





### FPS

In [None]:
import time

correct = 0
total = 0

# Measure the inference time
start_time = time.time()

for images, labels in tqdm(test_dataloader, desc="Processing Batches"):
    images = images.to(device)
    labels = labels.to(device)

    with torch.no_grad():
        outputs = model(images)
        prediction = torch.argmax(outputs, dim=1)
        correct += (prediction == labels).sum().item()
        total += labels.size(0)

end_time = time.time()

# Calculate FPS
total_time = end_time - start_time
num_frames = total
fps = num_frames / total_time
print(f"FPS: {fps:.2f}")


Processing Batches: 100%|██████████| 206/206 [01:51<00:00,  1.85it/s]

FPS: 59.21



