In [4]:
import torch as th 
import torch.nn as nn 
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import os 
os.chdir(r"C:\Users\Mudassar\Desktop\Z_Image")
print("Now working in:", os.getcwd())


import pandas as pd

import torch


import skimage as ski
from skimage import io, color

import torch.optim as optim


import torchvision as tv
import torchvision.transforms as transforms

import numpy as np 
import matplotlib
from matplotlib import pyplot as plt


from torchsummary import summary 

print ("Name and Versions of libraries")
print ("PyTorch version:", th.__version__)
print ("Torchvision version:", tv.__version__)
print ("Numpy version:", np.__version__)
print ("Matplotlib version:", matplotlib.__version__)
print("Pandas Version:", pd.__version__)
print ("Scikit-image version:", ski.__version__)

print ("Device:", th.device("cuda" if th.cuda.is_available() else "cpu"))
if th.cuda.is_available():
    print(f"GPU device name: {th.cuda.get_device_name(0)}")
else:
    print("GPU device not available.")

Now working in: C:\Users\Mudassar\Desktop\Z_Image
Name and Versions of libraries
PyTorch version: 2.8.0+cu129
Torchvision version: 0.23.0+cu129
Numpy version: 2.1.2
Matplotlib version: 3.10.6
Pandas Version: 2.3.2
Scikit-image version: 0.25.2
Device: cuda
GPU device name: NVIDIA GeForce RTX 4060 Laptop GPU


In [None]:
# --- Creating a dataset ---



class CustomDatasetCarsANDTanks(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform


        # -- filtering out rows with missing files
        for _, row in self.data.iterrows():
            img_path = os.path.join(self.root_dir, row.iloc[0])
            if not os.path.isfile(img_path):
                print(f"Warning: File {img_path} not found. Skipping this entry.")
                self.data = self.data.drop(_)

        # -- Automatically handle string labels if they exist (e.g., "car", "tank")
        if isinstance(self.data.iloc[0, 1], str):
            classes = sorted(self.data.iloc[:, 1].unique())
            self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}
        else:
            self.class_to_idx = None

    def __len__(self):
        return len(self.data)  # -- around 3400 images now upgraded to 6k lol after adding more tanks  and after i lost my data i upgraded to 12k

    def __getitem__(self, index):
        if th.is_tensor(index):
            index = index.tolist()

        img_name = os.path.join(self.root_dir, self.data.iloc[index, 0])  # csv file

        # Read image with skimage.io
        try:
            image = io.imread(img_name)
        except FileNotFoundError:
            # -- erase entry if the data is missing
            raise FileNotFoundError(f"Missing file: {img_name}")
        except Exception as e:
            raise RuntimeError(f"Error reading {img_name}: {e}")

        label_value = self.data.iloc[index, 1]
        if self.class_to_idx:
            #  -- map string label to index
            label = th.tensor(self.class_to_idx[label_value], dtype=th.long)
        else:
            label = th.tensor(int(label_value), dtype=th.long)

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# -- Checking if files are actually there -- i wanna know the bach size too == also importing it
print("CSV ", os.path.exists("Cars_Tanks.csv"))
print("Training folder", os.path.exists("train"))

dataset = CustomDatasetCarsANDTanks(
    csv_file="Cars_Tanks.csv",
    root_dir="train",
    transform=transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
)
df = pd.read_csv("Cars_Tanks.csv")
print("Length of dataset:", len(df))  # around 3348 images now with added tanks 6k lol now after i lost my data i upgraded to 12k


# -- now trying to see the images in the files and comparing them against the csv entries to get the actual number of valid images

image_extensions = {'.jpg', '.jpeg', '.png', '.gif'}
folder_path = "train" # path to the folder containing images

image_files = [
    f for f in os.listdir(folder_path)
    if os.path.splitext(f)[1].lower() in image_extensions   # -- checking for valid image extensions
]

print("Images in folder:", len(image_files))   # -- number of images in the folder


valid_extensions = {'.jpg', '.jpeg', '.png', '.gif'}   # for the intersection check

csv_files = set(df.iloc[:, 0].astype(str))    # -- getting the file names as strings from the csv file
folder_files = set(
    f for f in os.listdir(folder_path)
    if os.path.splitext(f)[1].lower() in valid_extensions   # same concept as above
)


matched_files = csv_files.intersection(folder_files)   # -- getting the common files in both csv and folder

print(" Valid image from both CSV file and the actual folder :", len(matched_files))








CSV  True
Training folder True
Length of dataset: 12252
Images in folder: 12246
 Valid image from both CSV file and the actual folder : 12216


In [20]:
# -- Creating a dataset loader --

# -- dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)-- (dont even need it)

train_set, val_set, test_set = torch.utils.data.random_split(dataset, [8551, 1222, 2443])  # -- 70/10/20 split
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)   # -- pin memeory to boos time


In [21]:
print("Number of training batches:", len(train_loader))
print("Number of validation batches:", len(val_loader))
print("Number of testing batches:", len(test_loader))

Number of training batches: 268
Number of validation batches: 77
Number of testing batches: 77


In [22]:
# -- to check the shape, tho i am already confident <@:)
print(f"A batch shape: {train_loader.dataset[0][0].shape}")  

# --here 3 represents the colors which is RGB and 224,224 is the image size after resizing nice:)---

A batch shape: torch.Size([3, 224, 224])


In [23]:
# -- Defining the CNN Model --

device = th.device("cuda" if th.cuda.is_available() else "cpu")   # -- added this line to specify device --

"""

OLD MODEL IGNORE

class ConvNeuralNet(nn.Module):
    def __init__(self):
        super(ConvNeuralNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)  
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 2)  # 2 output classes: car and tank

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 32 * 56 * 56)  # -- basically we are flattening it here --
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
""" 
class ConvNeuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 
        self.bn1 = nn.BatchNorm2d(16)                                                              # -- added batch normalization -- since we are going to add more conv layers- padding to maintain size

        # no pooling
    
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(32)  

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)          # 1st pooling layer
    
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)

        # no pooling


        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(128)

        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)         # 2nd pooling layer

        
    
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(256)

        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)         # 3rd pooling layer

        #--- Fully connected layers ---


        self.fc1 = nn.Linear(256 * 28 * 28, 512)                        # first fully connected layer
        self.dropout1 = nn.Dropout(0.5)                                 # for stabilization
        self.fc2 = nn.Linear(512, 2)                                    # second fully connected layer for 2 classes

    def forward(self, x):

        x = F.relu(self.bn1(self.conv1(x)))           # relu to skip linearities
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)


        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool2(x)


        x = F.relu(self.bn5(self.conv5(x)))
        x = self.pool3(x)

        x = torch.flatten(x, 1)   # -- flattening -- safer than view

        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.fc2(x)
        return x



# -- currently i have 2 conv layers, 2 pooling layers and 2 fully connected layers -- planning to add more... 25/10/2025 so added more conv layers specifically 5 conv layers and 3 pooling layers now still 25/10/2025 :) fast doer



net = ConvNeuralNet()
net.to(device)

ConvNeuralNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_

In [24]:
summary(net, (3, 224, 224))  # i mean the input size is (3, 224, 224 as seen from module 7

# from the code below we can see that the output shape becomes half after each pooling layer

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             448
       BatchNorm2d-2         [-1, 16, 224, 224]              32
            Conv2d-3         [-1, 32, 224, 224]           4,640
       BatchNorm2d-4         [-1, 32, 224, 224]              64
         MaxPool2d-5         [-1, 32, 112, 112]               0
            Conv2d-6         [-1, 64, 112, 112]          18,496
       BatchNorm2d-7         [-1, 64, 112, 112]             128
            Conv2d-8        [-1, 128, 112, 112]          73,856
       BatchNorm2d-9        [-1, 128, 112, 112]             256
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
      BatchNorm2d-12          [-1, 256, 56, 56]             512
        MaxPool2d-13          [-1, 256, 28, 28]               0
           Linear-14                  [

In [25]:
# -- now training the model --

import torch.optim as optim # -- since we did not import it earlier --
from torch.optim.lr_scheduler import StepLR  #-- for learning rate scheduler --

from tqdm import tqdm   # -- for progress bar --

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay= 1e-4)  #-- 0.001 was my initial learning rate but i increased it to 0.42 nvm get it back to 0.001 -- weight decay for regularization

scheduler = StepLR(optimizer, step_size=5, gamma=0.5)  # -- learning rate scheduler to reduce LR every 5 epochs by a factor of 0.5
# loss = criterion(outputs, labels) # adding this later 



# -- an error is comming up in my CSV files ofc man i didntt clean them properly -- lets fix it 
# all the fixes are above in the dataset class ( importing dataset class module specifically )


# -- after i added more tanks then another error came up -- lets fix it
# basically the files are missing but are still in the CSV so i guess i will again tinker with the dataset class to see what i can do man this is getting annoying


epchos = 19
for epoch in range(epchos):
    running_loss = 0.0
    for i, data in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{epchos}"), 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}") # -- printing average loss over last 100 batches --
            running_loss = 0.0

    
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in val_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy of the model on the validation images: {accuracy:.2f}%') 
    


Epoch 1/19:  37%|███▋      | 100/268 [00:48<01:24,  1.99it/s]

[1, 100] loss: 13.304


Epoch 1/19:  75%|███████▍  | 200/268 [01:44<00:40,  1.66it/s]

[1, 200] loss: 0.756


Epoch 1/19: 100%|██████████| 268/268 [02:21<00:00,  1.89it/s]
Epoch 2/19:  37%|███▋      | 100/268 [00:54<01:26,  1.95it/s]

[2, 100] loss: 0.478


Epoch 2/19:  75%|███████▍  | 200/268 [01:53<00:37,  1.79it/s]

[2, 200] loss: 0.457


Epoch 2/19: 100%|██████████| 268/268 [02:30<00:00,  1.78it/s]
Epoch 3/19:  37%|███▋      | 100/268 [00:55<01:42,  1.64it/s]

[3, 100] loss: 0.450


Epoch 3/19:  75%|███████▍  | 200/268 [01:52<00:38,  1.75it/s]

[3, 200] loss: 0.418


Epoch 3/19: 100%|██████████| 268/268 [02:31<00:00,  1.77it/s]
Epoch 4/19:  37%|███▋      | 100/268 [00:55<01:33,  1.80it/s]

[4, 100] loss: 0.393


Epoch 4/19:  75%|███████▍  | 200/268 [01:51<00:35,  1.90it/s]

[4, 200] loss: 0.408


Epoch 4/19: 100%|██████████| 268/268 [02:30<00:00,  1.78it/s]
Epoch 5/19:  37%|███▋      | 100/268 [00:56<01:33,  1.79it/s]

[5, 100] loss: 0.389


Epoch 5/19:  75%|███████▍  | 200/268 [01:51<00:38,  1.78it/s]

[5, 200] loss: 0.398


Epoch 5/19: 100%|██████████| 268/268 [02:30<00:00,  1.79it/s]
Epoch 6/19:  37%|███▋      | 100/268 [00:56<01:40,  1.68it/s]

[6, 100] loss: 0.406


Epoch 6/19:  75%|███████▍  | 200/268 [01:52<00:35,  1.92it/s]

[6, 200] loss: 0.366


Epoch 6/19: 100%|██████████| 268/268 [02:30<00:00,  1.78it/s]
Epoch 7/19:  37%|███▋      | 100/268 [00:58<01:46,  1.58it/s]

[7, 100] loss: 0.379


Epoch 7/19:  75%|███████▍  | 200/268 [02:04<00:41,  1.63it/s]

[7, 200] loss: 0.362


Epoch 7/19: 100%|██████████| 268/268 [02:46<00:00,  1.61it/s]
Epoch 8/19:  37%|███▋      | 100/268 [01:03<01:43,  1.62it/s]

[8, 100] loss: 0.379


Epoch 8/19:  75%|███████▍  | 200/268 [02:02<00:39,  1.72it/s]

[8, 200] loss: 0.377


Epoch 8/19: 100%|██████████| 268/268 [02:39<00:00,  1.68it/s]
Epoch 9/19:  37%|███▋      | 100/268 [00:57<01:32,  1.81it/s]

[9, 100] loss: 0.366


Epoch 9/19:  75%|███████▍  | 200/268 [01:53<00:36,  1.85it/s]

[9, 200] loss: 0.411


Epoch 9/19: 100%|██████████| 268/268 [02:31<00:00,  1.77it/s]
Epoch 10/19:  37%|███▋      | 100/268 [00:56<01:44,  1.61it/s]

[10, 100] loss: 0.398


Epoch 10/19:  75%|███████▍  | 200/268 [01:42<00:31,  2.14it/s]

[10, 200] loss: 0.375


Epoch 10/19: 100%|██████████| 268/268 [02:14<00:00,  1.99it/s]
Epoch 11/19:  37%|███▋      | 100/268 [00:45<01:24,  2.00it/s]

[11, 100] loss: 0.350


Epoch 11/19:  75%|███████▍  | 200/268 [01:33<00:28,  2.42it/s]

[11, 200] loss: 0.337


Epoch 11/19: 100%|██████████| 268/268 [02:05<00:00,  2.14it/s]
Epoch 12/19:  37%|███▋      | 100/268 [00:50<01:35,  1.75it/s]

[12, 100] loss: 0.374


Epoch 12/19:  75%|███████▍  | 200/268 [01:45<00:36,  1.88it/s]

[12, 200] loss: 0.338


Epoch 12/19: 100%|██████████| 268/268 [02:23<00:00,  1.87it/s]
Epoch 13/19:  37%|███▋      | 100/268 [00:55<01:36,  1.75it/s]

[13, 100] loss: 0.332


Epoch 13/19:  75%|███████▍  | 200/268 [01:51<00:35,  1.91it/s]

[13, 200] loss: 0.318


Epoch 13/19: 100%|██████████| 268/268 [02:29<00:00,  1.79it/s]
Epoch 14/19:  37%|███▋      | 100/268 [00:56<01:44,  1.60it/s]

[14, 100] loss: 0.372


Epoch 14/19:  75%|███████▍  | 200/268 [01:50<00:34,  1.96it/s]

[14, 200] loss: 0.334


Epoch 14/19: 100%|██████████| 268/268 [02:21<00:00,  1.89it/s]
Epoch 15/19:  37%|███▋      | 100/268 [00:47<01:24,  1.99it/s]

[15, 100] loss: 0.337


Epoch 15/19:  75%|███████▍  | 200/268 [01:34<00:28,  2.40it/s]

[15, 200] loss: 0.331


Epoch 15/19: 100%|██████████| 268/268 [02:05<00:00,  2.13it/s]
Epoch 16/19:  37%|███▋      | 100/268 [00:47<01:12,  2.33it/s]

[16, 100] loss: 0.310


Epoch 16/19:  75%|███████▍  | 200/268 [01:34<00:34,  1.97it/s]

[16, 200] loss: 0.319


Epoch 16/19: 100%|██████████| 268/268 [02:05<00:00,  2.14it/s]
Epoch 17/19:  37%|███▋      | 100/268 [00:47<01:24,  1.98it/s]

[17, 100] loss: 0.333


Epoch 17/19:  75%|███████▍  | 200/268 [01:33<00:32,  2.12it/s]

[17, 200] loss: 0.269


Epoch 17/19: 100%|██████████| 268/268 [02:04<00:00,  2.15it/s]
Epoch 18/19:  37%|███▋      | 100/268 [00:47<01:20,  2.09it/s]

[18, 100] loss: 0.278


Epoch 18/19:  75%|███████▍  | 200/268 [01:34<00:30,  2.24it/s]

[18, 200] loss: 0.290


Epoch 18/19: 100%|██████████| 268/268 [02:05<00:00,  2.13it/s]
Epoch 19/19:  37%|███▋      | 100/268 [00:46<01:18,  2.14it/s]

[19, 100] loss: 0.293


Epoch 19/19:  75%|███████▍  | 200/268 [01:34<00:29,  2.27it/s]

[19, 200] loss: 0.277


Epoch 19/19: 100%|██████████| 268/268 [02:05<00:00,  2.14it/s]


Validation Accuracy of the model on the validation images: 91.20%


In [None]:
# temporrary code to check device placement
# -- i decided to keep it here but might delete it because why not :) its practically useless


print("CUDA available:", torch.cuda.is_available())
print("Current device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")
net = ConvNeuralNet().to(device)
print(next(net.parameters()).device)


In [26]:
correct = 0
total = 0
with torch.no_grad():                    
    for data in test_loader:
        images, labels = data                #-- getting images and labels from the test loader --
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()              #-- calculating correct predictions --

print(f"Accuracy on the test images: {100 * correct / total:.2f}%")


csv_path = r"C:\Users\Mudassar\Desktop\Z_Image\Cars_Tanks.csv"
df = pd.read_csv(csv_path)

print(df.head())
print("\nNames", df.columns.tolist())          #-- checking the dataset
print(df['car'].value_counts())



Accuracy on the test images: 91.49%
                                            filename  car
0  images266-2-_jpg.rf.b8ca42a696e0fab22f27517786...    0
1  images24-77-_png.rf.f493a48626486b306292bbaeb8...    0
2  images264-1-_jpg.rf.a4e7bc2fbf5604e0ef0a2e4c65...    0
3  102-133-_jpg.rf.cf3600113eb6c148c1b8ed1465b419...    0
4  106-237-_jpg.rf.65a5efd274c43e4b1669b01db774ec...    0

Names ['filename', 'car']
car
1    6161
0    6091
Name: count, dtype: int64


In [None]:
# -- checking if the accuracy is the same as before -- 3rd check i am a skeptical person :O

generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = torch.utils.data.random_split(dataset, [0.7, 0.15, 0.15], generator=generator)
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"True test accuracy: {100 * correct / total:.2f}%")


True test accuracy: 90.99%


In [None]:
#saving the full mode

torch.save(net.state_dict(), "car_tank_cnn_model_v2_MAIN.pth")
print("Model saved successfully.")