In [4]:
import torch.nn as nn
from torchvision import datasets, transforms
import torch
import torch.optim as optim
from PIL import Image
import pandas as pd
import os
from torchinfo import summary
!nvidia-smi
# Probe CUDA once, pick the matching device, and leave the availability flag
# as the cell's last expression so the notebook displays it.
cuda_ok = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ok else torch.device("cpu")
cuda_ok

Thu Jan 30 15:02:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1050 Ti     Off |   00000000:01:00.0 Off |                  N/A |
| 30%   38C    P0             N/A /   75W |     301MiB /   4096MiB |     18%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Quadro P620                    Off |   00

True

In [5]:
# Per-channel statistics handed to Normalize (RGB order).
# NOTE(review): these look like the commonly used ImageNet values — confirm.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Steps run in list order on every image: fixed-size resize, random
# left/right flip (augmentation), tensor conversion, channel normalisation.
_pipeline_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform = transforms.Compose(_pipeline_steps)

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
# Root directory of the dataset download. Set the IMAGENET_DIR environment
# variable to point somewhere else instead of editing the notebook; when it is
# unset (or empty) the original location is used. os.path.join(..., "") adds a
# trailing separator if one is missing, because the code below builds paths by
# plain string concatenation onto this value.
folderPath = os.path.join(
    os.environ.get("IMAGENET_DIR") or "/home/atlas/Documents/ImageNet/Kaggle/",
    "",
)
class ImageCSVLoader(Dataset):
    """Dataset that pairs JPEG images with class indices read from a CSV file.

    The CSV must contain the image identifier in its first column and a
    ``PredictionString`` column; the first whitespace-separated token of
    ``PredictionString`` is used as the class name. Class names are mapped to
    integer indices by listing the sub-directories of ``class_dir``.

    Args:
        csv_file: Path of the CSV file to read with ``pandas.read_csv``.
        img_dir: Directory holding the images. If the string ``"train"``
            occurs anywhere in this path, images are looked up as
            ``<img_dir>/<id prefix before the first "_">/<id>.JPEG``;
            otherwise as ``<img_dir>/<id>.JPEG``.
        transform: Optional callable applied to the loaded RGB image.
        class_dir: Directory whose sub-directory names define the classes.
            Defaults to ``folderPath + "ILSVRC/Data/CLS-LOC/train"``.

    Each item is a tuple ``(image, label)`` where ``label`` is a 0-dim
    ``torch.long`` tensor.
    """

    def __init__(self, csv_file, img_dir, transform=None, class_dir=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        if class_dir is None:
            class_dir = folderPath + "ILSVRC/Data/CLS-LOC/train"
        # os.listdir/os.scandir return entries in arbitrary order, so sort to
        # make the label -> index mapping identical across runs and machines.
        # Only directories count as classes; stray files would shift indices.
        with os.scandir(class_dir) as entries:
            class_names = sorted(entry.name for entry in entries if entry.is_dir())
        self.class_map = {label: idx for idx, label in enumerate(class_names)}

    def __len__(self):
        return len(self.data)  # Total number of images

    def __getitem__(self, idx):
        # Read the id and label from row `idx` directly (O(1)) instead of
        # scanning the whole ImageId column with a boolean mask per sample.
        image_id = str(self.data.iloc[idx, 0])
        prediction = self.data["PredictionString"].iloc[idx]
        label = str(prediction.split()[0])

        # os.path.join works whether or not img_dir ends with a separator.
        if "train" in self.img_dir:
            # Training images live in one sub-folder per class prefix.
            img_path = os.path.join(self.img_dir, image_id.split("_")[0], image_id + ".JPEG")
        else:
            img_path = os.path.join(self.img_dir, image_id + ".JPEG")

        # The context manager closes the file once the pixels are converted.
        with Image.open(img_path) as handle:
            image = handle.convert("RGB")

        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(self.class_map[label], dtype=torch.long)

# Validation must be deterministic: same resize/normalisation as training but
# without the random horizontal flip, so metrics are comparable across epochs.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Decode/transform images in background processes so the GPU is not left
# waiting on the main process. Set to 0 if worker processes cannot be used
# (e.g. platforms that spawn instead of fork from a notebook).
NUM_WORKERS = min(4, os.cpu_count() or 1)

val_dataset = ImageCSVLoader(
    csv_file=folderPath + "LOC_val_solution.csv",
    img_dir=folderPath + 'ILSVRC/Data/CLS-LOC/val/',
    transform=val_transform,
)
# Shuffling has no effect on averaged validation metrics, so keep a fixed order.
val_dataloader = DataLoader(
    val_dataset, batch_size=64, shuffle=False, pin_memory=True, num_workers=NUM_WORKERS
)

train_dataset = ImageCSVLoader(
    csv_file=folderPath + "LOC_train_solution.csv",
    img_dir=folderPath + 'ILSVRC/Data/CLS-LOC/train/',
    transform=transform,
)
train_dataloader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, pin_memory=True, num_workers=NUM_WORKERS
)



In [8]:
import torch
import torch.nn as nn

class SeBlock(nn.Module):
    """Channel-gating block: global-pool, two bias-free linear layers, sigmoid.

    The input ``x`` of shape ``[N, C, H, W]`` is averaged over ``H`` and ``W``,
    passed through ``fc1`` (C -> C // r), ReLU, ``fc2`` (back to C) and a
    sigmoid; the resulting per-channel factors in (0, 1) multiply ``x``.

    Args:
        in_channels: Number of channels ``C`` of the input.
        r: Reduction ratio of the bottleneck. The bottleneck width is
            ``max(1, C // r)`` so that inputs with fewer than ``r`` channels
            still get a working (non-empty) bottleneck.
    """

    def __init__(self, in_channels, r=16):
        super().__init__()
        C = in_channels
        # Without the clamp, C < r gives a zero-width layer whose output is
        # empty, making every gate a constant sigmoid(0) = 0.5.
        hidden = max(1, C // r)
        self.globpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(C, hidden, bias=False)
        self.fc2 = nn.Linear(hidden, C, bias=False)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled channel-wise; output shape equals input shape."""
        # x shape: [N, C, H, W]
        f = self.globpool(x)          # [N, C, 1, 1]
        f = torch.flatten(f, 1)       # [N, C]
        f = self.relu(self.fc1(f))
        f = self.sigmoid(self.fc2(f))
        # Restore the two singleton spatial dims so the gate broadcasts over H, W.
        return x * f[:, :, None, None]

class InceptionBlock(nn.Module):
    """Four parallel branches, each ending in an ``SeBlock``, concatenated on channels.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Indexable of six ints, used as
            ``[1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-projection]``.
        reduction: Reduction ratio forwarded to every ``SeBlock``.

    All convolutions use stride 1 with padding that keeps the spatial size, so
    the output has the input's height/width and
    ``out_channels[0] + out_channels[2] + out_channels[4] + out_channels[5]``
    channels.

    Note:
        The pool-projection branch now applies ReLU after its 1x1 convolution,
        like the other three branches. Its ``SeBlock`` therefore sits at index
        3 of ``p4`` (previously index 2); state dicts saved before this change
        need their ``p4.2.*`` keys renamed to ``p4.3.*``.
    """

    def __init__(self, in_channels, out_channels, reduction=16):
        super().__init__()
        # A single stateless ReLU instance is reused in every branch.
        self.relu = nn.ReLU()

        # Branch 1: 1x1 convolution.
        self.p1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels[0], kernel_size=1, padding=0, stride=1), self.relu,
            SeBlock(out_channels[0], reduction),
        )
        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.p2 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels[1], kernel_size=1, padding=0, stride=1), self.relu,
            nn.Conv2d(out_channels[1], out_channels[2], kernel_size=3, padding=1, stride=1), self.relu,
            SeBlock(out_channels[2], reduction),
        )
        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.p3 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels[3], kernel_size=1, padding=0, stride=1), self.relu,
            nn.Conv2d(out_channels[3], out_channels[4], kernel_size=5, padding=2, stride=1), self.relu,
            SeBlock(out_channels[4], reduction),
        )
        # Branch 4: 3x3 max-pool followed by a 1x1 projection (with ReLU).
        self.p4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, padding=1, stride=1),
            nn.Conv2d(in_channels, out_channels[5], kernel_size=1, padding=0, stride=1), self.relu,
            SeBlock(out_channels[5], reduction),
        )

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate along the channel dim."""
        o1 = self.p1(x)
        o2 = self.p2(x)
        o3 = self.p3(x)
        o4 = self.p4(x)
        return torch.cat((o1, o2, o3, o4), dim=1)

class AuxClassifier(nn.Module):
    """Side classification head: avg-pool, 1x1 conv, two linear layers.

    Args:
        in_channels: Channel count of the feature map fed to the head.
        classes: Number of output logits.

    The first linear layer expects ``128 * 4 * 4`` flattened features, i.e.
    the 5x5 / stride-3 average pool must leave a 4x4 map (a 14x14 input does).
    """

    def __init__(self, in_channels, classes):
        super().__init__()
        flat_dim = 128 * 4 * 4  # conv output channels x pooled height x pooled width
        self.avg_pool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv1x1 = nn.Conv2d(in_channels, 128, kernel_size=1, stride=1, padding=0)
        self.fc1 = nn.Linear(flat_dim, flat_dim)
        self.fc2 = nn.Linear(flat_dim, classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.7)

    def forward(self, x):
        """Return logits of shape ``[N, classes]`` for a ``[N, C, H, W]`` input."""
        pooled = self.avg_pool(x)
        reduced = self.relu(self.conv1x1(pooled))
        flat = torch.flatten(reduced, 1)
        hidden = self.fc1(flat)
        # Dropout is applied to the pre-activation, then ReLU, as before.
        hidden = self.dropout(hidden)
        hidden = self.relu(hidden)
        return self.fc2(hidden)

class GoogLeNet(nn.Module):
    """Convolutional stem, nine ``InceptionBlock`` stages and three classifier heads.

    Args:
        in_depth: Number of channels of the input image.
        classes: Number of logits produced by each head.

    ``forward`` returns a list ``[aux1_logits, aux2_logits, main_logits]``.
    The fixed 7x7 average pool before the 1024-feature linear layer, and the
    flatten size inside the auxiliary heads, fit 224x224 inputs.
    """

    def __init__(self, in_depth=3, classes=1000):
        super().__init__()

        # Input channels of each inception stage; each value equals the
        # concatenated output width of the previous stage (192 comes from the stem).
        stage_inputs = [192, 256, 480, 512, 512, 512, 528, 832, 832]
        # Branch widths per stage, in the order InceptionBlock indexes them:
        # [1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-projection].
        stage_widths = [
            [64, 96, 128, 16, 32, 32],
            [128, 128, 192, 32, 96, 64],
            [192, 96, 208, 16, 48, 64],
            [160, 112, 224, 24, 64, 64],
            [128, 128, 256, 24, 64, 64],
            [112, 144, 288, 32, 64, 64],
            [256, 160, 320, 32, 128, 128],
            [256, 160, 320, 32, 128, 128],
            [384, 192, 384, 48, 128, 128],
        ]

        self.AuxClass1 = AuxClassifier(512, classes)
        self.AuxClass2 = AuxClassifier(528, classes)
        self.Blocks = nn.ModuleList(
            [InceptionBlock(c_in, widths) for c_in, widths in zip(stage_inputs, stage_widths)]
        )

        # Stem layers and the final classifier.
        self.Conv7k = nn.Conv2d(in_channels=in_depth, out_channels=64, kernel_size=7, stride=2, padding=3)
        self.Conv1k = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0)
        self.Conv3k = nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1)
        self.MaxPool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.LocalNorm = nn.LocalResponseNorm(size=5)
        self.AvgPool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.FC = nn.Linear(1024, classes)
        self.Dropout = nn.Dropout(0.4)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``[aux1, aux2, main]`` logits for a ``[batch, in_depth, 224, 224]`` input."""
        heads = []

        # Stem: 7x7 conv -> pool -> LRN -> 1x1 conv -> 3x3 conv -> LRN -> pool.
        x = self.relu(self.Conv7k(x))
        x = self.MaxPool(x)
        x = self.LocalNorm(x)
        x = self.relu(self.Conv1k(x))
        x = self.relu(self.Conv3k(x))
        x = self.LocalNorm(x)
        x = self.MaxPool(x)

        for index, block in enumerate(self.Blocks):
            if index in (2, 7):
                # Halve the spatial size before stages 2 and 7.
                x = self.MaxPool(x)
            elif index == 3:
                # First auxiliary head reads the input of stage 3.
                heads.append(self.AuxClass1(x))
            elif index == 6:
                # Second auxiliary head reads the input of stage 6.
                heads.append(self.AuxClass2(x))
            x = block(x)

        x = self.AvgPool(x)
        x = self.Dropout(x)
        x = torch.flatten(x, 1)
        heads.append(self.FC(x))
        return heads
# Smoke test: push one random 224x224 RGB sample through a fresh model and
# print the shape of each returned head.
Inception = GoogLeNet()
dummy_batch = torch.rand(1, 3, 224, 224)
outs = Inception(dummy_batch)
for out in outs:
    print(out.size())


torch.Size([1, 1000])
torch.Size([1, 1000])
torch.Size([1, 1000])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

from torchsummary import summary

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh model placed on the chosen device.
model = GoogLeNet().to(device)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Multiplier applied to each auxiliary head's loss before it is added to the
# main loss (the original comment attributes 0.3 to the Inception paper).
aux_weight = 0.3

# Number of passes over the training set.
num_epochs = 10


def _weighted_loss(outputs, labels):
    """Return ``(total_loss, main_logits)`` for the model's three-head output.

    ``outputs`` is unpacked as ``(aux1, aux2, main)``; each auxiliary loss is
    scaled by ``aux_weight`` and added to the main loss.
    """
    aux1, aux2, main_out = outputs
    loss_main = criterion(main_out, labels)
    loss_aux1 = criterion(aux1, labels) * aux_weight
    loss_aux2 = criterion(aux2, labels) * aux_weight
    return loss_main + loss_aux1 + loss_aux2, main_out


for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    train_loader_tqdm = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]", leave=False)

    for images, labels in train_loader_tqdm:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        total_loss, _main = _weighted_loss(model(images), labels)
        total_loss.backward()
        optimizer.step()

        batch_loss = total_loss.item()
        running_loss += batch_loss
        train_loader_tqdm.set_postfix(loss=f"{batch_loss:.4f}")

    avg_train_loss = running_loss / len(train_dataloader)

    # ---- validation pass (no gradients, dropout disabled) ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    val_loader_tqdm = tqdm(val_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} [Validation]", leave=False)

    with torch.no_grad():
        for images, labels in val_loader_tqdm:
            images = images.to(device)
            labels = labels.to(device)

            total_loss, main_out = _weighted_loss(model(images), labels)
            batch_loss = total_loss.item()
            val_loss += batch_loss

            # Top-1 accuracy is measured on the main head only.
            predicted = torch.max(main_out, 1)[1]
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            val_loader_tqdm.set_postfix(loss=f"{batch_loss:.4f}")

    avg_val_loss = val_loss / len(val_dataloader)
    val_accuracy = 100 * correct / total

    print(f"\nEpoch {epoch+1}/{num_epochs}: Train Loss = {avg_train_loss:.4f}, Val Loss = {avg_val_loss:.4f}, Val Accuracy = {val_accuracy:.2f}%")

    # Checkpoint the weights at the end of every epoch.
    torch.save(model.state_dict(), f'model_epoch_{epoch+1}.pth')

print("Training complete! Model weights saved.")



Epoch 1/10 [Training]:   5%|▌         | 449/8509 [24:55<7:32:00,  3.36s/it, loss=11.0857]

In [None]:
# Write the current parameters of `model` to a fixed file name.
final_weights = model.state_dict()
torch.save(final_weights, 'modelCustom_weights.pth')