In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [36]:
import torch
import h5py
from torch.utils.data import Dataset, DataLoader, random_split

class ElectronPhotonDataset(Dataset):
    def __init__(self, electron_file, photon_file):
        with h5py.File(electron_file, "r") as f_e:
            electrons = torch.tensor(f_e["X"][:], dtype=torch.float32) 
            electron_labels = torch.tensor(f_e["y"][:], dtype=torch.long)
        
        with h5py.File(photon_file, "r") as f_p:
            photons = torch.tensor(f_p["X"][:], dtype=torch.float32) 
            photon_labels = torch.tensor(f_p["y"][:], dtype=torch.long)

        self.images = torch.cat((electrons, photons), dim=0) 
        self.labels = torch.cat((electron_labels, photon_labels), dim=0)

        self.images = self.images.permute(0, 3, 1, 2)  

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

dataset = ElectronPhotonDataset("/kaggle/input/photonelectron/SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5", "/kaggle/input/photonelectron/SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5")

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
# train_size = int(0.4 * len(dataset))
# val_size = int(0.1 * len(dataset))
# rest_size = len(dataset) - train_size - val_size
# train_dataset, val_dataset, _ = random_split(dataset, [train_size, val_size, rest_size])

# Create data loaders with multiple workers
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=8)

In [49]:
class someWildBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(someWildBlock, self).__init__()

        mid_channels = out_channels // 3
        mid_channels_last = out_channels - (mid_channels + mid_channels)
        # self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU()
        )
        
        # 1x1 -> 3x3 conv (to simulate 5x5 receptive field)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.SiLU(),
            nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.SiLU()
        )
        
        # Depthwise Separable 3x3 Conv
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size=5, groups=in_channels, padding=2, bias=False),
            nn.Conv2d(in_channels, mid_channels_last, kernel_size=1, bias=False),
            nn.BatchNorm2d(mid_channels_last),
            nn.SiLU()
        )
        # self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)

        
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.silu = nn.SiLU()
        
        self.shortcut = nn.Sequential()
        if in_channels != out_channels or stride != 1:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out1 = self.conv1(x)
        out2 = self.conv2(x)
        out3 = self.conv3(x)
        # print(out1.shape, out2.shape, out3.shape)
        target_size = out1.shape[2:]  
        out2 = F.interpolate(out2, size=target_size, mode='bilinear', align_corners=True)
        out3 = F.interpolate(out3, size=target_size, mode='bilinear', align_corners=True)
        
        out = torch.cat([out1, out2, out3], dim=1)
        out = self.bn(out)
        
        out = out+self.shortcut(x)
        # out = self.relu(out)
        # out = self.silu(out)
        return F.silu(out)

In [50]:
class ResNet15(nn.Module):
    def __init__(self, num_classes=2, drop=0.4):
        super(ResNet15, self).__init__()
        self.in_channels = 64

        # self.conv1 = nn.Conv2d(2, self.in_channels, kernel_size=3, stride=1, padding=1)
        # self.conv1 = nn.Conv2d(2, self.in_channels, kernel_size=7, stride=2, padding=3)
        self.conv1 = nn.Conv2d(2, self.in_channels, kernel_size=5, stride=1, padding=2)

        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.bn = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.silu = nn.SiLU()

        self.layer1 = self.block_layer(self.in_channels, 64, 2)
        self.layer2 = self.block_layer(64, 128, 2, stride=2)
        self.layer3 = self.block_layer(128, 256, 1, stride=2)
        self.layer4 = self.block_layer(256, 512, 1, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.fc = nn.Sequential(
                    nn.Linear(512, 256),
                    nn.ReLU(),
                    # nn.Dropout(0.4),
                    nn.Dropout(drop),
                    nn.Linear(256, num_classes)
                )
        
        self.shortcut_conv = nn.Conv2d(64, 512, kernel_size=1, stride=4, bias=False)
        self.shortcut_bn = nn.BatchNorm2d(512)

    def block_layer(self, in_channels, out_channels, blocks, stride=1):
        strides = [stride] + [1] * (blocks - 1) 
        layers = []
        for s in strides:
            # layers.append(Block(self.in_channels, out_channels, s))
            layers.append(someWildBlock(self.in_channels, out_channels, s))
            self.in_channels = out_channels 
        return nn.Sequential(*layers)

    def forward(self, x):
        shortcut_connection = self.conv1(x)
        shortcut_connection = self.shortcut_conv(shortcut_connection) 
        shortcut_connection = self.shortcut_bn(shortcut_connection)

        x = self.relu(self.bn(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        if shortcut_connection.shape != x.shape:
            shortcut_connection = F.interpolate(shortcut_connection, size=x.shape[2:], mode="bilinear")

        x = x+shortcut_connection
        x = self.avgpool(x)

        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [51]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet15(num_classes=2).to(device)
model = nn.DataParallel(model)
model.load_state_dict(torch.load("/kaggle/working/resnet15_15.pth"))

  model.load_state_dict(torch.load("/kaggle/working/resnet15_15.pth"))


<All keys matched successfully>

In [48]:
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import OneCycleLR

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ResNet15(num_classes=2).to(device)
model = nn.DataParallel(model)  

criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.0003)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
# optimizer = FusedLAMB(model.parameters(), lr=1e-3, weight_decay=1e-4)
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
# scheduler = OneCycleLR(
#     optimizer,
#     max_lr=5e-3,
#     steps_per_epoch=len(train_loader), 
#     total_steps=None,
#     epochs=10,
#     pct_start=0.3,
#     anneal_strategy="cos",  
#     div_factor=10,  
#     final_div_factor=100  
# )
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=4)

max_val = 0
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for X, y in train_loader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(X.float())
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        scheduler.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y).sum().item()
        total += y.size(0)

    train_acc = 100 * correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {train_acc:.2f}%")

    # Validation phase
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for X, y in val_loader:
            X, y = X.to(device), y.to(device)
            outputs = model(X.float())
            loss = criterion(outputs, y)

            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == y).sum().item()
            val_total += y.size(0)

    val_acc = 100 * val_correct / val_total
    print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {val_acc:.2f}%")

    if val_acc > max_val:
        torch.save(model.state_dict(), "resnet15.pth")
        max_val = val_acc

    # After 10 epochs, re-instantiate the model with best parameters and drop=0.8
    if epoch + 1 == 10:
        print("Re-instantiating model with best parameters and drop=0.8")
        best_model = ResNet15(num_classes=2, drop=0.8).to(device)
        best_model.load_state_dict(torch.load("resnet15.pth"))
        model = best_model

Epoch 1/5, Loss: 0.5018, Accuracy: 76.01%  
Validation Loss: 0.5206, Accuracy: 74.52%  
Epoch 2/5, Loss: 0.4859, Accuracy: 76.86%  
Validation Loss: 0.5282, Accuracy: 74.43%  
Epoch 3/5, Loss: 0.4668, Accuracy: 77.79%  
Validation Loss: 0.5512, Accuracy: 73.77%  
Epoch 4/5, Loss: 0.4443, Accuracy: 77.02%  
Validation Loss: 0.6275, Accuracy: 69.09%  
Epoch 5/5, Loss: 0.4229, Accuracy: 78.01%  
Validation Loss: 0.6937, Accuracy: 74.72%


In [53]:
import torch

# Load the saved model state
model_path = "/kaggle/working/resnet15_15.pth"
model.load_state_dict(torch.load(model_path))
model.eval()

correct, total = 0, 0
with torch.no_grad():
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        outputs = model(X.float())
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y).sum().item()
        total += y.size(0)

train_acc = 100 * correct / total
print(f"Training Accuracy: {train_acc:.2f}%")

# Evaluate on validation set
correct, total = 0, 0
with torch.no_grad():
    for X, y in val_loader:
        X, y = X.to(device), y.to(device)
        outputs = model(X.float())
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y).sum().item()
        total += y.size(0)

val_acc = 100 * correct / total
print(f"Validation Accuracy: {val_acc:.2f}%")


  model.load_state_dict(torch.load(model_path))


Training Accuracy: 77.90%
Validation Accuracy: 74.72%


In [52]:
import torch

# Load the state dictionary from the .pth file
state_dict = torch.load("resnet15_15.pth", map_location="cpu")

# Iterate through all parameters in the state dict
for name, param in state_dict.items():
    print(f"Parameter: {name}")
    print(f"Shape: {param.shape}")
    # Print a summary: first 5 elements flattened (if tensor has enough elements)
    flattened = param.view(-1)
    num_elements = flattened.numel()
    sample = flattened[:5] if num_elements >= 5 else flattened
    print(f"Sample values: {sample}")
    print("-" * 50)


  state_dict = torch.load("resnet15_15.pth", map_location="cpu")


Parameter: module.conv1.weight
Shape: torch.Size([64, 2, 5, 5])
Sample values: tensor([0.0567, 0.0706, 0.0925, 0.0081, 0.2211])
--------------------------------------------------
Parameter: module.conv1.bias
Shape: torch.Size([64])
Sample values: tensor([-0.1172, -0.1344,  0.0044, -0.1296,  0.0683])
--------------------------------------------------
Parameter: module.bn.weight
Shape: torch.Size([64])
Sample values: tensor([0.8612, 0.8424, 0.9448, 1.1179, 1.0341])
--------------------------------------------------
Parameter: module.bn.bias
Shape: torch.Size([64])
Sample values: tensor([ 0.0328, -0.0451,  0.1046,  0.1325,  0.1172])
--------------------------------------------------
Parameter: module.bn.running_mean
Shape: torch.Size([64])
Sample values: tensor([-0.1182, -0.1333,  0.0052, -0.1304,  0.0690])
--------------------------------------------------
Parameter: module.bn.running_var
Shape: torch.Size([64])
Sample values: tensor([0.0014, 0.0012, 0.0007, 0.0010, 0.0009])
------------