In [2]:
pip install thop

Collecting thop
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->thop)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->thop)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->thop)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->thop)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->thop)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->thop)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2

In [3]:
# LeafNet-ReLU (Fully Customized) Training
# Author: Md. Sifat Haque Zidan

import os, time, copy, random, numpy as np, pandas as pd
import torch, torch.nn as nn, torch.optim as optim
import matplotlib.pyplot as plt, seaborn as sns
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support

# FLOPs
try:
    from thop import profile
    THOP_AVAILABLE = True
except Exception:
    THOP_AVAILABLE = False
    print("thop not available — FLOPs skipped")

# Settings
# Dataset root (read below via its train/val/test sub-folders) and the folder
# that receives every artefact written by this notebook.
DATA_ROOT = "/kaggle/input/plant-disease-dataset/Dataset_Final_V2_Split"
OUT_DIR = "/kaggle/working/leafnet_relu_final"
os.makedirs(OUT_DIR, exist_ok=True)

# Input resolution and training schedule.
IMG_SIZE = 160
BATCH_SIZE = 32
EPOCHS = 100

# Optimiser step size, DataLoader worker count, and the shared RNG seed.
LR = 1e-3
NUM_WORKERS = 4
SEED = 42

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Seed every RNG the notebook touches: Python, NumPy, and torch (CPU + all GPUs).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

print(f"Device: {DEVICE} | GPUs: {torch.cuda.device_count()}")

# Dataset
# A single preprocessing pipeline is applied to every split: resize to a square
# IMG_SIZE x IMG_SIZE image, convert to a tensor, then normalise each channel
# with fixed mean / std values.
tfm = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Each split is an ImageFolder rooted at its own sub-directory of DATA_ROOT.
train_ds = datasets.ImageFolder(f"{DATA_ROOT}/train", transform=tfm)
val_ds = datasets.ImageFolder(f"{DATA_ROOT}/val", transform=tfm)
test_ds = datasets.ImageFolder(f"{DATA_ROOT}/test", transform=tfm)

# Loader options common to all splits; only the training loader shuffles.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_ds, shuffle=False, **loader_kwargs)

# The class list (and therefore the label indices) is taken from the training split.
class_names = train_ds.classes
NUM_CLASSES = len(class_names)
print(f"Classes={NUM_CLASSES}, Train={len(train_ds)}, Val={len(val_ds)}, Test={len(test_ds)}")

# Model 
class DepthwiseSeparableConv(nn.Module):
    """Depthwise 3x3 conv -> pointwise 1x1 conv -> GroupNorm -> ReLU.

    Args:
        in_ch: number of input channels (also the group count of the depthwise conv).
        out_ch: number of output channels produced by the pointwise conv.
        stride: stride of the depthwise conv (default 1).
    """

    def __init__(self, in_ch, out_ch, stride=1):
        super().__init__()
        # Per-channel 3x3 filter (groups == in_ch); padding 1 keeps the size at stride 1.
        self.depth = nn.Conv2d(in_ch, in_ch, kernel_size=3, stride=stride, padding=1,
                               groups=in_ch, bias=False)
        # 1x1 conv mixes channels and sets the output width.
        self.point = nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False)
        # GroupNorm requires out_ch to be divisible by the group count. Prefer 8,
        # then 4 (as before), and fall back to 2 or 1 so widths that are not a
        # multiple of 4 no longer raise at construction time.
        num_groups = next(g for g in (8, 4, 2, 1) if out_ch % g == 0)
        self.norm = nn.GroupNorm(num_groups, out_ch)
        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply depthwise conv, pointwise conv, normalisation and ReLU in that order."""
        return self.act(self.norm(self.point(self.depth(x))))

class LeafNetReLU(nn.Module):
    """Image classifier built from a conv stem, three depthwise-separable stages,
    one extra 3x3 conv, global average pooling and a two-layer head.

    Args:
        num_classes: width of the final linear layer (one output per class).
    """

    def __init__(self, num_classes):
        super().__init__()
        stem_ch, stage1_ch, stage2_ch, stage3_ch = 144, 224, 320, 448
        hidden = 1024

        # Stem: 3x3 conv at stride 1 with padding 1, so spatial size is unchanged.
        self.stem = nn.Sequential(
            nn.Conv2d(3, stem_ch, kernel_size=3, stride=1, padding=1, bias=False),
            nn.GroupNorm(8, stem_ch),
            nn.ReLU(inplace=True),
        )

        # Three stride-2 depthwise-separable stages, each followed by dropout.
        self.block1 = nn.Sequential(
            DepthwiseSeparableConv(stem_ch, stage1_ch, 2),
            nn.Dropout(0.15),
        )
        self.block2 = nn.Sequential(
            DepthwiseSeparableConv(stage1_ch, stage2_ch, 2),
            nn.Dropout(0.20),
        )
        self.block3 = nn.Sequential(
            DepthwiseSeparableConv(stage2_ch, stage3_ch, 2),
            nn.Dropout(0.25),
        )

        # Extra full 3x3 conv at the final width.
        self.conv_extra = nn.Sequential(
            nn.Conv2d(stage3_ch, stage3_ch, kernel_size=3, stride=1, padding=1, bias=False),
            nn.GroupNorm(8, stage3_ch),
            nn.ReLU(inplace=True),
            nn.Dropout(0.25),
        )

        # Collapse each feature map to a single value.
        self.gap = nn.AdaptiveAvgPool2d(1)

        # Head: flatten -> hidden layer with GroupNorm/ReLU/dropout -> class scores.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(stage3_ch, hidden, bias=False),
            nn.GroupNorm(8, hidden),
            nn.ReLU(inplace=True),
            nn.Dropout(0.4),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Run the feature stages in order and return the classifier output (no softmax applied)."""
        feature_stages = (self.stem, self.block1, self.block2, self.block3,
                          self.conv_extra, self.gap)
        for stage in feature_stages:
            x = stage(x)
        return self.classifier(x)

model = LeafNetReLU(NUM_CLASSES)
params = sum(p.numel() for p in model.parameters())
print(f"Total Params: {params:,} ({params/1e6:.3f}M)")

# Efficiency Summary
# Three figures are collected before training: operation count (if thop is
# installed), mean single-image forward latency, and serialized state_dict size.
flops_M, inf_ms, size_mb = None, None, None

if THOP_AVAILABLE:
    # profile() instruments the modules it visits (forward hooks plus counter
    # buffers), so it is run on a throw-away copy to keep the model that gets
    # trained and checkpointed untouched.
    probe = copy.deepcopy(model)
    ops, _ = profile(probe, inputs=(torch.randn(1, 3, IMG_SIZE, IMG_SIZE),), verbose=False)
    # NOTE(review): thop's README calls this first return value "macs"; the
    # "FLOPs" label is kept as-is — confirm which unit is intended.
    flops_M = ops / 1e6
    del probe

dummy = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(DEVICE)
model = model.to(DEVICE)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model.eval()

# Latency: a few untimed warm-up passes first so one-time start-up cost is not
# averaged into the result, then N_TIMED timed passes bracketed by device syncs.
N_WARMUP, N_TIMED = 5, 30
on_cuda = DEVICE.type == 'cuda'
with torch.no_grad():
    for _ in range(N_WARMUP):
        model(dummy)
    if on_cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(N_TIMED):
        model(dummy)
    if on_cuda:
        torch.cuda.synchronize()
inf_ms = (time.perf_counter() - start) / N_TIMED * 1000

# Size on disk: serialise the state_dict to a temporary file, measure, then remove it.
tmp = os.path.join(OUT_DIR, "tmp.pth")
try:
    torch.save(model.state_dict(), tmp)
    size_mb = os.path.getsize(tmp) / (1024 * 1024)
finally:
    if os.path.exists(tmp):
        os.remove(tmp)

# flops_M stays None when thop is missing; formatting None with ':.2f' would raise.
flops_txt = f"{flops_M:.2f}M" if flops_M is not None else "n/a"
print(f"FLOPs={flops_txt} | Inference={inf_ms:.2f}ms | Size={size_mb:.2f}MB")

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(model.parameters(), lr=LR)


def run_epoch(loader, train=True):
    """Make one pass over ``loader`` with the module-level model.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches.
        train: when true, the model is put in training mode and the optimizer
            is stepped on every batch; otherwise the model is put in eval mode
            and gradients are disabled.

    Returns:
        Tuple ``(mean_loss, accuracy, predictions, labels, seconds)`` where the
        loss is averaged per sample, ``predictions`` / ``labels`` are NumPy
        arrays concatenated over all batches, and ``seconds`` is the wall-clock
        time spent in the batch loop.
    """
    if train:
        model.train()
    else:
        model.eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    pred_chunks = []
    label_chunks = []

    t0 = time.time()
    with torch.set_grad_enabled(train):
        for inputs, targets in loader:
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)

            if train:
                optimizer.zero_grad()
            logits = model(inputs)
            loss = criterion(logits, targets)
            if train:
                loss.backward()
                optimizer.step()

            # Index of the largest score along dim 1 is the predicted class.
            batch_pred = torch.max(logits, 1).indices
            batch_size = inputs.size(0)

            # criterion returns a batch mean, so weight it by the batch size.
            loss_sum += loss.item() * batch_size
            n_correct += (batch_pred == targets).sum().item()
            n_seen += batch_size

            pred_chunks.append(batch_pred.cpu().numpy())
            label_chunks.append(targets.cpu().numpy())
    elapsed = time.time() - t0

    all_preds = np.concatenate(pred_chunks)
    all_labels = np.concatenate(label_chunks)
    return loss_sum / n_seen, n_correct / n_seen, all_preds, all_labels, elapsed

# Per-epoch log rows; column order matches the list appended inside the loop.
HISTORY_COLUMNS = ["epoch", "train_loss", "train_acc", "val_loss", "val_acc",
                   "train_time", "val_time", "epoch_time"]
history_path = f"{OUT_DIR}/LeafNet_ReLU_history.csv"

history = []
best_val_acc = 0
best_state = None
for e in range(1, EPOCHS + 1):
    tr_l, tr_a, _, _, tr_t = run_epoch(train_loader, True)
    vl_l, vl_a, _, _, vl_t = run_epoch(val_loader, False)
    ep_t = tr_t + vl_t

    # Keep (in memory and on disk) the weights with the best validation accuracy so far.
    if vl_a > best_val_acc:
        best_val_acc = vl_a
        best_state = copy.deepcopy(model.state_dict())
        # NOTE(review): when the model is wrapped in nn.DataParallel these keys
        # carry a "module." prefix, so the file will not load directly into a
        # bare LeafNetReLU — confirm whether downstream loaders expect that.
        torch.save(best_state, f"{OUT_DIR}/LeafNet_ReLU_best.pth")

    history.append([e, tr_l, tr_a, vl_l, vl_a, tr_t, vl_t, ep_t])
    # Rewrite the CSV after every epoch so an interrupted run keeps the metrics
    # logged so far instead of losing them all.
    pd.DataFrame(history, columns=HISTORY_COLUMNS).to_csv(history_path, index=False)
    print(f"Epoch {e:02d}/{EPOCHS} | Train {tr_a:.4f} | Val {vl_a:.4f} | Loss {tr_l:.3f}/{vl_l:.3f} | Time {ep_t:.1f}s")

hist_df = pd.DataFrame(history, columns=HISTORY_COLUMNS)
hist_df.to_csv(history_path, index=False)

# Test Evaluation
# Restore the best-validation weights when training recorded any; otherwise the
# model is evaluated as it stands.
if best_state is not None:
    model.load_state_dict(best_state)
model.eval()

y_t, y_p = [], []
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        _, p = torch.max(out, 1)
        y_t.append(y.cpu().numpy())
        y_p.append(p.cpu().numpy())
y_t = np.concatenate(y_t)
y_p = np.concatenate(y_p)

acc = np.mean(y_t == y_p)
prec, rec, f1, _ = precision_recall_fscore_support(y_t, y_p, average="weighted")
print(f"\nPrecision={prec:.4f}, Recall={rec:.4f}, F1={f1:.4f}, TestAcc={acc:.4f}")

# Pass the full label set explicitly: without `labels`, classification_report
# infers classes from the data and raises ValueError when that count differs
# from len(target_names), i.e. when a class is absent from both y_t and y_p.
label_ids = list(range(len(class_names)))
report = classification_report(y_t, y_p, labels=label_ids,
                               target_names=class_names, output_dict=True)
pd.DataFrame(report).T.to_csv(f"{OUT_DIR}/classification_report.csv")

# Visualization
def _save_curve(series, ylabel, title, filename):
    """Plot columns of hist_df against epoch and save the figure under OUT_DIR.

    Args:
        series: iterable of ``(column_name, legend_label)`` pairs to draw.
        ylabel: y-axis label.
        title: figure title.
        filename: output file name inside OUT_DIR.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    for column, label in series:
        ax.plot(hist_df["epoch"], hist_df[column], label=label)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    fig.savefig(f"{OUT_DIR}/{filename}", dpi=150)
    plt.close(fig)


_save_curve([("train_loss", "Train Loss"), ("val_loss", "Val Loss")],
            "Loss", "Loss Curve", "loss_curve.png")
_save_curve([("train_acc", "Train Acc"), ("val_acc", "Val Acc")],
            "Accuracy", "Accuracy Curve", "accuracy_curve.png")

# Confusion matrix: fix the label set to every class index so the matrix is
# always len(class_names) square, and label both axes with the class names
# (rows are true classes, columns are predicted classes).
label_ids = list(range(len(class_names)))
cm = confusion_matrix(y_t, y_p, labels=label_ids)
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(cm, annot=False, cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel("Predicted class")
ax.set_ylabel("True class")
ax.set_title("Confusion Matrix - LeafNet_ReLU")
ax.tick_params(axis="both", labelsize=6)  # small font so all class names fit
fig.tight_layout()
fig.savefig(f"{OUT_DIR}/confusion_matrix.png", dpi=150)
plt.close(fig)

# Headline test metrics side by side on a fixed 0-1 scale.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(["Precision", "Recall", "F1", "Accuracy"], [prec, rec, f1, acc],
       color=["#4CAF50", "#2196F3", "#FFC107", "#9C27B0"])
ax.set_ylim(0, 1)
ax.set_ylabel("Score")
ax.set_title("LeafNet_ReLU Metrics")
fig.tight_layout()
fig.savefig(f"{OUT_DIR}/metrics_barplot.png", dpi=150)
plt.close(fig)

# Efficiency Table
# One-row summary combining the pre-training efficiency figures with the final
# validation / test metrics; written to CSV and echoed to the cell output.
summary_row = {
    "Model": "LeafNet_ReLU",
    "Params(M)": round(params / 1e6, 3),
    # Left empty when no FLOPs figure was measured.
    "FLOPs(M)": None if not flops_M else round(flops_M, 2),
    "Inference(ms)": round(inf_ms, 2),
    "Size(MB)": round(size_mb, 2),
    "ValAcc(%)": round(best_val_acc * 100, 2),
    "TestAcc(%)": round(acc * 100, 2),
    "Precision": round(prec, 3),
    "Recall": round(rec, 3),
    "F1": round(f1, 3),
}
summary = pd.DataFrame([summary_row])
summary.to_csv(f"{OUT_DIR}/LeafNet_ReLU_efficiency_summary.csv", index=False)

print("\n Efficiency Summary")
print(summary.to_string(index=False))

# List every artefact now present in the output folder, in sorted order.
print("\n All results saved under:", OUT_DIR)
for artefact in sorted(os.listdir(OUT_DIR)):
    print(" -", artefact)


Device: cuda | GPUs: 2
Classes=51, Train=95504, Val=20472, Test=20506
Total Params: 2,579,955 (2.580M)
FLOPs=1213.90M | Inference=34.37ms | Size=9.86MB
Epoch 01/100 | Train 0.5989 | Val 0.7929 | Loss 1.344/0.630 | Time 340.8s
Epoch 02/100 | Train 0.8408 | Val 0.8624 | Loss 0.485/0.413 | Time 255.3s
Epoch 03/100 | Train 0.8926 | Val 0.9246 | Loss 0.318/0.223 | Time 248.2s
Epoch 04/100 | Train 0.9208 | Val 0.9375 | Loss 0.236/0.177 | Time 248.3s
Epoch 05/100 | Train 0.9378 | Val 0.9596 | Loss 0.182/0.120 | Time 249.6s
Epoch 06/100 | Train 0.9497 | Val 0.9618 | Loss 0.147/0.111 | Time 249.4s
Epoch 07/100 | Train 0.9579 | Val 0.9618 | Loss 0.122/0.109 | Time 250.6s
Epoch 08/100 | Train 0.9640 | Val 0.9725 | Loss 0.104/0.079 | Time 249.5s
Epoch 09/100 | Train 0.9673 | Val 0.9642 | Loss 0.091/0.104 | Time 250.2s
Epoch 10/100 | Train 0.9724 | Val 0.9747 | Loss 0.079/0.070 | Time 248.5s
Epoch 11/100 | Train 0.9753 | Val 0.9748 | Loss 0.071/0.074 | Time 248.1s
Epoch 12/100 | Train 0.9781 | Val 