<a href="https://colab.research.google.com/github/parikshithsivakumar/NeuralNetworkandDeepLearning/blob/main/collabmain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
import zipfile
import os

# Change this to your dataset link if you use another one later
!gdown --id 1V2BqH0zGLis1a4BCQlULaYMyw113CTdK

# Extract the zip
with zipfile.ZipFile("archive (2).zip", 'r') as zip_ref:
    zip_ref.extractall("DatasetFlat")


Downloading...
From (original): https://drive.google.com/uc?id=1V2BqH0zGLis1a4BCQlULaYMyw113CTdK
From (redirected): https://drive.google.com/uc?id=1V2BqH0zGLis1a4BCQlULaYMyw113CTdK&confirm=t&uuid=de1e8a2c-338a-47c4-9aca-047e9c5d1413
To: /content/archive (2).zip
100% 843M/843M [00:13<00:00, 62.9MB/s]


In [None]:
import shutil

# Recursively delete all `.ipynb_checkpoints` folders
for root, dirs, files in os.walk("DatasetFlat"):
    for d in dirs:
        if d == ".ipynb_checkpoints":
            shutil.rmtree(os.path.join(root, d))


In [9]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import os
import copy
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

transform_val_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Load the dataset
dataset_path = "/content/DatasetFlat/Minet 5640 Images"  # Based on extracted folder structure
dataset = datasets.ImageFolder(dataset_path, transform=transform_train)
class_names = dataset.classes

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_subset, val_subset, test_subset = random_split(dataset, [train_size, val_size, test_size])

val_subset.dataset.transform = transform_val_test
test_subset.dataset.transform = transform_val_test

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32)
test_loader = DataLoader(test_subset, batch_size=32)

# Load ConvNeXt model
model = models.convnext_base(weights='IMAGENET1K_V1')
model.classifier[2] = nn.Linear(model.classifier[2].in_features, len(class_names))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

num_epochs = 5
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

os.makedirs("saved_models", exist_ok=True)

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    print("-" * 30)

    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in tqdm(loader, desc=phase):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

# Save best model
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/convnext_best.pth')
print(f"\n✅ Best ConvNeXt model saved with val accuracy: {best_acc:.4f}")


Device: cuda


Downloading: "https://download.pytorch.org/models/convnext_base-6075fbad.pth" to /root/.cache/torch/hub/checkpoints/convnext_base-6075fbad.pth
100%|██████████| 338M/338M [00:02<00:00, 161MB/s]



Epoch 1/5
------------------------------


train: 100%|██████████| 124/124 [05:01<00:00,  2.43s/it]


Train Loss: 0.8113, Acc: 0.7352


val: 100%|██████████| 27/27 [00:21<00:00,  1.24it/s]


Val Loss: 0.3122, Acc: 0.9078

Epoch 2/5
------------------------------


train: 100%|██████████| 124/124 [05:06<00:00,  2.47s/it]


Train Loss: 0.2044, Acc: 0.9450


val: 100%|██████████| 27/27 [00:21<00:00,  1.25it/s]


Val Loss: 0.2380, Acc: 0.9314

Epoch 3/5
------------------------------


train: 100%|██████████| 124/124 [05:05<00:00,  2.46s/it]


Train Loss: 0.0944, Acc: 0.9767


val: 100%|██████████| 27/27 [00:21<00:00,  1.27it/s]


Val Loss: 0.1834, Acc: 0.9374

Epoch 4/5
------------------------------


train: 100%|██████████| 124/124 [05:06<00:00,  2.47s/it]


Train Loss: 0.0718, Acc: 0.9805


val: 100%|██████████| 27/27 [00:21<00:00,  1.27it/s]


Val Loss: 0.2089, Acc: 0.9326

Epoch 5/5
------------------------------


train: 100%|██████████| 124/124 [05:07<00:00,  2.48s/it]


Train Loss: 0.0535, Acc: 0.9856


val: 100%|██████████| 27/27 [00:21<00:00,  1.27it/s]


Val Loss: 0.1717, Acc: 0.9385

✅ Best ConvNeXt model saved with val accuracy: 0.9385


In [11]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Define test transform
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

model = models.convnext_base(weights=None)
model.classifier[2] = nn.Linear(model.classifier[2].in_features, len(class_names))
model.load_state_dict(torch.load('saved_models/convnext_best.pth', map_location=device))
model.to(device)
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Accuracy
accuracy = (all_preds == all_labels).mean()
print(f"\n✅ ConvNeXt Test Accuracy: {accuracy:.4f}")

# Classification report (precision, recall, f1-score per class)
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds)
print("\nConfusion Matrix:")
print(cm)


Device: cuda


Evaluating: 100%|██████████| 27/27 [00:23<00:00,  1.13it/s]


✅ ConvNeXt Test Accuracy: 0.9705

Classification Report:
              precision    recall  f1-score   support

     biotite       0.96      0.95      0.96       142
     bornite       0.99      0.97      0.98        78
 chrysocolla       0.95      0.96      0.96        82
   malachite       0.98      0.98      0.98       168
   muscovite       0.87      0.92      0.89        64
      pyrite       0.99      1.00      0.99       154
      quartz       0.99      0.97      0.98       159

    accuracy                           0.97       847
   macro avg       0.96      0.97      0.96       847
weighted avg       0.97      0.97      0.97       847


Confusion Matrix:
[[135   0   0   0   7   0   0]
 [  1  76   0   0   1   0   0]
 [  0   0  79   3   0   0   0]
 [  0   0   3 165   0   0   0]
 [  2   1   1   0  59   0   1]
 [  0   0   0   0   0 154   0]
 [  2   0   0   0   1   2 154]]





In [12]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import copy
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Data transforms for EfficientNet (input size 224x224)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset (adjust your path)
full_dataset = datasets.ImageFolder('/content/DatasetFlat/Minet 5640 Images', transform=None)
# Split dataset into train/val (80/20 split)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Apply transforms to subsets
train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

# Data loaders
batch_size = 32
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

# Load EfficientNet-b0 pretrained model
model = models.efficientnet_b0(pretrained=True)
num_classes = len(full_dataset.classes)

# Replace classifier head
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

num_epochs = 5
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
            loader = train_loader
        else:
            model.eval()
            loader = val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/efficientnet_b0_best.pth')
print(f"\nBest val accuracy: {best_acc:.4f}")
print("Model saved as saved_models/efficientnet_b0_best.pth")


Using device: cuda


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 130MB/s] 



Epoch 1/5
Train Loss: 1.1879, Acc: 0.6281
Val Loss: 0.5469, Acc: 0.8422

Epoch 2/5
Train Loss: 0.4551, Acc: 0.8655
Val Loss: 0.2936, Acc: 0.9069

Epoch 3/5
Train Loss: 0.2406, Acc: 0.9309
Val Loss: 0.2315, Acc: 0.9255

Epoch 4/5
Train Loss: 0.1627, Acc: 0.9515
Val Loss: 0.2137, Acc: 0.9362

Epoch 5/5
Train Loss: 0.1069, Acc: 0.9716
Val Loss: 0.2072, Acc: 0.9353

Best val accuracy: 0.9362
Model saved as saved_models/efficientnet_b0_best.pth


In [15]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Define test transform
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load EfficientNet-B0 model, change classifier for your num_classes
model = models.efficientnet_b0(weights=None)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, len(class_names))

# Load saved weights
model.load_state_dict(torch.load('saved_models/efficientnet_b0_best.pth', map_location=device))
model.to(device)
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Accuracy
accuracy = (all_preds == all_labels).mean()
print(f"\n✅ EfficientNet Test Accuracy: {accuracy:.4f}")

# Classification report (precision, recall, f1-score per class)
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(all_labels, all_preds)
print("\nConfusion Matrix:")
print(cm)


Device: cuda


Evaluating: 100%|██████████| 27/27 [00:13<00:00,  1.93it/s]


✅ EfficientNet Test Accuracy: 0.9728

Classification Report:
              precision    recall  f1-score   support

     biotite       0.94      1.00      0.97       168
     bornite       0.96      0.89      0.92        61
 chrysocolla       0.96      0.97      0.97        78
   malachite       0.99      0.99      0.99       145
   muscovite       0.96      0.85      0.90        54
      pyrite       0.99      1.00      0.99       163
      quartz       0.99      0.98      0.98       178

    accuracy                           0.97       847
   macro avg       0.97      0.95      0.96       847
weighted avg       0.97      0.97      0.97       847


Confusion Matrix:
[[168   0   0   0   0   0   0]
 [  2  54   1   1   2   1   0]
 [  0   1  76   1   0   0   0]
 [  0   1   1 143   0   0   0]
 [  5   0   1   0  46   0   2]
 [  0   0   0   0   0 163   0]
 [  3   0   0   0   0   1 174]]





In [19]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import copy
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Data transforms (224x224)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset
full_dataset = datasets.ImageFolder('/content/DatasetFlat/Minet 5640 Images', transform=None)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Apply transforms to subsets
train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

# Data loaders
batch_size = 32
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

# Load DenseNet121 pretrained model
model = models.densenet121(pretrained=True)
num_classes = len(full_dataset.classes)

# Replace classifier
model.classifier = nn.Linear(model.classifier.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Training loop
num_epochs = 10
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

# Save best model
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/densenet121_best.pth')
print(f"\nBest val accuracy: {best_acc:.4f}")
print("Model saved as saved_models/densenet121_best.pth")


Using device: cuda

Epoch 1/10
Train Loss: 0.7521, Acc: 0.7640
Val Loss: 0.3011, Acc: 0.9220

Epoch 2/10
Train Loss: 0.2018, Acc: 0.9499
Val Loss: 0.2198, Acc: 0.9335

Epoch 3/10
Train Loss: 0.0903, Acc: 0.9763
Val Loss: 0.2100, Acc: 0.9273

Epoch 4/10
Train Loss: 0.0565, Acc: 0.9860
Val Loss: 0.2057, Acc: 0.9424

Epoch 5/10
Train Loss: 0.0473, Acc: 0.9867
Val Loss: 0.2224, Acc: 0.9371

Epoch 6/10
Train Loss: 0.0396, Acc: 0.9880
Val Loss: 0.2124, Acc: 0.9459

Epoch 7/10
Train Loss: 0.0313, Acc: 0.9887
Val Loss: 0.2135, Acc: 0.9424

Epoch 8/10
Train Loss: 0.0308, Acc: 0.9883
Val Loss: 0.2008, Acc: 0.9495

Epoch 9/10
Train Loss: 0.0312, Acc: 0.9889
Val Loss: 0.2233, Acc: 0.9388

Epoch 10/10
Train Loss: 0.0320, Acc: 0.9860
Val Loss: 0.2110, Acc: 0.9468

Best val accuracy: 0.9495
Model saved as saved_models/densenet121_best.pth


In [20]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Test transforms
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load DenseNet-121 and update classifier
model = models.densenet121(weights=None)
model.classifier = nn.Linear(model.classifier.in_features, len(class_names))

# Load saved model weights
model.load_state_dict(torch.load('saved_models/densenet121_best.pth', map_location=device))
model.to(device)
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

accuracy = (all_preds == all_labels).mean()
print(f"\n✅ DenseNet-121 Test Accuracy: {accuracy:.4f}")

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

cm = confusion_matrix(all_labels, all_preds)
print("\nConfusion Matrix:")
print(cm)


Device: cuda


Evaluating: 100%|██████████| 27/27 [00:17<00:00,  1.55it/s]


✅ DenseNet-121 Test Accuracy: 0.9858

Classification Report:
              precision    recall  f1-score   support

     biotite       0.97      0.98      0.97       133
     bornite       0.98      1.00      0.99        64
 chrysocolla       0.98      1.00      0.99        87
   malachite       0.99      0.99      0.99       164
   muscovite       1.00      0.88      0.94        50
      pyrite       1.00      0.99      1.00       169
      quartz       0.98      1.00      0.99       180

    accuracy                           0.99       847
   macro avg       0.99      0.98      0.98       847
weighted avg       0.99      0.99      0.99       847


Confusion Matrix:
[[130   1   0   0   0   0   2]
 [  0  64   0   0   0   0   0]
 [  0   0  87   0   0   0   0]
 [  0   0   2 162   0   0   0]
 [  4   0   0   1  44   0   1]
 [  0   0   0   0   0 168   1]
 [  0   0   0   0   0   0 180]]





In [21]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Data transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Dataset
full_dataset = datasets.ImageFolder('/content/DatasetFlat/Minet 5640 Images', transform=None)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False, num_workers=2)

# Load MobileNetV2
model = models.mobilenet_v2(pretrained=True)
num_classes = len(full_dataset.classes)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Training loop
num_epochs = 5
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

# Save best model
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/mobilenetv2_best.pth')
print(f"\nBest val accuracy: {best_acc:.4f}")
print("Model saved as saved_models/mobilenetv2_best.pth")


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


Using device: cuda


100%|██████████| 13.6M/13.6M [00:00<00:00, 129MB/s]


Epoch 1/5





Train Loss: 0.7847, Acc: 0.7471
Val Loss: 0.3263, Acc: 0.9087

Epoch 2/5
Train Loss: 0.2631, Acc: 0.9187
Val Loss: 0.2475, Acc: 0.9344

Epoch 3/5
Train Loss: 0.1352, Acc: 0.9619
Val Loss: 0.2233, Acc: 0.9362

Epoch 4/5
Train Loss: 0.0778, Acc: 0.9809
Val Loss: 0.1917, Acc: 0.9406

Epoch 5/5
Train Loss: 0.0576, Acc: 0.9845
Val Loss: 0.2086, Acc: 0.9379

Best val accuracy: 0.9406
Model saved as saved_models/mobilenetv2_best.pth


In [22]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transforms
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Dataset
dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

# Split only for testing
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load MobileNetV2
model = models.mobilenet_v2(weights=None)
model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, len(class_names))
model.load_state_dict(torch.load("saved_models/mobilenetv2_best.pth", map_location=device))
model = model.to(device)
model.eval()

# Evaluation loop
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating MobileNetV2"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Metrics
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

accuracy = (all_preds == all_labels).mean()
print(f"\n✅ MobileNetV2 Test Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Evaluating MobileNetV2: 100%|██████████| 27/27 [00:15<00:00,  1.74it/s]


✅ MobileNetV2 Test Accuracy: 0.9764

Classification Report:
              precision    recall  f1-score   support

     biotite       0.96      0.98      0.97       164
     bornite       0.95      0.98      0.97        61
 chrysocolla       1.00      0.96      0.98        92
   malachite       0.97      0.98      0.97       156
   muscovite       0.98      0.91      0.95        47
      pyrite       0.98      0.99      0.99       157
      quartz       0.99      0.98      0.99       170

    accuracy                           0.98       847
   macro avg       0.98      0.97      0.97       847
weighted avg       0.98      0.98      0.98       847


Confusion Matrix:
[[160   2   0   0   0   0   2]
 [  1  60   0   0   0   0   0]
 [  0   0  88   4   0   0   0]
 [  0   0   0 153   0   3   0]
 [  3   1   0   0  43   0   0]
 [  0   0   0   1   0 156   0]
 [  2   0   0   0   1   0 167]]





In [23]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset and split
full_dataset = datasets.ImageFolder('/content/DatasetFlat/Minet 5640 Images', transform=None)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Apply transforms
train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

# Dataloaders
batch_size = 32
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

# Load pretrained MobileNetV3-Large
model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.DEFAULT)
num_classes = len(full_dataset.classes)
model.classifier[3] = nn.Linear(model.classifier[3].in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Training loop
num_epochs = 5
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

# Save best model
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/mobilenetv3_large_best.pth')
print(f"\n✅ Best val accuracy: {best_acc:.4f}")
print("📦 Model saved to: saved_models/mobilenetv3_large_best.pth")


Using device: cuda


Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-5c1a4163.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-5c1a4163.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 55.1MB/s]



Epoch 1/5
Train Loss: 0.9685, Acc: 0.6797
Val Loss: 0.4336, Acc: 0.8546

Epoch 2/5
Train Loss: 0.3003, Acc: 0.9056
Val Loss: 0.2697, Acc: 0.9211

Epoch 3/5
Train Loss: 0.1414, Acc: 0.9577
Val Loss: 0.1904, Acc: 0.9406

Epoch 4/5
Train Loss: 0.0828, Acc: 0.9752
Val Loss: 0.1819, Acc: 0.9424

Epoch 5/5
Train Loss: 0.0592, Acc: 0.9803
Val Loss: 0.1891, Acc: 0.9459

✅ Best val accuracy: 0.9459
📦 Model saved to: saved_models/mobilenetv3_large_best.pth


In [25]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Transforms for evaluation
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset
dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

# Split dataset into train/val/test (same method)
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load MobileNetV3-Large model
model = models.mobilenet_v3_large(weights=None)
model.classifier[3] = torch.nn.Linear(model.classifier[3].in_features, len(class_names))
model.load_state_dict(torch.load('saved_models/mobilenetv3_large_best.pth', map_location=device))
model = model.to(device)
model.eval()

# Inference loop
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating MobileNetV3-Large"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Accuracy
accuracy = (all_preds == all_labels).mean()
print(f"\n✅ MobileNetV3-Large Test Accuracy: {accuracy:.4f}")

# Classification report
print("\n📊 Classification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Confusion matrix
print("\n🧩 Confusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Using device: cuda


Evaluating MobileNetV3-Large: 100%|██████████| 27/27 [00:17<00:00,  1.53it/s]


✅ MobileNetV3-Large Test Accuracy: 0.9752

📊 Classification Report:
              precision    recall  f1-score   support

     biotite       0.98      0.99      0.98       165
     bornite       0.98      0.93      0.96        60
 chrysocolla       0.95      1.00      0.98        79
   malachite       1.00      0.97      0.99       157
   muscovite       0.97      0.90      0.93        68
      pyrite       0.98      0.99      0.98       155
      quartz       0.96      0.99      0.97       163

    accuracy                           0.98       847
   macro avg       0.97      0.97      0.97       847
weighted avg       0.98      0.98      0.98       847


🧩 Confusion Matrix:
[[163   0   0   0   1   1   0]
 [  0  56   0   0   1   1   2]
 [  0   0  79   0   0   0   0]
 [  0   0   4 153   0   0   0]
 [  4   0   0   0  61   0   3]
 [  0   0   0   0   0 153   2]
 [  0   1   0   0   0   1 161]]





In [26]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.models import swin_t
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Data transforms (Swin Transformer typically uses 224x224 input)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset and split
dataset_path = '/content/DatasetFlat/Minet 5640 Images'
full_dataset = datasets.ImageFolder(dataset_path, transform=None)

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Apply transforms to subsets
train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False, num_workers=2)

# Load pretrained Swin Transformer small model
model = swin_t(weights='DEFAULT')  # pretrained weights
num_classes = len(full_dataset.classes)

# Replace head for classification
model.head = nn.Linear(model.head.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

num_epochs = 5
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, dim=1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)

        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/swin_t_best.pth')
print(f"\nBest val accuracy: {best_acc:.4f}")
print("Model saved as saved_models/swin_t_best.pth")


Using device: cuda


Downloading: "https://download.pytorch.org/models/swin_t-704ceda3.pth" to /root/.cache/torch/hub/checkpoints/swin_t-704ceda3.pth
100%|██████████| 108M/108M [00:01<00:00, 103MB/s]  



Epoch 1/5
Train Loss: 0.7217, Acc: 0.7575
Val Loss: 0.2989, Acc: 0.9007

Epoch 2/5
Train Loss: 0.2511, Acc: 0.9238
Val Loss: 0.2353, Acc: 0.9309

Epoch 3/5
Train Loss: 0.1357, Acc: 0.9592
Val Loss: 0.2106, Acc: 0.9406

Epoch 4/5
Train Loss: 0.0994, Acc: 0.9714
Val Loss: 0.1889, Acc: 0.9459

Epoch 5/5
Train Loss: 0.0795, Acc: 0.9738
Val Loss: 0.1877, Acc: 0.9486

Best val accuracy: 0.9486
Model saved as saved_models/swin_t_best.pth


In [27]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Test transform (same as validation)
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

dataset_path = '/content/DatasetFlat/Minet 5640 Images'
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)

# Split dataset (train/val/test split assumed, here just for test)
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load model architecture
from torchvision.models import swin_t
model = swin_t(weights=None)
num_classes = len(dataset.classes)
model.head = nn.Linear(model.head.in_features, num_classes)
model.load_state_dict(torch.load('saved_models/swin_t_best.pth', map_location=device))
model = model.to(device)
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

accuracy = (all_preds == all_labels).mean()
print(f"\n✅ Swin Transformer Test Accuracy: {accuracy:.4f}")

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=dataset.classes))

print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Evaluating: 100%|██████████| 18/18 [00:10<00:00,  1.68it/s]


✅ Swin Transformer Test Accuracy: 0.9823

Classification Report:
              precision    recall  f1-score   support

     biotite       0.96      0.98      0.97       112
     bornite       0.98      0.98      0.98        42
 chrysocolla       0.98      0.94      0.96        53
   malachite       0.98      0.99      0.99       103
   muscovite       0.96      0.93      0.95        29
      pyrite       1.00      1.00      1.00       120
      quartz       1.00      0.99      1.00       105

    accuracy                           0.98       564
   macro avg       0.98      0.97      0.98       564
weighted avg       0.98      0.98      0.98       564


Confusion Matrix:
[[110   1   0   0   1   0   0]
 [  1  41   0   0   0   0   0]
 [  1   0  50   2   0   0   0]
 [  0   0   1 102   0   0   0]
 [  2   0   0   0  27   0   0]
 [  0   0   0   0   0 120   0]
 [  1   0   0   0   0   0 104]]





In [29]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Data transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset
full_dataset = datasets.ImageFolder('/content/DatasetFlat/Minet 5640 Images', transform=None)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

train_loader = DataLoader(train_subset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False, num_workers=2)

# Load RegNetY_400MF pretrained model
model = models.regnet_y_400mf(pretrained=True)
num_classes = len(full_dataset.classes)

# Replace classifier head
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

num_epochs = 10
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
            loader = train_loader
        else:
            model.eval()
            loader = val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/regnet_y_400mf_best.pth')
print(f"\nBest val accuracy: {best_acc:.4f}")
print("Model saved as saved_models/regnet_y_400mf_best.pth")


Using device: cuda

Epoch 1/10
Train Loss: 0.8924, Acc: 0.7165
Val Loss: 0.3845, Acc: 0.8954

Epoch 2/10
Train Loss: 0.2589, Acc: 0.9289
Val Loss: 0.2521, Acc: 0.9238

Epoch 3/10
Train Loss: 0.1257, Acc: 0.9665
Val Loss: 0.2287, Acc: 0.9317

Epoch 4/10
Train Loss: 0.0729, Acc: 0.9796
Val Loss: 0.2350, Acc: 0.9282

Epoch 5/10
Train Loss: 0.0590, Acc: 0.9829
Val Loss: 0.2393, Acc: 0.9291

Epoch 6/10
Train Loss: 0.0477, Acc: 0.9836
Val Loss: 0.2288, Acc: 0.9353

Epoch 7/10
Train Loss: 0.0416, Acc: 0.9867
Val Loss: 0.2346, Acc: 0.9362

Epoch 8/10
Train Loss: 0.0400, Acc: 0.9858
Val Loss: 0.2356, Acc: 0.9353

Epoch 9/10
Train Loss: 0.0368, Acc: 0.9849
Val Loss: 0.2310, Acc: 0.9362

Epoch 10/10
Train Loss: 0.0384, Acc: 0.9858
Val Loss: 0.2366, Acc: 0.9371

Best val accuracy: 0.9371
Model saved as saved_models/regnet_y_400mf_best.pth


In [31]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Define test transform
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Load dataset
dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

# Split into test subset only
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load RegNetY-400MF model
model = models.regnet_y_400mf(weights=None)
model.fc = torch.nn.Linear(model.fc.in_features, len(class_names))
model.load_state_dict(torch.load('/content/saved_models/regnet_y_400mf_best.pth', map_location=device))
model.to(device)
model.eval()

# Evaluation
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating RegNet"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Metrics
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

accuracy = (all_preds == all_labels).mean()
print(f"\n✅ RegNet Test Accuracy: {accuracy:.4f}")

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Device: cuda


Evaluating RegNet: 100%|██████████| 27/27 [00:16<00:00,  1.62it/s]


✅ RegNet Test Accuracy: 0.9799

Classification Report:
              precision    recall  f1-score   support

     biotite       0.97      0.99      0.98       152
     bornite       0.97      0.98      0.98        60
 chrysocolla       0.99      0.94      0.96        85
   malachite       0.97      0.99      0.98       145
   muscovite       0.96      0.93      0.95        59
      pyrite       1.00      0.99      0.99       168
      quartz       0.98      0.99      0.99       178

    accuracy                           0.98       847
   macro avg       0.98      0.97      0.98       847
weighted avg       0.98      0.98      0.98       847


Confusion Matrix:
[[150   1   0   0   1   0   0]
 [  1  59   0   0   0   0   0]
 [  0   1  80   4   0   0   0]
 [  0   0   1 143   1   0   0]
 [  2   0   0   0  55   0   2]
 [  0   0   0   0   0 166   2]
 [  1   0   0   0   0   0 177]]





In [32]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import copy
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Data transforms for ViT (input size 224x224)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset
dataset_path = '/content/DatasetFlat/Minet 5640 Images'
full_dataset = datasets.ImageFolder(dataset_path, transform=None)

# Split dataset into train/val
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Apply transforms
train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

# Data loaders
batch_size = 32
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

# Load ViT model
model = models.vit_b_16(pretrained=True)
num_classes = len(full_dataset.classes)
model.heads.head = nn.Linear(model.heads.head.in_features, num_classes)
model = model.to(device)

# Loss, optimizer, scheduler
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Training loop
num_epochs = 10
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss, running_corrects = 0.0, 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

# Save best model
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/vit_best.pth')
print(f"\n✅ Best validation accuracy: {best_acc:.4f}")
print("📦 Model saved to saved_models/vit_best.pth")




Using device: cuda


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 155MB/s]



Epoch 1/10
Train Loss: 0.6543, Acc: 0.7857
Val Loss: 0.3488, Acc: 0.8821

Epoch 2/10
Train Loss: 0.1637, Acc: 0.9512
Val Loss: 0.3649, Acc: 0.8945

Epoch 3/10
Train Loss: 0.1034, Acc: 0.9681
Val Loss: 0.2803, Acc: 0.9140

Epoch 4/10
Train Loss: 0.0777, Acc: 0.9750
Val Loss: 0.3208, Acc: 0.9087

Epoch 5/10
Train Loss: 0.0692, Acc: 0.9785
Val Loss: 0.3852, Acc: 0.8954

Epoch 6/10
Train Loss: 0.0349, Acc: 0.9852
Val Loss: 0.3105, Acc: 0.9255

Epoch 7/10
Train Loss: 0.0277, Acc: 0.9869
Val Loss: 0.3179, Acc: 0.9229

Epoch 8/10
Train Loss: 0.0256, Acc: 0.9885
Val Loss: 0.3069, Acc: 0.9273

Epoch 9/10
Train Loss: 0.0244, Acc: 0.9885
Val Loss: 0.3086, Acc: 0.9291

Epoch 10/10
Train Loss: 0.0246, Acc: 0.9883
Val Loss: 0.3326, Acc: 0.9238

✅ Best validation accuracy: 0.9291
📦 Model saved to saved_models/vit_best.pth


In [33]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Define test transform
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Load dataset and class names
dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

# Split and create test set
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

# Load the ViT model
model = models.vit_b_16(weights=None)
model.heads.head = torch.nn.Linear(model.heads.head.in_features, len(class_names))
model.load_state_dict(torch.load('saved_models/vit_best.pth', map_location=device))
model.to(device)
model.eval()

# Run predictions
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating ViT"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Accuracy
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
accuracy = (all_preds == all_labels).mean()
print(f"\n✅ ViT Test Accuracy: {accuracy:.4f}")

# Classification report
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Device: cuda


Evaluating ViT: 100%|██████████| 27/27 [00:24<00:00,  1.09it/s]


✅ ViT Test Accuracy: 0.9740

Classification Report:
              precision    recall  f1-score   support

     biotite       0.94      0.99      0.96       161
     bornite       0.98      0.97      0.98        66
 chrysocolla       1.00      0.99      0.99        82
   malachite       1.00      1.00      1.00       133
   muscovite       0.96      0.89      0.93        57
      pyrite       0.99      0.98      0.98       166
      quartz       0.97      0.96      0.96       182

    accuracy                           0.97       847
   macro avg       0.98      0.97      0.97       847
weighted avg       0.97      0.97      0.97       847


Confusion Matrix:
[[159   1   0   0   1   0   0]
 [  1  64   0   0   0   0   1]
 [  0   0  81   0   0   0   1]
 [  0   0   0 133   0   0   0]
 [  5   0   0   0  51   0   1]
 [  1   0   0   0   0 162   3]
 [  4   0   0   0   1   2 175]]





In [34]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import timm
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Transforms
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5])
])

# Dataset
dataset_path = '/content/DatasetFlat/Minet 5640 Images'
full_dataset = datasets.ImageFolder(dataset_path, transform=None)
num_classes = len(full_dataset.classes)

# Split
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
train_dataset.dataset.transform = train_transform
val_dataset.dataset.transform = val_transform

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# Load BEiT from timm
model = timm.create_model("beit_base_patch16_224", pretrained=True)
model.head = nn.Linear(model.head.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

# Training Loop
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

for epoch in range(5):
    print(f"\nEpoch {epoch+1}/5")
    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        loader = train_loader if phase == 'train' else val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                preds = outputs.argmax(dim=1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(loader.dataset)
        epoch_acc = running_corrects / len(loader.dataset)
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    scheduler.step()

# Save best model
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'saved_models/beit_best.pth')
print(f"\n✅ BEiT Best Val Accuracy: {best_acc:.4f}")


Device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/350M [00:00<?, ?B/s]


Epoch 1/5
Train Loss: 0.6464 Acc: 0.7934
Val Loss: 0.2349 Acc: 0.9353

Epoch 2/5
Train Loss: 0.1164 Acc: 0.9641
Val Loss: 0.1852 Acc: 0.9433

Epoch 3/5
Train Loss: 0.0520 Acc: 0.9852
Val Loss: 0.1606 Acc: 0.9477

Epoch 4/5
Train Loss: 0.0324 Acc: 0.9891
Val Loss: 0.1485 Acc: 0.9548

Epoch 5/5
Train Loss: 0.0274 Acc: 0.9887
Val Loss: 0.1489 Acc: 0.9548

✅ BEiT Best Val Accuracy: 0.9548


In [39]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import timm
from tqdm import tqdm
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Dataset and transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

dataset_path = "/content/DatasetFlat/Minet 5640 Images"
full_dataset = datasets.ImageFolder(dataset_path, transform=transform)
class_names = full_dataset.classes

# Split dataset
train_size = int(0.7 * len(full_dataset))
val_size = int(0.15 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size
_, _, test_dataset = random_split(full_dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load BEiT model from timm
model = timm.create_model('beit_base_patch16_224', pretrained=False, num_classes=len(class_names))
model.load_state_dict(torch.load('saved_models/beit_best.pth', map_location=device))
model = model.to(device)
model.eval()

# Evaluation loop
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc="Evaluating"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Accuracy
accuracy = (all_preds == all_labels).mean()
print(f"\n✅ BEiT Test Accuracy: {accuracy:.4f}")

# Classification report
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


Device: cuda


Evaluating: 100%|██████████| 27/27 [00:24<00:00,  1.11it/s]


✅ BEiT Test Accuracy: 0.3589

Classification Report:
              precision    recall  f1-score   support

     biotite       0.36      0.36      0.36       158
     bornite       0.31      0.14      0.19        65
 chrysocolla       0.39      0.20      0.26        90
   malachite       0.57      0.15      0.24       135
   muscovite       0.10      0.16      0.13        49
      pyrite       0.44      0.50      0.47       157
      quartz       0.35      0.59      0.44       193

    accuracy                           0.36       847
   macro avg       0.36      0.30      0.30       847
weighted avg       0.39      0.36      0.34       847


Confusion Matrix:
[[ 57   1   3   0   8  32  57]
 [ 17   9   2   0   8  11  18]
 [ 19   2  18   6  16   9  20]
 [  7   5  22  20  13   9  59]
 [  9   1   0   2   8  13  16]
 [ 28   0   0   3   8  79  39]
 [ 21  11   1   4  17  26 113]]





In [44]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
from tqdm import tqdm
import re
import pandas as pd
import timm  # For BEiT model loading

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("🔧 Device:", device)

# Test data transform
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load dataset and create test split
dataset_path = "/content/DatasetFlat/Minet 5640 Images"
dataset = datasets.ImageFolder(dataset_path, transform=transform_test)
class_names = dataset.classes

train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
_, _, test_subset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False)

num_classes = len(class_names)

# Mapping model keys to their model loader name in torchvision or 'timm' for beit
model_files = {
    "beit": "beit_best.pth",
    "convnext": "convnext_best.pth",
    "densenet121": "densenet121_best.pth",
    "efficientnet_b0": "efficientnet_bo_best.pth",
    "mobilenetv2": "mobilenetv2_best.pth",
    "mobilenetv3_large": "mobilenetv3_large_best.pth",
    "regnet_y_400mf": "regnet_y_400mf_best.pth",
    "swin_t": "swin_t_best.pth",
    "vit": "vit_best.pth"
}

# Classifier layer info: (path to classifier layer, input features)
classifier_layers = {
    "beit": ("head", 768),
    "convnext": ("classifier[2]", 1024),
    "densenet121": ("classifier", 1024),
    "efficientnet_b0": ("classifier[1]", 1280),
    "mobilenetv2": ("classifier[1]", 1280),
    "mobilenetv3_large": ("classifier[3]", 1280),
    "regnet_y_400mf": ("fc", 440),
    "swin_t": ("head", 768),
    "vit": ("heads.head", 768),
}

# Helper to set the classifier layer safely
def set_model_head(model, head_path, in_features, num_classes):
    attrs = re.split(r'\.(?![^\[]*\])', head_path)  # Split on '.' but ignore those inside brackets
    current = model
    for i, attr in enumerate(attrs[:-1]):
        if '[' in attr:
            # e.g. classifier[2]
            name, idx = re.match(r'(\w+)\[(\d+)\]', attr).groups()
            current = getattr(current, name)[int(idx)]
        else:
            current = getattr(current, attr)

    last = attrs[-1]
    if '[' in last:
        name, idx = re.match(r'(\w+)\[(\d+)\]', last).groups()
        module = getattr(current, name)
        module[int(idx)] = nn.Linear(in_features, num_classes)
    else:
        setattr(current, last, nn.Linear(in_features, num_classes))

results = []

for model_key, pth_file in model_files.items():
    print(f"\n🔍 Evaluating {model_key}")

    # Load model
    if model_key == "beit":
        model = timm.create_model("beit_base_patch16_224", pretrained=False, num_classes=num_classes)
    else:
        # For torchvision model names that differ slightly, fix here if needed
        torch_model_name = model_key
        if model_key == "efficientnet_b0":
            torch_model_name = "efficientnet_b0"
        model = getattr(models, torch_model_name)(weights=None)

    # Set classifier head
    head_path, in_features = classifier_layers[model_key]
    set_model_head(model, head_path, in_features, num_classes)

    # Load weights
    model.load_state_dict(torch.load(f"saved_models/{pth_file}", map_location=device))
    model.to(device)
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc=model_key):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    acc = (all_preds == all_labels).mean()
    print(f"\n✅ {model_key} Accuracy: {acc:.4f}")

    report = classification_report(all_labels, all_preds, target_names=class_names, output_dict=True)
    # You can save precision, recall, f1-score for 'macro avg' or 'weighted avg'
    macro_f1 = report['macro avg']['f1-score']
    weighted_f1 = report['weighted avg']['f1-score']

    results.append({
        "Model": model_key,
        "Accuracy": acc,
        "Macro F1": macro_f1,
        "Weighted F1": weighted_f1
    })

# Display summary table
df_results = pd.DataFrame(results)
print("\n\n📋 Summary of all models:")
print(df_results)


🔧 Device: cuda

🔍 Evaluating beit


beit: 100%|██████████| 27/27 [00:26<00:00,  1.02it/s]


✅ beit Accuracy: 0.3766

🔍 Evaluating convnext





TypeError: 'module' object is not callable

In [42]:
import shutil
import os
from google.colab import files

model_dir = "/content/saved_models"

# Compress the folder into a zip file
shutil.make_archive('saved_models_backup', 'zip', model_dir)

# Download the zip file
files.download('saved_models_backup.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>