In [1]:
pip install torch torchvision transformers timm


Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from transformers import DeiTForImageClassification, DeiTFeatureExtractor
from torchvision import transforms
from PIL import Image
import os

# Load pre-trained DeiT model for image classification
model = DeiTForImageClassification.from_pretrained('facebook/deit-base-distilled-patch16-224')
feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224')


config.json:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/349M [00:00<?, ?B/s]

Some weights of DeiTForImageClassification were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]



In [3]:
import os
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn import preprocessing
from transformers import DeiTFeatureExtractor
import torch

# Folder names representing class labels
class_names = [
    "8Carcinoma",
    "6Polyps and cholesterol crystals",
    "3cholecystitis",
    "9Various causes of gallbladder wall thickening",
    "7Adenomyomatosis",
    "5Perforation",
    "1Gallstones",
    "2Abdomen and retroperitoneum",
    "4Membranous and gangrenous cholecystitis"
]

# Create label encoder for folder names
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(class_names)

# Custom Dataset Class
class GallbladderDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')  # Convert to RGB if grayscale
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

# Define the transformations (resize, normalize, etc.)
feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224')
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
])

# Prepare the dataset paths and labels
dataset_path = '/kaggle/input/gallbladder/Gallblader Diseases Dataset'  # Replace with the path to your dataset

image_paths = []
labels = []

# Scan through all folders in the dataset
for folder_name in os.listdir(dataset_path):
    folder_path = os.path.join(dataset_path, folder_name)
    if os.path.isdir(folder_path):
        # Get all images in the folder
        for img_name in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_name)
            if img_name.endswith(('jpg', 'jpeg', 'png')):  # Add more formats if needed
                image_paths.append(img_path)
                label = label_encoder.transform([folder_name])[0]
                labels.append(label)

# Split the dataset into training and testing sets
train_paths, test_paths, train_labels, test_labels = train_test_split(image_paths, labels, test_size=0.2, stratify=labels)

# Create Dataset instances
train_dataset = GallbladderDataset(image_paths=train_paths, labels=train_labels, transform=transform)
test_dataset = GallbladderDataset(image_paths=test_paths, labels=test_labels, transform=transform)

# Create DataLoader instances
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)





In [4]:
# Check if CUDA (GPU) is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the chosen device
model.to(device)


DeiTForImageClassification(
  (deit): DeiTModel(
    (embeddings): DeiTEmbeddings(
      (patch_embeddings): DeiTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): DeiTEncoder(
      (layer): ModuleList(
        (0-11): 12 x DeiTLayer(
          (attention): DeiTSdpaAttention(
            (attention): DeiTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): DeiTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): DeiTIntermediate(
            (dense): L

In [7]:
from sklearn.metrics import classification_report
import numpy as np

# Check if CUDA (GPU) is available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the chosen device
model.to(device)

# Set up optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Training Loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    total_loss = 0
    all_train_preds = []
    all_train_labels = []
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to device
        
        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        
        # Backpropagation and optimization
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Collect predictions and true labels
        _, predicted = torch.max(outputs, 1)
        all_train_preds.extend(predicted.cpu().numpy())
        all_train_labels.extend(labels.cpu().numpy())

    # Calculate classification report for training set
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")
    print("Training Classification Report:")
    
    # Ensure labels in the report are the same as the target classes
    print(classification_report(
        all_train_labels, all_train_preds, 
        target_names=class_names, 
        labels=np.arange(len(class_names))  # Explicitly specify the labels
    ))
    
    # Evaluation on Test Set after every epoch
    model.eval()  # Set model to evaluation mode
    all_test_preds = []
    all_test_labels = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device
            outputs = model(images).logits
            _, predicted = torch.max(outputs, 1)
            
            # Collect predictions and true labels
            all_test_preds.extend(predicted.cpu().numpy())
            all_test_labels.extend(labels.cpu().numpy())

    # Calculate classification report for test set
    print("Test Classification Report:")
    print(classification_report(
        all_test_labels, all_test_preds, 
        target_names=class_names, 
        labels=np.arange(len(class_names))  # Explicitly specify the labels
    ))


Epoch [1/5], Loss: 0.0425
Training Classification Report:
                                                precision    recall  f1-score   support

                                    8Carcinoma       0.98      0.98      0.98      1061
              6Polyps and cholesterol crystals       0.99      0.99      0.99       936
                                3cholecystitis       0.98      0.98      0.98       917
9Various causes of gallbladder wall thickening       0.99      0.99      0.99       979
                              7Adenomyomatosis       0.98      0.98      0.98       849
                                  5Perforation       0.99      0.99      0.99       816
                                   1Gallstones       0.99      0.99      0.99       931
                  2Abdomen and retroperitoneum       0.99      0.99      0.99      1272
      4Membranous and gangrenous cholecystitis       0.99      0.99      0.99       792

                                      accuracy              