In [3]:
#Now to mount drive here
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
#after mounting drive we need to unzip the folder and use it
import zipfile

zip_file_path = '/content/drive/MyDrive/archive(1).zip'
target_directory = '/content'
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(target_directory)

In [2]:
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    print('Found GPU. Using GPU.')
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
else:
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        print('Found TPU. Using TPU.')
        strategy = tf.distribute.TPUStrategy(resolver)
    except:
        print('No GPU or TPU found. Using CPU.')
        strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")

print('Number of replicas: {}'.format(strategy.num_replicas_in_sync))

Found GPU. Using GPU.
Number of devices: 1
Number of replicas: 1


In [3]:
from google.colab.patches import cv2_imshow
import cv2

def preprocess_image(image_path):
    # Read the image
    original_image = cv2.imread(image_path)
    original_image = cv2.resize(original_image, (224, 224))

    # Convert the image to grayscale
    gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

    # Apply threshold to obtain a binary mask for the white background
    _, mask = cv2.threshold(gray_image, 200, 255, cv2.THRESH_BINARY)

    # Invert the mask to get the foreground
    mask = cv2.bitwise_not(mask)

    # Apply Gaussian Blur to the image
    blurred_image = cv2.GaussianBlur(original_image, (15, 15), 0)

    # Sharpen the image
    sharpened_image = cv2.addWeighted(blurred_image, 1.5, original_image, -0.5, 0)

    # Combine the sharpened image with the white background mask
    result_image = cv2.bitwise_and(sharpened_image, sharpened_image, mask=mask)

    return result_image

In [4]:
import shutil
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

input_directory = '/content/Leaves'
output_directory = '/content/Leaves_no_bg'
csv_file = '/content/Leaves/all.csv'

df = pd.read_csv(csv_file)
with strategy.scope():
  for index, row in df.iterrows():
      image_name = row['id']
      class_number = row['y']
      class_directory = os.path.join(output_directory, str(class_number))
      os.makedirs(class_directory, exist_ok=True)
      input_path = os.path.join(input_directory, image_name)
      output_path = os.path.join(class_directory, image_name)
      processed_image = preprocess_image(input_path)
      cv2.imwrite(output_path, processed_image)

print("Images organized into class-specific folders after background removal.")

Images organized into class-specific folders after background removal.


In [5]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
transform = transforms.Compose([  # Convert to PIL Image
    transforms.Resize((224, 224)),  # Resize to desired size
  # Use your custom FeatureExtractor
    transforms.ToTensor(),  # Convert back to PyTorch tensor
])

In [6]:

data_path = "/content/Leaves_no_bg"

# Create a dataset from the folder structure
dataset = datasets.ImageFolder(root=data_path, transform=transform)

# Split the dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [7]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from transformers import ViTFeatureExtractor, ViTForImageClassification
from sklearn.model_selection import train_test_split
from PIL import Image

In [8]:
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224-in21k')

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
num_classes = 32
model.classifier = nn.Linear(model.config.hidden_size, num_classes)

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7

In [11]:
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

In [14]:
import torch

num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Free up GPU memory
        del images, labels, outputs
        torch.cuda.empty_cache()

    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        correct = 0
        total = 0
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Free up GPU memory
            del images, labels, outputs
            torch.cuda.empty_cache()

        val_loss /= len(val_loader)
        accuracy = correct / total

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {val_loss:.4f}, Accuracy: {accuracy*100:.2f}%')


Epoch 1/5, Loss: 2.2048, Accuracy: 95.29%
Epoch 2/5, Loss: 1.7821, Accuracy: 97.12%
Epoch 3/5, Loss: 1.4766, Accuracy: 98.95%
Epoch 4/5, Loss: 1.2532, Accuracy: 99.48%
Epoch 5/5, Loss: 1.0695, Accuracy: 99.74%
