In [11]:
import torch
print(torch.__version__)  # Should NOT have "+cpu"
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")


2.4.1+cu121
True
NVIDIA GeForce RTX 2080 SUPER


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.linear = nn.Linear(10, 1)  # Simple linear layer

    def forward(self, x):
        return self.linear(x)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleModel().to(device)

# Print model details
print("Model is on:", device)


Model is on: cuda


In [13]:
# Install required libraries if not already installed
!pip install tensorflow keras numpy pandas matplotlib opencv-python scikit-learn albumentations einops

# Importing essential libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
import os

from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from einops import rearrange, repeat  # For Vision Transformer operations
from tensorflow.keras.optimizers import Adam




In [17]:
# Define dataset path
#dataset_path = r"E:\21F-52 FYP Project\dataset"

# Correct file name
#csv_file = os.path.join(dataset_path, "ISIC_2020_Test_Metadata.csv")  # Ensure this name is correct

# Check if the file exists
#if os.path.exists(csv_file):
 #   print("✅ File found! Loading CSV...")
  #  df = pd.read_csv(csv_file)  # Use read_csv for CSV files
   # print(df.head())  # Display first few rows
#else:
 #   print("❌ File not found! Check the filename and extension.")




# Set dataset path (Update this path according to your system)
dataset_path = r"E:\21F-52 FYP Project\dataset"

# Load CSV file (Update the filename if necessary)
df = pd.read_csv(os.path.join(dataset_path, "ISIC_2020_Training_GroundTruth.csv"))

# Extract image names and labels (Update column name if necessary)
image_names = df["image_name"].values  # Corrected column name
labels = df["target"].values  # Corrected column name

# Convert labels to categorical (One-Hot Encoding for binary classification)
labels = to_categorical(labels, num_classes=2)

# Function to load and preprocess images
def load_image(image_name, img_size=224):
    image_path = os.path.join(dataset_path, "train", image_name + ".jpg")
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error loading image: {image_name}")  # Debugging
        return np.zeros((img_size, img_size, 3))  # Return blank image if not found
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
    img = cv2.resize(img, (img_size, img_size))  # Resize for ViT
    img = img / 255.0  # Normalize
    return img

# Load all images into a NumPy array
images = np.array([load_image(name) for name in image_names])

# Split dataset into training (80%) and validation (20%)
train_images, val_images, train_labels, val_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

# Convert to NumPy arrays (Ensures compatibility with TensorFlow)
train_images = np.array(train_images)
val_images = np.array(val_images)

In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_hub as hub

# Load pretrained ViT model from TensorFlow Hub
vit_model = hub.KerasLayer("https://tfhub.dev/google/vit-base-patch16-224/1", trainable=True)

# Build the model
inputs = layers.Input(shape=(224, 224, 3))  # Input shape matches preprocessed images
x = vit_model(inputs)  # Pass images through ViT
x = layers.Dense(512, activation="relu")(x)  # Fully connected layer
x = layers.Dropout(0.3)(x)  # Regularization
x = layers.Dense(2, activation="softmax")(x)  # Output layer (2 classes: benign/malignant)

# Compile model
model = keras.Model(inputs, x)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# Model summary
model.summary()


ModuleNotFoundError: No module named 'tensorflow_hub'

In [19]:
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, LayerNormalization, MultiHeadAttention, Add, Embedding, GlobalAveragePooling1D
from tensorflow.keras.models import Model
import tensorflow as tf

# Define Vision Transformer Model
def VisionTransformer(img_size=224, patch_size=16, num_classes=2, d_model=64, num_heads=8, num_layers=4):
    input_layer = Input(shape=(img_size, img_size, 3))

    # Patch Embedding (Convert Image to Patches)
    x = Conv2D(d_model, kernel_size=(patch_size, patch_size), strides=(patch_size, patch_size), padding="valid")(input_layer)
    x = LayerNormalization()(x)
    x = Flatten()(x)  # Convert to 2D shape for transformer input

    # Calculate the number of patches
    num_patches = (img_size // patch_size) ** 2  # (224/16)² = 14x14 = 196 patches

    # Corrected Position Embedding
    position_embedding = Embedding(input_dim=num_patches, output_dim=d_model)(tf.range(num_patches))

    # Reshape both tensors to (batch_size, num_patches, d_model)
    x = tf.reshape(x, (-1, num_patches, d_model))  # Ensure correct shape
    position_embedding = tf.reshape(position_embedding, (1, num_patches, d_model))  # Add batch dimension

    # Add position embeddings
    x = Add()([x, position_embedding])  

    # Transformer Encoder Layers
    for _ in range(num_layers):
        # Multi-Head Self Attention
        attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(x, x)
        attn_output = Add()([x, attn_output])  # Residual connection
        attn_output = LayerNormalization()(attn_output)

        # Feed Forward Network (MLP)
        ff_output = Dense(4 * d_model, activation="relu")(attn_output)
        ff_output = Dense(d_model)(ff_output)
        x = Add()([attn_output, ff_output])  # Residual connection
        x = LayerNormalization()(x)

    # Classification Head
    x = GlobalAveragePooling1D()(x)  # Global Pooling
    x = Dropout(0.2)(x)
    output_layer = Dense(num_classes, activation="softmax")(x)  # Final classification layer

    # Create Model
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

# Instantiate and compile the model
model = VisionTransformer()
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Print model summary
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 14, 14, 64)           49216     ['input_1[0][0]']             
                                                                                                  
 layer_normalization (Layer  (None, 14, 14, 64)           128       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 flatten (Flatten)           (None, 12544)                0         ['layer_normalization[0][0

In [25]:
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = train_datagen.flow_from_directory(
    dataset_path,  # <- Use dataset_path, NOT dataset_path + "/train"
    target_size=(224, 224),
    batch_size=16,
    class_mode="input",  # Use 'input' if no labels
    subset="training",
    shuffle=True
)


Found 26502 images belonging to 2 classes.


In [27]:
def load_image(image_name, img_size=128):  # Reduced size
    image_path = os.path.join(dataset_path, "train", image_name + ".jpg")
    img = cv2.imread(image_path)
    if img is None:
        return np.zeros((img_size, img_size, 3))  # Return blank image if missing
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (img_size, img_size))
    img = img / 255.0  # Normalize
    return img.astype("float32")


In [28]:
history = model.fit(
    train_images, train_labels,
    validation_data=(val_images, val_labels),
    epochs=20,
    batch_size=8  # Reduce from 16 to 8
)


MemoryError: Unable to allocate 29.7 GiB for an array with shape (26500, 224, 224, 3) and data type float64