<a href="https://colab.research.google.com/github/lavanyasatpute/lavanyasatpute/blob/main/Lavanya_with_Swin_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2DTranspose, Flatten, Dropout, Dense, GlobalAveragePooling1D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler

# Define MLP Layer
class MLP(tf.keras.layers.Layer):
    """Feed-forward sub-network: Dense -> activation -> Dropout -> Dense -> Dropout.

    Args:
        in_features: Width used for the hidden and/or output Dense layer
            whenever the corresponding argument is omitted (or falsy).
        hidden_features: Number of units of the first Dense layer.
        out_features: Number of units of the second Dense layer.
        activation: Activation handed to ``tf.keras.layers.Activation``.
        dropout_rate: Rate shared by both Dropout layers.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, activation=tf.nn.gelu, dropout_rate=0.0):
        super().__init__()
        # Fall back to the input width when no explicit size is supplied.
        self.hidden_features = hidden_features if hidden_features else in_features
        self.out_features = out_features if out_features else in_features
        self.activation = activation

        # Sub-layers are created in the order in which ``call`` applies them.
        self.fc1 = tf.keras.layers.Dense(self.hidden_features)
        self.act = tf.keras.layers.Activation(self.activation)
        self.drop1 = tf.keras.layers.Dropout(dropout_rate)
        self.fc2 = tf.keras.layers.Dense(self.out_features)
        self.drop2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x, training=False):
        """Apply both Dense stages; ``training`` is forwarded to the Dropout layers."""
        hidden = self.act(self.fc1(x))
        hidden = self.drop1(hidden, training=training)
        projected = self.fc2(hidden)
        return self.drop2(projected, training=training)

# Define Patch Embedding Layer
class PatchEmbedding(tf.keras.layers.Layer):
    """Project an image-like tensor into a sequence of ``embed_dim``-wide tokens.

    A Conv2D whose kernel size and stride both equal ``patch_size`` is applied
    to the input, and the result is reshaped to ``(batch, -1, embed_dim)``.

    Args:
        embed_dim: Number of Conv2D filters, i.e. the width of every token.
        patch_size: Kernel size and stride of the projecting convolution.
    """

    def __init__(self, embed_dim=96, patch_size=4):
        super().__init__()
        self.patch_size = patch_size
        self.embed_dim = embed_dim
        self.proj = tf.keras.layers.Conv2D(
            embed_dim,
            kernel_size=patch_size,
            strides=patch_size,
        )

    def call(self, x):
        """Return the projected input with all non-batch, non-channel axes flattened."""
        patches = self.proj(x)
        batch_size = tf.shape(patches)[0]
        return tf.reshape(patches, [batch_size, -1, self.embed_dim])

# Define WindowAttention Layer
class WindowAttention(tf.keras.layers.Layer):
    """Multi-head scaled dot-product self-attention over a token sequence.

    ``window_size`` is stored on the instance but is not read by ``call``:
    attention is computed across every position along axis 1 of the input.

    Args:
        dim: Channel width of the input; also the width of the output projection.
        num_heads: Number of attention heads (each head gets ``dim // num_heads``
            channels).
        window_size: Stored as an attribute only.
        dropout_rate: Rate of the dropout applied to the attention weights and
            to the projected output.
    """

    def __init__(self, dim, num_heads, window_size, dropout_rate=0.0):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5  # 1 / sqrt(head_dim)
        self.qkv = tf.keras.layers.Dense(dim * 3)  # fused query/key/value projection
        self.proj = tf.keras.layers.Dense(dim)
        self.attn_dropout = tf.keras.layers.Dropout(dropout_rate)
        self.proj_dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x):
        """Return the attended and re-projected sequence, shaped like ``x`` (B, N, C)."""
        dims = tf.shape(x)
        batch, tokens, channels = dims[0], dims[1], dims[2]

        # (B, N, 3C) -> (B, N, 3, heads, C/heads) -> (3, B, heads, N, C/heads)
        fused = tf.reshape(
            self.qkv(x),
            (batch, tokens, 3, self.num_heads, channels // self.num_heads),
        )
        fused = tf.transpose(fused, perm=[2, 0, 3, 1, 4])
        query, key, value = fused[0], fused[1], fused[2]

        # Attention weights: softmax over the key axis of the scaled scores.
        scores = tf.matmul(query * self.scale, key, transpose_b=True)
        weights = self.attn_dropout(tf.nn.softmax(scores, axis=-1))

        # (B, heads, N, C/heads) -> (B, N, heads, C/heads) -> (B, N, C)
        context = tf.transpose(tf.matmul(weights, value), perm=[0, 2, 1, 3])
        merged = tf.reshape(context, (batch, tokens, channels))

        return self.proj_dropout(self.proj(merged))

# Define Swin Transformer Block
class SwinTransformerBlock(tf.keras.layers.Layer):
    """Pre-norm transformer block with residual attention and MLP sub-layers.

    The block computes ``x = x + attn(norm1(x))`` followed by
    ``x = x + mlp(norm2(x))``.

    ``window_size`` is forwarded to ``WindowAttention`` and ``shift_size`` is
    stored on the instance; ``call`` itself performs no window partitioning
    and no shifting.

    Args:
        dim: Channel width of the input tokens (and of the output).
        num_heads: Number of attention heads.
        window_size: Forwarded to ``WindowAttention``.
        shift_size: Stored as an attribute only.
        mlp_ratio: Hidden width of the MLP as a multiple of ``dim``.
        dropout_rate: Dropout rate used by the attention layer and the MLP.
    """

    def __init__(self, dim, num_heads, window_size, shift_size=0, mlp_ratio=4., dropout_rate=0.0):
        super(SwinTransformerBlock, self).__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
        self.attn = WindowAttention(dim, num_heads, window_size, dropout_rate)
        self.norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
        # The MLP's output width defaults to ``dim``, so its result can be
        # added back onto the residual stream.
        self.mlp = MLP(in_features=dim, hidden_features=int(dim * mlp_ratio), dropout_rate=dropout_rate)

    def call(self, x):
        """Run the attention and MLP sub-layers, each with its own skip connection."""
        # Attention sub-layer.
        shortcut = x
        x = self.norm1(x)
        x = self.attn(x)
        x = x + shortcut

        # MLP sub-layer. The skip connection here was previously missing, so
        # the block returned only the MLP output and dropped the residual stream.
        shortcut = x
        x = self.norm2(x)
        x = self.mlp(x)
        return x + shortcut

In [None]:
# Define the ResNet-Swin Transformer Model
def create_resnet_swin_model(
    input_shape,
    num_classes,
    *,
    embed_dim=96,
    num_heads=3,
    window_size=7,
    patch_size=4,
    upsample_factor=4,
    weights="imagenet",
):
    """Build a classifier that feeds ResNet50 features through one Swin-style block.

    Pipeline: ResNet50 (no top) -> Conv2DTranspose upsampling -> PatchEmbedding
    -> SwinTransformerBlock -> GlobalAveragePooling1D -> softmax Dense.

    Args:
        input_shape: Shape of one input image, passed to ``ResNet50``.
        num_classes: Number of units of the final softmax layer.
        embed_dim: Filters of the upsampling layer and token width of the
            patch embedding / transformer block.
        num_heads: Attention heads of the transformer block.
        window_size: Forwarded to ``SwinTransformerBlock``.
        patch_size: Kernel size and stride of the patch-embedding convolution.
        upsample_factor: Kernel size and stride of the transposed convolution.
        weights: Forwarded to ``ResNet50`` (``"imagenet"`` by default).

    Returns:
        An uncompiled ``Model`` mapping the ResNet50 input to class probabilities.

    NOTE(review): with the defaults, the stride-4 upsampling is undone by the
    stride-4 patch embedding, so the token grid has the same spatial size as
    the ResNet50 output. For small inputs that may leave very few tokens for
    the attention layer -- confirm this is intended, or pass a smaller
    ``patch_size``.
    """
    # Load ResNet50 without the top layer
    resnet = ResNet50(include_top=False, weights=weights, input_shape=input_shape)
    features = resnet.output

    # Upsample the ResNet feature map before it is cut into patches
    upsampled_output = Conv2DTranspose(
        embed_dim,
        kernel_size=(upsample_factor, upsample_factor),
        strides=(upsample_factor, upsample_factor),
        padding='same',
    )(features)

    # Turn the feature map into a token sequence
    swin_input = PatchEmbedding(embed_dim=embed_dim, patch_size=patch_size)(upsampled_output)

    # Single transformer block over the token sequence
    swin_block = SwinTransformerBlock(dim=embed_dim, num_heads=num_heads, window_size=window_size)
    swin_output = swin_block(swin_input)

    # Average over the token axis, then classify
    pooled = GlobalAveragePooling1D()(swin_output)
    outputs = Dense(num_classes, activation='softmax')(pooled)

    # Create the final model
    return Model(inputs=resnet.input, outputs=outputs)

# Build the model for 32x32 RGB inputs and 10 output classes (it is compiled further below)
model = create_resnet_swin_model(num_classes=10, input_shape=(32, 32, 3))

# Define an epoch-based learning rate decay schedule
def lr_schedule(epoch, lr):
    """Step-decay schedule for ``LearningRateScheduler``.

    The callback passes in the *current* learning rate, so any factor returned
    here is applied on top of earlier reductions. Multiplying on every epoch
    past a threshold (as this function used to) therefore shrinks the rate
    again each epoch. The decay is now applied once, at each boundary epoch:

    * epochs 0-4:  initial rate
    * epochs 5-9:  initial rate * 0.1
    * epochs 10+:  initial rate * 0.01

    Args:
        epoch: Zero-based epoch index supplied by the callback.
        lr: Learning rate currently set on the optimizer.

    Returns:
        The learning rate to use for ``epoch``.
    """
    # Each boundary contributes one factor of 0.1 (0.1 * 0.1 == 0.01 overall).
    if epoch in (5, 10):
        return lr * 0.1
    return lr

# Optimizer: SGD with Nesterov momentum
sgd = SGD(
    learning_rate=1e-2,
    momentum=0.9,
    nesterov=True,
)

# Compile for one-hot labels (categorical cross-entropy) and track accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# Callback that asks lr_schedule for the learning rate of each epoch
lr_callback = LearningRateScheduler(lr_schedule)

# Print the layer-by-layer summary of the model
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset stored
# under /content/drive/MyDrive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Paths
dataset_path = '/content/drive/MyDrive/My Documents/final datset'
train_path = 'Dataset_splits/train'
valid_path = 'Dataset_splits/valid'
test_path = 'Dataset_splits/test'

# Create directories for train, valid, and test splits
for split_root in (train_path, valid_path, test_path):
    os.makedirs(split_root, exist_ok=True)

# Every sub-directory of the dataset folder is one class. os.listdir returns
# entries in arbitrary order, so sort to keep the processing order stable.
classes = sorted(
    d for d in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, d))
)

for cls in classes:
    class_path = os.path.join(dataset_path, cls)

    # Sort the file list as well: train_test_split with a fixed random_state
    # only reproduces the same split when it is given the same input order.
    images = sorted(
        os.path.join(class_path, f)
        for f in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, f))
    )

    # 70% train, then the remaining 30% is halved into 15% valid and 15% test
    train_imgs, temp_imgs = train_test_split(images, test_size=0.30, random_state=42)
    valid_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.50, random_state=42)

    # Create the class directory inside each split and copy its images there
    for split_root, split_imgs in (
        (train_path, train_imgs),
        (valid_path, valid_imgs),
        (test_path, test_imgs),
    ):
        destination = os.path.join(split_root, cls)
        os.makedirs(destination, exist_ok=True)
        for img in split_imgs:
            shutil.copy(img, destination)

print("Dataset splitting complete!")

Dataset splitting complete!


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define the path to your dataset
train_data_dir = '/content/Dataset_splits/train'
validation_data_dir = '/content/Dataset_splits/valid'
weights_dir = '/content/model_weights/'  # Directory to save weights

# Create the directory if it doesn't exist
os.makedirs(weights_dir, exist_ok=True)

# Define image dimensions and batch size
img_width, img_height = 224, 224
batch_size = 4

# Data augmentation for training set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.8, 1.2]
)

# Validation set: rescale only. Random augmentation here would make the
# validation metrics noisy and not comparable from epoch to epoch.
test_datagen = ImageDataGenerator(rescale=1./255)

# Load the training data (target_size is given as (height, width))
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Load the validation data
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

Found 2942 images belonging to 9 classes.
Found 633 images belonging to 9 classes.


In [None]:
# Write a weights-only checkpoint at the end of every epoch; the epoch number
# is formatted into the file name inside weights_dir.
checkpoint_filepath = os.path.join(weights_dir, 'weights_epoch_{epoch:02d}.weights.h5')
checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_freq='epoch',
    save_weights_only=True,
)

In [None]:
# Print the layer-by-layer summary of the compiled model again
model.summary()

In [None]:
import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img
from tensorflow.keras.utils import to_categorical

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Round-trip every image through PIL and resize it to 32x32, then scale the
# pixel values by 1/255.
# NOTE(review): array_to_img is called with its default arguments; if its
# default scaling stretches each image to the full 0-255 range, this step
# changes pixel values even when the size is already 32x32 -- confirm.
x_train = np.array(
    [img_to_array(array_to_img(sample).resize((32, 32))) for sample in x_train]
).astype('float32') / 255.0
x_test = np.array(
    [img_to_array(array_to_img(sample).resize((32, 32))) for sample in x_test]
).astype('float32') / 255.0

# One-hot encode the labels for the 10 classes
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
# Single training epoch on the CIFAR-10 arrays; the test split is used as
# validation data.
model.fit(
    x=x_train,
    y=y_train,
    epochs=1,
    batch_size=32,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint, lr_callback],
)

[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 48ms/step - accuracy: 0.5134 - loss: 1.3901 - val_accuracy: 0.5769 - val_loss: 1.3513 - learning_rate: 0.0100


<keras.src.callbacks.history.History at 0x78e794e154b0>

In [None]:
# Continue training the same model for five more epochs with the same
# callbacks; the test split is again used as validation data.
model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    batch_size=32,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint, lr_callback],
)

Epoch 1/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 34ms/step - accuracy: 0.7335 - loss: 0.7833 - val_accuracy: 0.6279 - val_loss: 1.1481 - learning_rate: 0.0100
Epoch 2/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 33ms/step - accuracy: 0.7945 - loss: 0.6144 - val_accuracy: 0.6486 - val_loss: 1.0630 - learning_rate: 0.0100
Epoch 3/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 34ms/step - accuracy: 0.8308 - loss: 0.5032 - val_accuracy: 0.7528 - val_loss: 0.7654 - learning_rate: 0.0100
Epoch 4/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 32ms/step - accuracy: 0.8635 - loss: 0.4120 - val_accuracy: 0.6856 - val_loss: 1.0659 - learning_rate: 0.0100
Epoch 5/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 32ms/step - accuracy: 0.8841 - loss: 0.3461 - val_accuracy: 0.7764 - val_loss: 0.6951 - learning_rate: 0.0100


<keras.src.callbacks.history.History at 0x78e794e16410>