<a href="https://colab.research.google.com/github/nidish40/CricketShot_Classification/blob/main/DL_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install kaggle

> Cricket shot Classification using Vision transformers

In [None]:
import os
import zipfile
# Create the directory that kaggle.json is moved into by a later cell;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs("/root/.kaggle", exist_ok=True)

In [None]:
import os
import shutil

# Move the uploaded API token into /root/.kaggle. The move is guarded so that
# re-running the cell after the token has already been moved is a no-op instead
# of an error, while a genuinely missing token still fails loudly.
if os.path.exists("/content/kaggle.json"):
    shutil.move("/content/kaggle.json", "/root/.kaggle/kaggle.json")
elif not os.path.exists("/root/.kaggle/kaggle.json"):
    raise FileNotFoundError(
        "kaggle.json not found: upload it to /content before running this cell."
    )


In [None]:
# Restrict the API token to owner read/write only (mode 600).
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
!kaggle datasets download aneesh10/cricket-shot-dataset
!unzip cricket-shot-dataset.zip -d ./dataset

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Patch Embedding
class PatchEmbedding(layers.Layer):
    """Split an image into non-overlapping square patches and embed each one.

    A Conv2D whose kernel size and stride both equal ``patch_size`` projects
    every patch to ``embed_dim`` channels; the resulting spatial grid is then
    flattened (row-major) into a sequence of patch tokens.

    Args:
        patch_size: Side length of each square patch, in pixels.
        embed_dim: Number of channels produced for every patch.
        **kwargs: Standard ``layers.Layer`` arguments such as ``name``.
    """

    def __init__(self, patch_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Stored so get_config() can rebuild the layer when a model is reloaded.
        self.patch_size = patch_size
        self.embed_dim = embed_dim
        self.projection = layers.Conv2D(embed_dim, kernel_size=patch_size, strides=patch_size, padding="valid")
        self.flatten = layers.Reshape((-1, embed_dim))

    def call(self, x):
        """Map ``(batch, H, W, C)`` images to ``(batch, num_patches, embed_dim)`` tokens."""
        x = self.projection(x)
        return self.flatten(x)

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({"patch_size": self.patch_size, "embed_dim": self.embed_dim})
        return config

# Transformer encoder block (self-attention + MLP, each with a residual)
def transformer_block(embed_dim, num_heads, mlp_dim, block_name=None):
    """Build one transformer encoder block as a standalone Keras model.

    Layer normalization is applied before the self-attention sub-layer and
    again before the MLP sub-layer; each sub-layer's output is added back to
    its input. Note that ``key_dim`` is set to ``embed_dim``, i.e. every
    attention head uses the full embedding width.

    Args:
        embed_dim: Size of the last axis of the input and output tokens.
        num_heads: Number of attention heads.
        mlp_dim: Width of the hidden Dense layer in the MLP sub-layer.
        block_name: Optional name given to the returned model.

    Returns:
        A ``keras.Model`` mapping ``(batch, seq_len, embed_dim)`` tokens to a
        tensor of the same shape.
    """
    tokens = layers.Input(shape=(None, embed_dim))

    # Self-attention sub-layer with residual connection.
    normed_tokens = layers.LayerNormalization()(tokens)
    attended = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(normed_tokens, normed_tokens)
    after_attention = layers.Add()([attended, tokens])

    # Position-wise MLP sub-layer with residual connection.
    hidden = layers.LayerNormalization()(after_attention)
    hidden = layers.Dense(mlp_dim, activation="gelu")(hidden)
    hidden = layers.Dense(embed_dim)(hidden)
    block_output = layers.Add()([after_attention, hidden])

    return keras.Model(tokens, block_output, name=block_name)

# Shot-ViT Model
class PositionEmbedding(layers.Layer):
    """Add a learnable position embedding to a sequence of patch tokens.

    The embedding table lives inside this layer, so its weights are tracked by
    the enclosing model and updated during training.

    Args:
        num_patches: Number of tokens in the sequence (one embedding per slot).
        embed_dim: Size of each token vector.
        **kwargs: Standard ``layers.Layer`` arguments such as ``name``.
    """

    def __init__(self, num_patches, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.position_embedding = layers.Embedding(input_dim=num_patches, output_dim=embed_dim)

    def call(self, tokens):
        positions = tf.range(self.num_patches)
        # (num_patches, embed_dim) broadcasts over the batch axis of `tokens`.
        return tokens + self.position_embedding(positions)

    def get_config(self):
        config = super().get_config()
        config.update({"num_patches": self.num_patches, "embed_dim": self.embed_dim})
        return config


def build_shot_vit(image_size=128, patch_size=64, embed_dim=512, num_heads=8, mlp_dim=2048, num_layers=6, num_classes=4):
    """Build the Shot-ViT image classifier.

    Pipeline: patch embedding -> learnable position embedding -> ``num_layers``
    transformer blocks (named ``TransformerBlock_<i>``) -> global average
    pooling over tokens -> dropout -> two GELU Dense layers -> softmax.

    Args:
        image_size: Height and width of the square RGB input images.
        patch_size: Side length of each square patch.
        embed_dim: Token embedding size.
        num_heads: Attention heads per transformer block.
        mlp_dim: Hidden width of each transformer block's MLP.
        num_layers: Number of transformer blocks.
        num_classes: Number of output classes.

    Returns:
        An uncompiled ``keras.Model`` named ``"ViT-Shot"`` that outputs class
        probabilities.
    """
    inputs = keras.Input(shape=(image_size, image_size, 3))

    # Patch Embedding
    patches = PatchEmbedding(patch_size, embed_dim)(inputs)

    # Positional Encoding. This is applied through a layer that is part of the
    # model graph: calling an Embedding layer eagerly on tf.range(...) here
    # would instead bake its randomly initialised output into the graph as a
    # constant that is never trained.
    num_patches = (image_size // patch_size) ** 2
    x = PositionEmbedding(num_patches, embed_dim)(patches)

    # Transformer Blocks
    for i in range(num_layers):
        # Unique names let individual blocks be retrieved with model.get_layer().
        x = transformer_block(embed_dim, num_heads, mlp_dim, block_name=f"TransformerBlock_{i}")(x)

    # Classification Head
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(128, activation="gelu")(x)
    x = layers.Dense(32, activation="gelu")(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    return keras.Model(inputs, outputs, name="ViT-Shot")

# Create the model
shot_vit = build_shot_vit(embed_dim=512, mlp_dim=2048, num_layers=8)  # 8 blocks (default is 6); embed_dim/mlp_dim equal the defaults
shot_vit.summary()

In [None]:
import os
import tensorflow as tf

# Define dataset path
dataset_path = "/content/dataset/data"

# Define parameters
batch_size = 64
img_size = (128, 128)
seed = 123

# Load training dataset (70%)
train_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    validation_split=0.3,  # Total 30% split to use later
    subset="training",
    seed=seed,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="int"
)

# Label index -> class name mapping exactly as inferred by the loader. It must
# be read here: the dataset returned by .map() below no longer carries this
# attribute, and listing the directory by hand could pick up stray files or a
# different ordering than the one used for the integer labels.
class_names = train_ds.class_names

# Load validation+test dataset (30%)
val_test_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    validation_split=0.3,
    subset="validation",
    seed=seed,
    image_size=img_size,
    batch_size=batch_size,
    label_mode="int"
)

# Preprocess (normalize) function
def preprocess(image, label):
    """Scale pixel values from [0, 255] to [0, 1]; the label passes through."""
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

AUTOTUNE = tf.data.AUTOTUNE

# The loader reshuffles its samples on every iteration, so take()/skip() on the
# live dataset would hand out different (and possibly overlapping) samples to
# the validation and test splits each time they are iterated. Normalise, cache,
# and run one complete pass so the cache is fully populated; every later
# iteration then replays the same batches in the same order.
val_test_ds = val_test_ds.map(preprocess).cache()
for _batch in val_test_ds:
    pass

# Now split val_test_ds into 50/50 -> 15% validation, 15% test
val_batches = tf.data.experimental.cardinality(val_test_ds) // 2

val_ds = val_test_ds.take(val_batches).prefetch(AUTOTUNE)
test_ds = val_test_ds.skip(val_batches).prefetch(AUTOTUNE)

# Apply normalization and prefetch
train_ds = train_ds.map(preprocess).cache().prefetch(AUTOTUNE)

# Print size info
print(f"Train batches: {tf.data.experimental.cardinality(train_ds).numpy()}")
print(f"Validation batches: {tf.data.experimental.cardinality(val_ds).numpy()}")
print(f"Test batches: {tf.data.experimental.cardinality(test_ds).numpy()}")
print("Classes:", class_names)

In [None]:
# Configure Shot-ViT for training: Adam at 1e-4, integer-label cross-entropy,
# and accuracy as the tracked metric.
shot_vit.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["accuracy"],
)

In [None]:
# Number of passes over the training set; adjust as needed.
epochs = 15

# Train Shot-ViT, evaluating on the validation split after every epoch.
history = shot_vit.fit(train_ds, epochs=epochs, validation_data=val_ds)

In [None]:
# Measure held-out performance. The numbers are reported as test metrics, so
# they must come from test_ds rather than the data the model was trained on.
test_loss, test_acc = shot_vit.evaluate(test_ds)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

In [None]:
import matplotlib.pyplot as plt

def plot_training_history(history):
    """Draw one figure per logged metric, overlaying its validation curve.

    Args:
        history: Object returned by ``model.fit``; its ``history`` attribute
            maps metric names to per-epoch value lists. Keys prefixed with
            ``val_`` are treated as the validation counterpart of the key
            without the prefix.
    """
    logs = history.history
    paired_metrics = {}

    # Pair every "val_<name>" series with "<name>"; series that have no
    # validation counterpart are plotted on their own.
    for key in logs:
        if key.startswith("val_"):
            train_key = key[4:]
            if train_key in logs:
                paired_metrics[train_key] = (train_key, key)
            continue
        if f"val_{key}" not in logs:
            paired_metrics[key] = (key, None)

    # One figure per metric.
    for metric, (train_metric, val_metric) in paired_metrics.items():
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.plot(logs[train_metric], label=f"Train {metric}", marker='o')
        if val_metric:
            ax.plot(logs[val_metric], label=f"Validation {metric}", marker='s')
        ax.set_title(f'{metric.capitalize()} over Epochs')
        ax.set_xlabel('Epochs')
        ax.set_ylabel(metric.capitalize())
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        plt.show()

# Plot the Shot-ViT training curves.
plot_training_history(history)


In [None]:
class CNNFeatureExtractor(layers.Layer):
    """Three-stage convolutional feature extractor with a flattened output.

    The stages use ``filters``, ``2 * filters`` and ``4 * filters`` channels;
    the second and third use stride 2, so each halves the spatial resolution.

    Args:
        filters: Channel count of the first convolution.
    """

    def __init__(self, filters=64):
        super().__init__()
        # Settings shared by all three convolutions.
        shared = dict(kernel_size=3, padding="same", activation="relu")
        self.conv1 = layers.Conv2D(filters, strides=1, **shared)
        self.conv2 = layers.Conv2D(filters * 2, strides=2, **shared)
        self.conv3 = layers.Conv2D(filters * 4, strides=2, **shared)
        self.flatten = layers.Flatten()

    def call(self, x):
        """Run the three convolutions in order and flatten the result per sample."""
        for stage in (self.conv1, self.conv2, self.conv3):
            x = stage(x)
        return self.flatten(x)

def build_cnn_vit(image_size=128, patch_size=64, embed_dim=512, num_heads=8, mlp_dim=2048, num_layers=6, num_classes=4):
    """Build the CNN + transformer hybrid classifier.

    The flattened CNN features are projected to ``embed_dim`` and reshaped to
    a sequence of exactly one token, which is then passed through
    ``num_layers`` transformer blocks and a Dense classification head.

    Args:
        image_size: Height and width of the square RGB input images.
        patch_size: Accepted for signature parity with ``build_shot_vit``;
            not used by this model.
        embed_dim: Size of the single token fed to the transformer blocks.
        num_heads: Attention heads per transformer block.
        mlp_dim: Hidden width of each transformer block's MLP.
        num_layers: Number of transformer blocks.
        num_classes: Number of output classes.

    Returns:
        An uncompiled ``keras.Model`` named ``"CNN-ViT"`` that outputs class
        probabilities.
    """
    image_input = keras.Input(shape=(image_size, image_size, 3))

    # Convolutional features -> one embed_dim-sized token.
    flat_features = CNNFeatureExtractor()(image_input)
    projected = layers.Dense(embed_dim, activation="relu")(flat_features)
    tokens = layers.Reshape((1, embed_dim))(projected)

    # Transformer stack (blocks are left with Keras' auto-generated names).
    for _ in range(num_layers):
        tokens = transformer_block(embed_dim, num_heads, mlp_dim)(tokens)

    # Classification head.
    pooled = layers.GlobalAveragePooling1D()(tokens)
    hidden = layers.Dense(128, activation="gelu")(pooled)
    hidden = layers.Dense(32, activation="gelu")(hidden)
    class_probs = layers.Dense(num_classes, activation="softmax")(hidden)

    return keras.Model(image_input, class_probs, name="CNN-ViT")

# Instantiate with the default hyper-parameters and print the layer summary.
cnn_vit = build_cnn_vit()
cnn_vit.summary()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

def visualize_attention(model, image, layer_idx=0, image_size=128):
    """Overlay one transformer block's self-attention on the input image.

    The attention scores of the block named ``TransformerBlock_<layer_idx>``
    are recorded during a single forward pass, averaged over heads and over
    query tokens (giving the mean attention each patch receives), reshaped to
    the patch grid, upsampled to the image size and drawn on top of the image.

    Args:
        model: A model whose transformer blocks are nested Keras models named
            ``TransformerBlock_<i>`` and contain a ``MultiHeadAttention``
            layer, and whose tokens form a square, row-major patch grid.
        image: A single ``(H, W, 3)`` image, scaled the same way as the
            model's training data.
        layer_idx: Index of the transformer block to inspect.
        image_size: Side length the image is resized to before inference.

    Raises:
        ValueError: If the block has no attention layer or the number of
            tokens is not a perfect square.
        RuntimeError: If no attention scores were recorded.
    """
    # Resize as a float tensor and add the batch axis the model expects.
    image = tf.image.resize(tf.cast(image, tf.float32), (image_size, image_size))
    batch = tf.expand_dims(image, axis=0)

    # Locate the requested block and the attention layer inside it.
    block = model.get_layer(name=f"TransformerBlock_{layer_idx}")
    attention_layer = next(
        (layer for layer in block.layers
         if isinstance(layer, tf.keras.layers.MultiHeadAttention)),
        None,
    )
    if attention_layer is None:
        raise ValueError(f"TransformerBlock_{layer_idx} contains no MultiHeadAttention layer")

    # Keras layers do not store attention scores, so temporarily wrap the
    # layer's call() to request them and record them during one eager pass.
    recorded = {}
    original_call = attention_layer.call

    def call_and_record(query, value, *args, **kwargs):
        kwargs["return_attention_scores"] = True
        output, scores = original_call(query, value, *args, **kwargs)
        recorded["scores"] = scores
        return output

    attention_layer.call = call_and_record
    try:
        model(batch, training=False)
    finally:
        # Always restore the layer, even if the forward pass fails.
        attention_layer.call = original_call

    if "scores" not in recorded:
        raise RuntimeError("The attention layer was not executed during the forward pass")

    # Scores have shape (batch, heads, queries, keys); reduce to one value per key token.
    scores = np.asarray(recorded["scores"])[0]
    token_attention = scores.mean(axis=(0, 1))

    grid = int(round(np.sqrt(token_attention.size)))
    if grid * grid != token_attention.size:
        raise ValueError(
            f"Cannot arrange {token_attention.size} tokens into a square patch grid"
        )

    # Upsample the patch-level map to the image resolution for the overlay.
    heatmap = token_attention.reshape(grid, grid, 1).astype("float32")
    heatmap = tf.image.resize(heatmap, (image_size, image_size)).numpy()[..., 0]

    # imshow expects float RGB in [0, 1]; rescale if the image is in [0, 255].
    display_image = image.numpy()
    if display_image.max() > 1.0:
        display_image = display_image / 255.0
    display_image = np.clip(display_image, 0.0, 1.0)

    # Visualize the attention map
    plt.figure(figsize=(8, 8))
    plt.imshow(display_image)  # Original image
    plt.imshow(heatmap, alpha=0.5, cmap='jet')  # Overlay attention map with transparency
    plt.colorbar()
    plt.title(f"Attention Map for Layer {layer_idx}")
    plt.axis('off')
    plt.show()


In [None]:
# import os

# # Define dataset path
# dataset_path = "/content/dataset/data"  # Update if needed

# # Define parameters
# batch_size = 64
# img_size = (128, 128)  # Image size for ViT model
# seed = 123  # Set a seed for reproducibility

# # Load dataset properly with batching
# full_dataset = tf.keras.utils.image_dataset_from_directory(
#     dataset_path,
#     image_size=img_size,
#     batch_size=batch_size,
#     label_mode="int",
#     shuffle=True,
#     seed=seed
# )

# # Get dataset size
# dataset_size = tf.data.experimental.cardinality(full_dataset).numpy()
# train_size = int(0.7 * dataset_size)  # 70% for training
# val_size = int(0.15 * dataset_size)   # 15% for validation
# test_size = dataset_size - train_size - val_size  # Remaining 15% for testing

# # Split dataset correctly
# train_ds = full_dataset.take(train_size)  # First 70%
# remaining_ds = full_dataset.skip(train_size)  # Remaining 30%

# val_ds = remaining_ds.take(val_size)  # Next 15% for validation
# test_ds = remaining_ds.skip(val_size)  # Last 15% for testing

# # Preprocess function (Normalization)
# def preprocess(image, label):
#     image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0,1]
#     return image, label

# AUTOTUNE = tf.data.AUTOTUNE

# # Optimize datasets with caching and prefetching
# train_ds = train_ds.map(preprocess).cache().prefetch(AUTOTUNE)
# val_ds = val_ds.map(preprocess).cache().prefetch(AUTOTUNE)
# test_ds = test_ds.map(preprocess).cache().prefetch(AUTOTUNE)

In [None]:
# Configure CNN-ViT for training: Adam at 1e-4, integer-label cross-entropy,
# and both top-1 and top-3 accuracy as tracked metrics.
cnn_vit.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=[
        keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
        keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="top_3_accuracy"),
    ],
)


In [None]:
# Train CNN-ViT for 15 epochs (adjust based on performance), evaluating on the
# validation split after every epoch.
history_cnn_vit = cnn_vit.fit(train_ds, epochs=15, validation_data=val_ds)

In [None]:
# cnn_vit is compiled with two metrics, so evaluate() returns three values:
# the loss followed by accuracy and top-3 accuracy, in that order.
test_loss, test_acc, test_top3_acc = cnn_vit.evaluate(test_ds)
print(f"Test Accuracy: {test_acc:.4f}, Top-3 Accuracy: {test_top3_acc:.4f}, Test Loss: {test_loss:.4f}")


In [None]:
import matplotlib.pyplot as plt

# NOTE(review): this definition duplicates the plot_training_history defined
# earlier in the notebook; keep a single copy when the notebook is tidied up.
def plot_training_history(history):
    """Draw one figure per logged metric, overlaying its validation curve.

    Args:
        history: Object returned by ``model.fit``; its ``history`` attribute
            maps metric names to per-epoch value lists. Keys prefixed with
            ``val_`` are treated as the validation counterpart of the key
            without the prefix.
    """
    metrics = history.history.keys()
    paired_metrics = {}

    # Group training and validation metrics
    for metric in metrics:
        if metric.startswith("val_"):
            base_metric = metric[4:]
            if base_metric in metrics:
                paired_metrics[base_metric] = (base_metric, metric)
        elif f"val_{metric}" not in metrics:
            paired_metrics[metric] = (metric, None)

    # Plot each metric with its validation counterpart
    for metric, (train_metric, val_metric) in paired_metrics.items():
        plt.figure(figsize=(8, 5))
        plt.plot(history.history[train_metric], label=f"Train {metric}", marker='o')
        if val_metric:
            plt.plot(history.history[val_metric], label=f"Validation {metric}", marker='s')
        plt.title(f'{metric.capitalize()} over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel(metric.capitalize())
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

# Plot the CNN-ViT run that was just trained. `history` still holds the earlier
# Shot-ViT run, so passing it here would only repeat the previous figures.
plot_training_history(history_cnn_vit)


In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
import numpy as np

# Get one image from the dataset
for image_batch, label_batch in train_ds.take(1):
    sample_image = image_batch[0:1]  # shape (1, 128, 128, 3)
    break

# Function to visualize feature maps
def plot_feature_maps(features, title, max_maps=20):
    """Show up to ``max_maps`` channels of the first sample in one row.

    Args:
        features: Feature tensor indexed as ``[sample, row, col, channel]``.
        title: Figure title.
        max_maps: Upper bound on the number of channels drawn.
    """
    # Size the grid by the number of maps actually drawn so that tensors with
    # fewer channels than max_maps do not leave empty columns.
    n_maps = min(max_maps, features.shape[-1])
    plt.figure(figsize=(15, 5))
    for i in range(n_maps):
        plt.subplot(1, n_maps, i + 1)
        plt.imshow(features[0, :, :, i], cmap='viridis')
        plt.axis('off')
    plt.suptitle(title)
    plt.show()

# Locate each stage by type/position relative to the CNN instead of fixed
# indices, so the walkthrough still works when cnn_vit is built with a
# different number of transformer blocks.
model_layers = cnn_vit.layers
cnn_layer = next(layer for layer in model_layers if isinstance(layer, CNNFeatureExtractor))
cnn_position = model_layers.index(cnn_layer)
dense_proj = model_layers[cnn_position + 1]
reshape = model_layers[cnn_position + 2]
# Each transformer block is itself a Keras Model nested inside cnn_vit.
transformer_blocks = [layer for layer in model_layers if isinstance(layer, Model)]
if transformer_blocks:
    head_start = model_layers.index(transformer_blocks[-1]) + 1
else:
    head_start = cnn_position + 3
gap = model_layers[head_start:]  # pooling + Dense classification head

# Step 1: CNN Layers
print("Input to CNN:", sample_image.shape)
cnn_model = Model(cnn_vit.input, cnn_layer.output)
cnn_out = cnn_model.predict(sample_image)
print("Output from CNN (flattened):", cnn_out.shape)

# Visualize intermediate CNN features (hook into each conv)
conv1_out = cnn_layer.conv1(sample_image)
plot_feature_maps(conv1_out, "Conv1 Feature Maps")

conv2_out = cnn_layer.conv2(conv1_out)
plot_feature_maps(conv2_out, "Conv2 Feature Maps")

conv3_out = cnn_layer.conv3(conv2_out)
plot_feature_maps(conv3_out, "Conv3 Feature Maps")

# Step 2: Dense projection
x = dense_proj(cnn_out)
print("After Dense projection:", x.shape)

# Step 3: Reshape
x = reshape(x)
print("After Reshape to token:", x.shape)

# Step 4: Transformer blocks
for i, block in enumerate(transformer_blocks):
    x = block(x)
    print(f"After Transformer Block {i+1}:", x.shape)

# Step 5: Classification head
for layer in gap:
    x = layer(x)
    print(f"After {layer.name}:", x.shape)


In [None]:
import matplotlib.pyplot as plt

def plot_all_metrics(history):
    """
    Plots training and validation metrics from model.fit history.

    Every key in ``history.history`` is plotted; a key prefixed with ``val_``
    is drawn on the same figure as the key without the prefix.

    Args:
        history: Object returned by ``model.fit``.
    """
    history_dict = history.history
    metrics = list(history_dict.keys())

    # Group training and validation metrics
    metric_pairs = {}
    for metric in metrics:
        if metric.startswith('val_'):
            base = metric[4:]
            if base in metrics:
                metric_pairs[base] = (base, metric)
        elif f"val_{metric}" not in metrics:
            metric_pairs[metric] = (metric, None)

    # Plot each metric and its validation counterpart
    for metric, (train_metric, val_metric) in metric_pairs.items():
        plt.figure(figsize=(8, 5))
        plt.plot(history_dict[train_metric], label=f'Train {metric}', marker='o')
        if val_metric:
            plt.plot(history_dict[val_metric], label=f'Val {metric}', marker='s')
        plt.title(f'{metric.upper()} over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel(metric.upper())
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

# Plot the CNN-ViT run, which is the model this part of the notebook analyses;
# `history` holds the earlier Shot-ViT run and lacks the top-3 accuracy metric.
plot_all_metrics(history_cnn_vit)

In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import random

def visualize_predictions(model, dataset, class_names, num_images=10):
    """Show randomly chosen images with their true and predicted class names.

    Args:
        model: Trained classifier whose ``predict`` returns per-class scores.
        dataset: Batched dataset yielding ``(images, labels)`` with integer
            labels; images must already be preprocessed for the model and
            displayable by ``imshow``.
        class_names: Sequence mapping label index to class name.
        num_images: Maximum number of images to display.

    Raises:
        ValueError: If the dataset yields no images.
    """
    all_images = []
    all_labels = []

    for images, labels in dataset.unbatch().take(1000):  # Limit so we don't exhaust resources
        all_images.append(images.numpy())
        all_labels.append(labels.numpy())

    if not all_images:
        raise ValueError("dataset yielded no images to visualize")

    # Never ask for more samples than are available.
    num_shown = min(num_images, len(all_images))
    indices = random.sample(range(len(all_images)), num_shown)

    # Predict all selected images in a single batched call.
    selected = np.stack([all_images[idx] for idx in indices])
    pred_probs = model.predict(selected, verbose=0)
    pred_classes = np.argmax(pred_probs, axis=-1)

    # Five images per row; add rows as needed so any num_images fits.
    n_cols = 5
    n_rows = math.ceil(num_shown / n_cols)
    plt.figure(figsize=(15, 4 * n_rows))
    for i, idx in enumerate(indices):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(all_images[idx])
        plt.axis('off')
        plt.title(f"True: {class_names[all_labels[idx]]}\nPred: {class_names[pred_classes[i]]}")

    plt.tight_layout()
    plt.show()

# Show predictions on held-out images; the model has already fit the training
# images, so they would overstate how well it classifies unseen shots.
visualize_predictions(cnn_vit, test_ds, class_names)

