In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# on Drive are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

# Importing required libraries.
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, concatenate, Conv2DTranspose
from keras.models import Model
from keras.optimizers import Adam
import random

# ---- Data Paths ----
# Base names (extension removed) of every entry in the page-image directory.
# os.path.splitext strips only the final extension, so a name that itself
# contains dots (e.g. "page.v2.JPG") keeps its full stem and can still be
# rebuilt as f"{name}.JPG" by the batch generator.
image_list = [
    os.path.splitext(filename)[0]
    for filename in os.listdir('PageSegData/PageImg')
]

def visualize(img, seg_img):
    """Show an image and its segmentation mask side by side in grayscale.

    Args:
        img: Array-like image drawn in the left panel.
        seg_img: Array-like mask drawn in the right panel.
    """
    plt.figure(figsize=(20, 20))
    panels = ((img, 'Image'), (seg_img, 'Segmented Image'))
    # One row, two columns; subplot positions are 1-based.
    for position, (picture, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture, cmap='gray')
        plt.title(heading)
    plt.show()

def get_segmented_img(img, n_classes):
    """Build a binary 512x512x1 mask from a multi-channel mask image.

    The image is resized to 512x512, its first channel is taken, and every
    non-zero pixel becomes 1.0 (all others 0.0).

    Args:
        img: Mask image with at least one channel axis (indexed as
            ``[:, :, 0]`` after resizing).
        n_classes: Accepted for interface compatibility; not used here.

    Returns:
        A float64 array of shape (512, 512, 1) containing only 0.0 and 1.0.
    """
    resized = cv2.resize(img, (512, 512))
    first_channel = resized[:, :, 0]
    foreground = (first_channel != 0).astype(np.float64)
    # Append the trailing channel axis expected by the network target.
    return np.expand_dims(foreground, axis=-1)

def preprocess_img(img):
    """Return ``img`` resized to 512x512 using OpenCV's default interpolation."""
    target_size = (512, 512)
    return cv2.resize(img, target_size)

def batch_generator(filelist, n_classes, batch_size):
    """Yield endless random (image, mask) batches for training/validation.

    Each sample is drawn with replacement from ``filelist``. The page image is
    read as grayscale, inverse-thresholded at 150, resized to 512x512, given a
    trailing channel axis and scaled to [0, 1]. The matching mask is read from
    ``PageSegData/PageSeg/{name}_mask.png`` and converted by
    ``get_segmented_img``.

    Args:
        filelist: Sequence of image base names (without extension).
        n_classes: Forwarded to ``get_segmented_img``.
        batch_size: Number of samples per yielded batch.

    Yields:
        Tuple ``(X, Y)`` of numpy arrays, each stacking ``batch_size`` samples.

    Raises:
        FileNotFoundError: If a page image or its mask is missing or cannot be
            decoded. ``cv2.imread`` signals this by returning ``None``, which
            would otherwise surface as an opaque OpenCV error further down.
    """
    while True:
        X = []
        Y = []
        for _ in range(batch_size):
            fn = random.choice(filelist)

            img_path = f'PageSegData/PageImg/{fn}.JPG'
            img = cv2.imread(img_path, 0)
            if img is None:
                raise FileNotFoundError(
                    f'Page image is missing or unreadable: {img_path}'
                )
            # Inverse threshold: dark ink becomes 255, light paper becomes 0.
            ret, img = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY_INV)
            img = cv2.resize(img, (512, 512))
            img = np.expand_dims(img, axis=-1)
            img = img / 255.0

            seg_path = f'PageSegData/PageSeg/{fn}_mask.png'
            seg = cv2.imread(seg_path, 1)
            if seg is None:
                raise FileNotFoundError(
                    f'Segmentation mask is missing or unreadable: {seg_path}'
                )
            seg = get_segmented_img(seg, n_classes)

            X.append(img)
            Y.append(seg)
        yield np.array(X), np.array(Y)

# ---- Modified Lightweight Deep U-Net Model ----
def unet_lightweight(input_size=(512, 512, 1), pretrained_weights=None):
    """Build and compile a six-level U-Net with a single sigmoid output channel.

    The encoder applies two 3x3 ReLU convolutions followed by 2x2 max-pooling
    at 16, 32, 64, 128, 256 and 512 filters. The bottleneck uses 1024 filters
    plus 50% dropout; with the default 512x512 input it operates on an 8x8 map
    (six halvings). The decoder mirrors the encoder: a stride-2 transposed
    convolution, concatenation with the matching encoder output, then two 3x3
    convolutions.

    Args:
        input_size: Shape of a single input sample passed to ``Input``.
        pretrained_weights: Optional path handed to ``model.load_weights``
            after compilation; skipped when falsy.

    Returns:
        The Keras ``Model`` compiled with Adam (lr 1e-4), binary cross-entropy
        loss and the accuracy metric.
    """
    conv_options = dict(activation='relu', padding='same', kernel_initializer='he_normal')

    def double_conv(tensor, filters):
        # Two stacked 3x3 convolutions with identical settings.
        for _ in range(2):
            tensor = Conv2D(filters, 3, **conv_options)(tensor)
        return tensor

    inputs = Input(input_size)

    # Encoder: remember each stage's pre-pooling output for the skip connections.
    skip_outputs = []
    features = inputs
    for filters in (16, 32, 64, 128, 256, 512):
        features = double_conv(features, filters)
        skip_outputs.append(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Bottleneck
    features = double_conv(features, 1024)
    features = Dropout(0.5)(features)

    # Decoder: consume the skip connections deepest-first.
    for filters in (512, 256, 128, 64, 32, 16):
        upsampled = Conv2DTranspose(filters, 2, strides=(2, 2), padding='same')(features)
        merged = concatenate([skip_outputs.pop(), upsampled], axis=3)
        features = double_conv(merged, filters)

    # Output: per-pixel foreground probability.
    output = Conv2D(1, 1, activation='sigmoid')(features)

    model = Model(inputs, output)
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

    if pretrained_weights:
        model.load_weights(pretrained_weights)

    return model

# ---- Data Split ----
# Shuffle in place, then use the first 75% for training and the rest for validation.
random.shuffle(image_list)
split_idx = int(0.75 * len(image_list))
file_train = image_list[:split_idx]
file_test = image_list[split_idx:]

# ---- Model Training ----
model = unet_lightweight()
model.summary()

# NOTE: ModelCheckpoint is imported but not used by the training call below.
from keras.callbacks import ModelCheckpoint

# (Adjust batch size/epochs/steps as needed)
# Both generators are infinite, so the step counts define the epoch length.
model.fit(
    batch_generator(file_train, 2, 2),
    epochs=5,
    steps_per_epoch=1000,
    validation_data=batch_generator(file_test, 2, 2),
    validation_steps=400
)

# ---- Save Final Model ----
model.save('u-net-lightweight-final-model.h5')

# ---- Sample Prediction ----
# Apply the same preprocessing as batch_generator: grayscale read, inverse
# threshold at 150, resize to 512x512, add channel and batch axes, scale to [0, 1].
sample_path = 'S5_254_13_1.jpg'
img = cv2.imread(sample_path, 0)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f'Sample image is missing or unreadable: {sample_path}')
ret, img = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY_INV)
img = cv2.resize(img, (512, 512))
img = np.expand_dims(img, axis=-1)
img = np.expand_dims(img, axis=0) / 255.0
pred = model.predict(img)
# Drop the batch and channel axes to get a 2-D probability map.
pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)
plt.imshow(pred, cmap='gray')
# Save with the same gray colormap used for display; without cmap, imsave
# would map the 2-D array through matplotlib's default (non-gray) colormap.
plt.imsave('OUTPUT_S5_254_13_1.JPG', pred, cmap='gray')



In [None]:
# --- 1. Install Detectron2 in Colab ---
# Installs Detectron2's dependencies and makes a source checkout importable.
# The lines starting with "!" are IPython shell magics and only work inside a
# notebook, not in a plain Python script.
# NOTE(review): distutils was removed from the standard library in Python 3.12;
# on such runtimes this import presumably relies on the setuptools-provided
# shim — confirm against the Colab Python version in use.
import sys, os, distutils.core
# Clone the Detectron2 sources into ./detectron2.
!git clone 'https://github.com/facebookresearch/detectron2'
# Run the checkout's setup.py to obtain its distribution metadata.
dist = distutils.core.run_setup("./detectron2/setup.py")
# pip-install every requirement listed in install_requires, each single-quoted
# so version specifiers survive the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Put the checkout at the front of sys.path so `import detectron2` resolves to it.
sys.path.insert(0, os.path.abspath('./detectron2'))

# --- 2. Import Libraries ---
import torch, detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import numpy as np
import os, json, random
from sklearn.model_selection import KFold
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# --- 3. Set Paths ---
# Image directory on the mounted Drive, and the COCO-format annotation file
# stored inside that same directory.
IMG_DIR = "/content/drive/MyDrive/Moumita_NLP/word_seg_from_lines/images"
FULL_ANN_JSON = "/content/drive/MyDrive/Moumita_NLP/word_seg_from_lines/images/via_project_3Sep2024_15h16m_coco794lines.json"

# --- 4. Load Annotations ---
# Parse the full annotation file once; per-fold subsets are derived from it later.
with open(FULL_ANN_JSON, 'r') as ann_file:
    coco_all = json.load(ann_file)

# Ids of all annotated images, in file order; this is what gets split into folds.
all_img_ids = [image_entry['id'] for image_entry in coco_all['images']]

# --- 5. Cross Validation ---
# Single source of truth for the fold count (used by KFold and the log messages).
N_SPLITS = 10
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
ap50_scores = []


def filter_coco(img_ids):
    """Return a COCO dict restricted to the given image ids.

    Args:
        img_ids: Container of image ids to keep (a set, for O(1) membership).

    Returns:
        Dict with the matching "images", their "annotations", and the full
        unchanged "categories" list from ``coco_all``.
    """
    imgs = [img for img in coco_all['images'] if img['id'] in img_ids]
    anns = [ann for ann in coco_all['annotations'] if ann['image_id'] in img_ids]
    return {
        "images": imgs,
        "annotations": anns,
        "categories": coco_all["categories"]
    }


def register_fold_dataset(name, json_file):
    """Register a COCO dataset under ``name`` unless it is already registered.

    Re-running this cell in the same runtime would otherwise fail, because
    Detectron2 refuses to register the same dataset name twice. The fold JSON
    is rewritten before each call, so an existing registration still points at
    current data.
    """
    # Local import keeps this cell self-contained; detectron2 is already a dependency.
    from detectron2.data import DatasetCatalog
    if name not in DatasetCatalog.list():
        register_coco_instances(name, {}, json_file, IMG_DIR)


for fold, (train_idx, val_idx) in enumerate(kf.split(all_img_ids)):
    print(f"\n====== Fold {fold+1}/{N_SPLITS} ======")
    # Split images for this fold (KFold yields positions into all_img_ids).
    train_ids = {all_img_ids[i] for i in train_idx}
    val_ids = {all_img_ids[i] for i in val_idx}

    # Build new train/val JSONs for this fold
    train_json = f"train_fold{fold+1}.json"
    val_json = f"val_fold{fold+1}.json"
    with open(train_json, "w") as f:
        json.dump(filter_coco(train_ids), f)
    with open(val_json, "w") as f:
        json.dump(filter_coco(val_ids), f)

    # Register these folds
    train_name = f"word_train_fold{fold+1}"
    val_name = f"word_val_fold{fold+1}"
    register_fold_dataset(train_name, train_json)
    register_fold_dataset(val_name, val_json)

    # Config for this fold: COCO-pretrained Mask R-CNN (R50-FPN) fine-tuned on one class.
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = (train_name,)
    cfg.DATASETS.TEST = (val_name,)
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.001
    cfg.SOLVER.MAX_ITER = 1500   # Reduce for Colab if needed
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.OUTPUT_DIR = f"./output_fold{fold+1}"
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    # resume=False: always start from cfg.MODEL.WEIGHTS rather than a previous checkpoint.
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Evaluate on validation fold
    evaluator = COCOEvaluator(val_name, cfg, False, output_dir=cfg.OUTPUT_DIR)
    val_loader = build_detection_test_loader(cfg, val_name)
    eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
    # Segmentation-mask AP at IoU 0.50 is the per-fold score.
    ap50 = eval_results["segm"]["AP50"]
    ap50_scores.append(ap50)
    print(f"Fold {fold+1} AP50: {ap50:.2f}")

# Print summary
print(f"\n=== {N_SPLITS}-Fold Cross Validation Results ===")
print("AP50 per fold:", [f"{score:.2f}" for score in ap50_scores])
print(f"Mean AP50: {np.mean(ap50_scores):.2f}  |  Std: {np.std(ap50_scores):.2f}")
