<a href="https://colab.research.google.com/github/kanaad-lims/AIRL-Internship-Questions/blob/main/q2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Install required packages in Colab-friendly way
!pip install -q --upgrade pip setuptools wheel
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q segment-anything timm matplotlib opencv-python pillow requests einops shapely yacs ftfy prettytable


In [14]:
# Install Environments. This will take a few minutes. Please be patient ;)
! nvidia-smi
! git clone https://github.com/microsoft/GLIP.git
%cd GLIP
! git checkout c663d9db8a503e04c6b76cd2e14152bab775d28a
! pip install torch torchvision torchaudio
! pip install einops shapely timm yacs tensorboardX ftfy prettytable pymongo
! pip install transformers
! python setup.py build develop --user
! mkdir MODEL

In [None]:
import torch
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np
import sys
import os

# Add GLIP repo to Python path
# NOTE: this hard-coded Colab location has to be appended before the
# maskrcnn_benchmark imports below run — presumably that package is provided
# by the GLIP checkout; confirm against the repository layout.
sys.path.append("/content/GLIP")

# SAM imports
from segment_anything import sam_model_registry, SamPredictor

# GLIP imports
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.engine.predictor_glip import GLIPDemo

# Device: use the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")


In [None]:
import requests
from io import BytesIO

def load_image(url, timeout=30):
    """
    Download the image at ``url`` and return it as a numpy array in BGR format.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds handed to ``requests.get`` so a stalled server cannot
            block the notebook forever.

    Returns:
        A contiguous numpy array holding the decoded image with its last axis
        ordered B, G, R.

    Raises:
        requests.HTTPError: If the server replies with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of passing an error page on to PIL
    response.raise_for_status()
    pil_image = Image.open(BytesIO(response.content)).convert("RGB")
    # RGB -> BGR by reversing the channel axis; ascontiguousarray materialises
    # the reversed view as a regular array
    return np.ascontiguousarray(np.array(pil_image)[:, :, ::-1])

def imshow(img, caption=""):
    """Show a BGR image in a 12x8 matplotlib figure, axes hidden, titled ``caption``."""
    rgb_order = [2, 1, 0]  # pick channels as R, G, B for display
    plt.figure(figsize=(12, 8))
    plt.imshow(img[:, :, rgb_order])
    plt.title(caption)
    plt.axis("off")
    plt.show()


In [None]:
# GLIP config
# Anchor both files on the absolute clone location (the same "/content/GLIP"
# root that is appended to sys.path). The setup cell changes the working
# directory into the clone, so a relative "GLIP/..." path would resolve to
# GLIP/GLIP/... and not be found on a fresh top-to-bottom run.
glip_root = "/content/GLIP"
config_file = os.path.join(glip_root, "configs/pretrain/glip_Swin_T_O365_GoldG.yaml")
weight_file = os.path.join(glip_root, "MODEL/glip_tiny_model_o365_goldg_cc_sbu.pth")

# Check up front so a missing file is reported with its full path rather than
# surfacing later from inside the model loading code.
# NOTE(review): the checkpoint has to be placed at weight_file beforehand —
# confirm that a download step for it exists in this notebook.
for _required_path in (config_file, weight_file):
    if not os.path.isfile(_required_path):
        raise FileNotFoundError(f"Required GLIP file not found: {_required_path}")

cfg.local_rank = 0
cfg.num_gpus = 1
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", device])

glip_demo = GLIPDemo(cfg, min_image_size=800, confidence_threshold=0.7, show_mask_heatmaps=False)


In [None]:
# Download SAM checkpoint if not exists
sam_checkpoint = "/content/checkpoints/sam_vit_h_4b8939.pth"
sam_checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"

os.makedirs(os.path.dirname(sam_checkpoint), exist_ok=True)
if not os.path.exists(sam_checkpoint):
    # torch.hub writes to a temporary file and moves it to the final path only
    # after the transfer completes, so an interrupted or failed download never
    # leaves a truncated checkpoint that the exists() guard would accept on
    # the next run. HTTP errors raise instead of being silently ignored.
    torch.hub.download_url_to_file(sam_checkpoint_url, sam_checkpoint, progress=True)

# Load SAM model
sam_model = sam_model_registry["vit_h"](checkpoint=sam_checkpoint)
sam_model.to(device=device)
sam_predictor = SamPredictor(sam_model)


In [None]:
# Grounding phrase and demo picture (load_image returns it as a BGR array)
text_prompt = "bobble heads on top of the shelf"
image_url = "http://farm4.staticflickr.com/3693/9472793441_b7822c00de_z.jpg"
image = load_image(image_url)

# Run GLIP on the picture with the phrase and display the annotated result
# (third positional argument 0.5: presumably a score threshold — confirm
# against GLIPDemo.run_on_web_image)
result_image, predictions = glip_demo.run_on_web_image(image, text_prompt, 0.5)
imshow(result_image, caption=text_prompt)

# Move the predicted boxes to host memory as a numpy array so they can seed SAM
boxes_tensor = predictions.bbox
boxes = boxes_tensor.cpu().numpy()  # shape: [N,4]
