### 0. Mounting Google Drive

In [29]:
# Mount Google Drive at /content/drive so files under MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### 1. Installing Open Grounding Dino

In [None]:
# Extract Open-GroundingDino.zip from Drive into /content.
# NOTE(review): presumably an alternative to the `git clone` cell that follows — confirm only one of the two is needed.
!unzip /content/drive/MyDrive/Open-GroundingDino.zip -d /content

In [None]:
# Clone the Open-GroundingDino repository into the current working directory.
!git clone "https://github.com/longzw1997/Open-GroundingDino"

Cloning into 'Open-GroundingDino'...
remote: Enumerating objects: 181, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (52/52), done.[K
remote: Total 181 (delta 41), reused 28 (delta 28), pack-reused 101[K
Receiving objects: 100% (181/181), 8.72 MiB | 12.07 MiB/s, done.
Resolving deltas: 100% (66/66), done.


In [3]:
%cd Open-GroundingDino

/content/Open-GroundingDino


In [4]:
#install requirements.txt
!pip install -r requirements.txt

Successfully installed addict-2.4.0 colorlog-6.8.2 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 submitit-1.5.1 supervision-0.6.0 timm-1.0.3 yapf-0.40.1


In [5]:
%cd models/GroundingDINO/ops

/content/Open-GroundingDino/models/GroundingDINO/ops


In [None]:
# Build and install the package defined by setup.py in the ops directory.
!python setup.py install

In [7]:
# Run the ops self-test; every check line in the output should start with "* True".
!python test.py

* True check_forward_equal_with_pytorch_double: max_abs_err 8.67e-19 max_rel_err 2.35e-16
* True check_forward_equal_with_pytorch_float: max_abs_err 4.66e-10 max_rel_err 1.13e-07
* True check_gradient_numerical(D=30)
* True check_gradient_numerical(D=32)
* True check_gradient_numerical(D=64)
* True check_gradient_numerical(D=71)


In [8]:
%cd /content

/content


### 2. Converting training data from COCO to ODVG (validation data stays in COCO format)

- Modify `line 11` and `line 16` of `Open-GroundingDino/tools/coco2odvg.py` according to your dataset

In [None]:
# Change --input to the path of your own COCO-format annotation JSON.
# The converted annotations are written to output.jsonl in the current working directory.
!python /content/Open-GroundingDino/tools/coco2odvg.py --input "/content/Yolo-to-COCO-format-converter/output/jun13_200ing.json"  --output "output.jsonl"

#### Make a folder with the following contents
- A folder named `anno` containing `label.json` and `output.jsonl`
- A folder named `train` containing all training images
- A folder named `val` containing all validation images
- A file named `val.json` with the validation annotations in COCO format
- Zip the folder and upload it to Drive

In [None]:
# Unzip the uploaded dataset archive from Drive into /content.
!unzip /content/drive/MyDrive/open_img.zip -d /content

In [10]:
# Write the dataset description JSON that is later passed to training as DATASETS.
# Change the paths below so they match the folder you unzipped.
import json
import os

# One list per split; each entry gives the image root, the annotation file,
# the label map (None for the COCO-format split) and the annotation format.
data = {
    "train": [
        {
            "root": "/content/open_img/train/",
            # Must be the file produced by coco2odvg.py (`--output "output.jsonl"`)
            # and placed in the `anno` folder.
            "anno": "/content/open_img/anno/output.jsonl",
            "label_map": "/content/open_img/anno/label.json",
            "dataset_mode": "odvg"
        }
    ],
    "val": [
        {
            "root": "/content/open_img/val",
            "anno": "/content/open_img/val.json",
            "label_map": None,
            "dataset_mode": "coco"
        }
    ]
}

file_path = '/content/Open-GroundingDino/config/datasets_mixed_odvg.json'

with open(file_path, 'w') as config_file:
    json.dump(data, config_file, indent=2)

print(f"Data has been written to {file_path}")


def _warn_missing_paths(config):
    """Print a warning for every configured path that does not exist on disk."""
    for split_name, entries in config.items():
        for entry in entries:
            for key in ("root", "anno", "label_map"):
                path = entry.get(key)
                if path is not None and not os.path.exists(path):
                    print(f"WARNING: {split_name}.{key} does not exist: {path}")


# Surface path mistakes here instead of after training has been launched.
_warn_missing_paths(data)


Data has been written to /content/Open-GroundingDino/config/datasets_mixed_odvg.json


### 3. Modify some files according to your dataset
- Modify `Open-GroundingDino/config/cfg_coco.py` and `Open-GroundingDino/config/cfg_odvg.py` with your own label list

```python
use_coco_eval = False
label_list=["bolt","wrong direction","1","2","3","4","5"] #use your own labels
```

In [None]:
# Create the output directory in which the checkpoints of the trained model are stored.
import os
os.makedirs("/content/output", exist_ok=True)

### 4. Download `groundingdino_swint_ogc.pth` and `bert`

In [None]:
!wget https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth&ved=2ahUKEwjLqsnAstiGAxVPbGwGHbcOADAQFnoECBYQAQ&usg=AOvVaw1HJopClO4_-MXLi9Ae6-le

In [None]:
# Create the directory that will hold the local copy of the BERT tokenizer and model files.
import os
os.makedirs("/content/bert", exist_ok=True)

In [None]:
from transformers import AutoTokenizer, AutoModel

# Load the "bert-base-uncased" tokenizer and model so they can be saved to a
# local folder and used as the text encoder during training.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

In [None]:
%cd /content/bert

In [None]:
# Save the tokenizer and model files into /content/bert. The explicit path
# makes this cell independent of the kernel's current working directory.
# tokenizer.save_pretrained returns the saved file paths, which are worth
# showing; model.save_pretrained returns None, so it is not printed.
print(tokenizer.save_pretrained("/content/bert"))
model.save_pretrained("/content/bert")

In [None]:
%cd /content

### 5. Replace the whole content of `train_dist.sh` with the code below to run on a single GPU
- Set the paths of `groundingdino_swint_ogc.pth` and of the bert folder
```bash
# Usage: bash train_dist.sh <config_file> <datasets_json> <output_dir>
# Single-GPU launcher: runs main.py directly with the three positional arguments.
CFG=$1
DATASETS=$2
OUTPUT_DIR=$3

# Set the environment variable for CUDA
export CUDA_VISIBLE_DEVICES=0

# Each continuation backslash must be the LAST character on its line; a
# trailing space after it ends the command early and the following option
# would be executed as a separate command.
python main.py \
    --config_file "${CFG}" \
    --datasets "${DATASETS}" \
    --output_dir "${OUTPUT_DIR}" \
    --pretrain_model_path /path/to/groundingdino_swint_ogc.pth \
    --options text_encoder_type=/path/to/bert-base-uncased
```

In [None]:
%cd /content

### 6. Training the model

In [19]:
GPU_NUM=1
CGF="/content/Open-GroundingDino/config/cfg_odvg.py"
DATASETS="/content/Open-GroundingDino/config/datasets_mixed_odvg.json"
OUTPUT_DIR="/content/output"
!chmod +x train_dist.sh
!bash train_dist.sh {CGF} {DATASETS} {OUTPUT_DIR}

Not using distributed mode
Loading config file from /content/Open-GroundingDino/config/cfg_odvg.py
[32mINFO    [0m [32m2024-06-12 05:47:40,352 | [34mgit:
  sha: a1f9128db6f6fee00c0552aab0a1d381d834dbe3, status: has uncommited changes, branch: main
[0m
[32mINFO    [0m [32m2024-06-12 05:47:40,352 | [34mCommand: main.py --config_file /content/Open-GroundingDino/config/cfg_odvg.py --datasets /content/Open-GroundingDino/config/datasets_mixed_odvg.json --output_dir /content/output --pretrain_model_path /content/drive/MyDrive/groundingdino_swint_ogc(1).pth --options text_encoder_type=/content/bert[0m
[32mINFO    [0m [32m2024-06-12 05:47:40,353 | [34mFull config saved to /content/output/config_args_all.json[0m
[32mINFO    [0m [32m2024-06-12 05:47:40,353 | [34mworld size: 1[0m
[32mINFO    [0m [32m2024-06-12 05:47:40,353 | [34mrank: 0[0m
[32mINFO    [0m [32m2024-06-12 05:47:40,353 | [34mlocal_rank: 0[0m
[32mINFO    [0m [32m2024-06-12 05:47:40,353 | [34margs: Nam

### 7. Inference on the Data

In [23]:
# Clone the IDEA-Research GroundingDINO repository into the current working directory.
!git clone https://github.com/IDEA-Research/GroundingDINO.git

Cloning into 'GroundingDINO'...
remote: Enumerating objects: 443, done.[K
remote: Counting objects: 100% (211/211), done.[K
remote: Compressing objects: 100% (83/83), done.[K
remote: Total 443 (delta 156), reused 137 (delta 128), pack-reused 232[K
Receiving objects: 100% (443/443), 12.86 MiB | 19.04 MiB/s, done.
Resolving deltas: 100% (228/228), done.


In [24]:
%cd GroundingDINO/

/content/Open-GroundingDino/GroundingDINO


In [25]:
!pip install -e .

Obtaining file:///content/Open-GroundingDino/GroundingDINO
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: groundingdino
  Running setup.py develop for groundingdino
Successfully installed groundingdino-0.1.0


In [26]:
%cd /content

/content


### 8. Creating a folder for saving the images

In [36]:
# Create the folder from which inference is run, so that relative output folders end up inside it.
import os
os.makedirs("/content/allval_images_in_folder", exist_ok=True)


/content/allval_images_in_folder


In [40]:
%cd /content/allval_images_in_folder

/content/allval_images_in_folder


In [41]:
pwd

'/content/allval_images_in_folder'

##### Inference on a single Image

In [42]:
# Run the inference script on one validation image.
#   -c model config file        -p trained checkpoint
#   -i input image              -t text prompt (class names separated by " . ")
#   -o output folder (relative to the current working directory)
!python "/content/Open-GroundingDino/tools/inference_on_a_image.py" \
  -c "/content/Open-GroundingDino/tools/GroundingDINO_SwinT_OGC.py" \
  -p "/content/output/checkpoint_best_regular.pth" \
  -i "/content/open_img/val/Image__2024-03-13__13-12-44 - Copy.png" \
  -t "bolt . wrong direction . 1 . 2 . 3 . 4 . 5 " \
  -o pred_images

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Traceback (most recent call last):
  File "/content/Open-GroundingDino/tools/inference_on_a_image.py", line 188, in <module>
    model = load_model(config_file, checkpoint_path, cpu_only=args.cpu_only)
  File "/content/Open-GroundingDino/tools/inference_on_a_image.py", line 75, in load_model
    model = build_model(args)
  File "/content/Open-GroundingDino/GroundingDINO/groundingdino/models/__init__.py", line 17, in build_model
    model = build_func(args)
  File "/content/Open-GroundingDino/GroundingDINO/groundingdino/models/GroundingDINO/groundingdino.py", line 381, in build_groundingdino
    backbone = build_backbone(args)
  File "/content/Open-GroundingDino/GroundingDINO/groundingdino/models/GroundingDINO/backbone/backbone.py", line 199, in build_backbone
    backbone = build_swin_transformer(
  File "/content/Open-GroundingDino/GroundingDINO/groundingdino/models/GroundingDINO/backbone/swin_transformer.py", line

##### Inference on the val images folder

In [45]:
import os
import subprocess
import sys

# Directory containing the images
image_dir = "/content/open_img/val"
# All .png / .jpg files in the directory; the extension match is
# case-insensitive and the list is sorted so the processing order is stable.
image_files = sorted(
    f for f in os.listdir(image_dir) if f.lower().endswith(('.png', '.jpg'))
)

# Define the other arguments for the inference script
config_path = "/content/Open-GroundingDino/tools/GroundingDINO_SwinT_OGC.py"
checkpoint_path = "/content/output/checkpoint_best_regular.pth"
text_prompts = "bolt . wrong direction . 1 . 2 . 3 . 4 . 5 "
output_dir = "pred_images"

# Images whose inference run exited with a non-zero status.
failed_images = []

# Loop over all image files and run the inference script on each one
for image_file in image_files:
    image_path = os.path.join(image_dir, image_file)
    # One sub-folder per image inside `output_dir`, so results of different
    # images never overwrite each other. Joining with os.path.join avoids the
    # separator-less "pred_images<name>" folders that plain string
    # concatenation creates.
    image_output_dir = os.path.join(output_dir, image_file)
    command = [
        # sys.executable is the interpreter of the running kernel.
        sys.executable, "/content/Open-GroundingDino/tools/inference_on_a_image.py",
        "-c", config_path,
        "-p", checkpoint_path,
        "-i", image_path,
        "-t", text_prompts,
        "-o", image_output_dir
    ]
    result = subprocess.run(command)
    # Keep going on failure, but do not hide it.
    if result.returncode != 0:
        failed_images.append(image_file)
        print(f"Inference failed for {image_path} (exit code {result.returncode})")

if failed_images:
    print(f"{len(failed_images)} of {len(image_files)} images failed: {failed_images}")

##### Organizing Images in a single folder and copying folder to Drive

In [None]:
import os
import shutil

def is_image(file):
    """Return True when the name `file` ends with a known image extension.

    The comparison is case-insensitive.
    """
    known_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
    # str.endswith accepts a tuple and succeeds if any suffix matches.
    return file.lower().endswith(known_suffixes)

def copy_images(src_dir, dest_dir):
    """Copy every image found anywhere under `src_dir` into the flat folder `dest_dir`.

    Args:
        src_dir: Directory that is walked recursively.
        dest_dir: Directory receiving the copies; created if it does not exist.

    A file whose name is already taken in `dest_dir` is stored as
    `<base>_<n><ext>` with the first free `n` starting at 1, so nothing is
    overwritten. One line is printed per copied file.
    """
    # exist_ok avoids the separate check-then-create step.
    os.makedirs(dest_dir, exist_ok=True)
    dest_abs = os.path.abspath(dest_dir)

    for root, dirs, files in os.walk(src_dir):
        # If the destination is nested inside the source tree, do not descend
        # into it; otherwise files copied earlier would be copied again.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != dest_abs
        ]

        for file in files:
            if not is_image(file):
                continue

            src_file_path = os.path.join(root, file)
            dest_file_path = os.path.join(dest_dir, file)

            # To handle duplicate filenames: append _1, _2, ... until the name is free.
            base, ext = os.path.splitext(file)
            count = 1
            while os.path.exists(dest_file_path):
                dest_file_path = os.path.join(dest_dir, f"{base}_{count}{ext}")
                count += 1

            shutil.copy2(src_file_path, dest_file_path)
            print(f"Copied {src_file_path} to {dest_file_path}")

# Gather every image found under the inference folder into one flat folder.
src_directory = '/content/allval_images_in_folder'
dest_directory = '/content/final_val_images'
copy_images(src_dir=src_directory, dest_dir=dest_directory)


In [None]:
# Archive the collected images (recursively) into /content/final_val_images.zip.
!zip -r /content/final_val_images.zip /content/final_val_images

In [53]:
# Copy the archive to the mounted Google Drive (MyDrive).
!cp "/content/final_val_images.zip" "/content/drive/MyDrive"