# Guided Slot Attention for Unsupervised Video Object Segmentation.ipynb

## Step 1: Create the folder structure for training

In [None]:
import os
# Root of the prepared dataset tree; every target path below hangs off it.
dataset_dir = "/content/dataset"

# Placeholder locations of the raw datasets (replace with the real paths).
duts_source = "/content/DUTS"
davis_source = "/content/DAVIS"

# Training targets follow the layout <dataset_dir>/<split>/<modality>.
duts_train_rgb, duts_train_gt = (
    os.path.join(dataset_dir, "DUTS_train", modality) for modality in ("RGB", "GT")
)
davis_train_rgb, davis_train_gt, davis_train_flow = (
    os.path.join(dataset_dir, "DAVIS_train", modality) for modality in ("RGB", "GT", "FLOW")
)

# All three test variables name the same DAVIS_test root.
davis_test_rgb = davis_test_gt = davis_test_flow = os.path.join(dataset_dir, "DAVIS_test")

# Create the training folders only; DAVIS_test is not created in this cell.
for train_dir in (
    duts_train_rgb,
    duts_train_gt,
    davis_train_rgb,
    davis_train_gt,
    davis_train_flow,
):
    os.makedirs(train_dir, exist_ok=True)


## Step 2: Download the DAVIS-2017 train set
### Ref: https://davischallenge.org/davis2016/code.html

In [None]:
# Clone the DAVIS-2017 helper repository into /content.
%cd /content
!git clone https://github.com/davisvideochallenge/davis-2017.git
# Run the download script from the repo's data/ folder; per the recorded output it
# unpacks the dataset into a DAVIS/ sub-folder of the current directory.
%cd davis-2017/data
!sh get_davis.sh

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: DAVIS/JPEGImages/480p/libby/00003.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00004.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00005.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00006.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00007.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00008.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00009.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00010.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00011.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00012.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00013.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00014.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00015.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00016.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00017.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00018.jpg  
  inflating: DAVIS/JPEGImages/480p/libby/00019.jpg  
  inflating: DAVIS/JPEGImages/480p

In [None]:
%cd /content/davis/data
!sh get_davis.sh
!sh get_davis_results.sh

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: DAVIS/Results/Segmentations/480p/fst/elephant/00014.png  
   creating: DAVIS/Results/Segmentations/480p/fst/rhino/
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00044.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00034.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00082.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00073.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00031.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00069.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00012.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00061.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00000.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00054.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/00067.png  
  inflating: DAVIS/Results/Segmentations/480p/fst/rhino/000

In [None]:
import os
import shutil

def copy_davis_sequences(src_root, dst_root):
    """Copy every sequence folder under ``src_root`` into ``dst_root``.

    For each sub-directory ``<seq>`` of ``src_root`` the regular files directly
    inside it are copied to ``dst_root/<seq>/``. Entries of ``src_root`` that are
    not directories, and entries of a sequence that are not regular files, are
    skipped instead of raising.

    Args:
        src_root: Directory containing one sub-folder per sequence.
        dst_root: Directory that receives a same-named sub-folder per sequence.

    Raises:
        FileNotFoundError: If ``src_root`` does not exist.
    """
    for seq in sorted(os.listdir(src_root)):
        seq_src_dir = os.path.join(src_root, seq)
        if not os.path.isdir(seq_src_dir):
            continue  # stray file next to the sequence folders
        seq_dst_dir = os.path.join(dst_root, seq)
        os.makedirs(seq_dst_dir, exist_ok=True)
        for file in sorted(os.listdir(seq_src_dir)):
            src_file = os.path.join(seq_src_dir, file)
            if os.path.isfile(src_file):  # same guard the other copy cells use
                shutil.copy(src_file, seq_dst_dir)


# Source of the downloaded DAVIS data and the training targets it is copied to
davis_source = "/content/davis-2017/data/DAVIS"
davis_train_rgb = "/content/dataset/DAVIS_train/RGB"  # Target directory for RGB images
davis_train_gt = "/content/dataset/DAVIS_train/GT"    # Target directory for ground truth (GT) annotations

# Create target directories if they don't already exist
os.makedirs(davis_train_rgb, exist_ok=True)
os.makedirs(davis_train_gt, exist_ok=True)

# Copy DAVIS 480p RGB images, then the matching GT annotations
copy_davis_sequences(os.path.join(davis_source, "JPEGImages/480p"), davis_train_rgb)
copy_davis_sequences(os.path.join(davis_source, "Annotations/480p"), davis_train_gt)

print("Train DAVIS 480p images and annotations copied successfully!")


Train DAVIS 480p images and annotations copied successfully!


In [None]:
# NOTE(review): this whole cell is commented out, so running it does nothing; among the
# cells shown in this notebook nothing else writes files into DAVIS_train/FLOW.
# If re-enabled, it would save an HSV colour visualisation of Farneback optical flow
# for each consecutive frame pair (a .jpg image, not raw flow vectors). `prev_frame`
# below is assigned but never used.
# import cv2
# import os
# import numpy as np

# # Define paths
# davis_rgb_path = "/content/dataset/DAVIS_train/RGB"  # Replace with the path to DAVIS RGB images
# flow_output_path = "/content/dataset/DAVIS_train/FLOW"
# os.makedirs(flow_output_path, exist_ok=True)

# # Process each sequence (e.g., bear, blackswans) in DAVIS
# for sequence in os.listdir(davis_rgb_path):
#     seq_path = os.path.join(davis_rgb_path, sequence)
#     flow_seq_path = os.path.join(flow_output_path, sequence)
#     os.makedirs(flow_seq_path, exist_ok=True)

#     frames = sorted([f for f in os.listdir(seq_path) if f.endswith('.jpg')])

#     # Initialize variables to store the previous frame
#     prev_frame = None

#     # Loop over each frame in the sequence
#     for i in range(1, len(frames)):
#         # Read current and previous frames
#         frame1 = cv2.imread(os.path.join(seq_path, frames[i - 1]))
#         frame2 = cv2.imread(os.path.join(seq_path, frames[i]))

#         # Convert frames to grayscale as required for optical flow calculation
#         prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
#         curr_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

#         # Calculate optical flow between the frames using Farneback method
#         flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
#                                             pyr_scale=0.5, levels=3, winsize=15,
#                                             iterations=3, poly_n=5, poly_sigma=1.2, flags=0)

#         # Save flow visualization
#         hsv = cv2.cvtColor(frame1, cv2.COLOR_BGR2HSV)
#         mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
#         hsv[..., 0] = ang * 180 / np.pi / 2
#         hsv[..., 1] = 255
#         hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
#         rgb_flow = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

#         flow_filename = os.path.join(flow_seq_path, f"{frames[i - 1].split('.')[0]}_flow.jpg")
#         cv2.imwrite(flow_filename, rgb_flow)

# print("Flow train data generated and saved.")


## Step 3: Download the DAVIS-2017 test set

In [None]:
from google.colab import drive

# Directory under which the Google Drive contents are exposed.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Recursively copy the DAVIS_dataset folder from the mounted Drive into /content.
!cp -r "/content/drive/MyDrive/DAVIS_dataset" /content

In [None]:
import zipfile

# Archive on the mounted Drive and the folder its contents are unpacked into.
zip_path = "/content/drive/MyDrive/DAVIS_dataset/DAVIS-2017-test-dev-480p.zip"
extract_to = "/content/DAVIS_dataset"

# ZipFile opens in read mode by default; the context manager closes the archive.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_to)

print("Extraction DAVIS-2017-test completed.")

Extraction DAVIS-2017-test completed.


In [None]:
import os
import shutil

def _copy_flat_dir(src_dir, dst_dir):
    """Copy the regular files directly inside ``src_dir`` into ``dst_dir``.

    A missing ``src_dir`` is treated as "nothing to copy", so every modality
    (RGB, GT, FLOW) is handled the same way instead of only FLOW being optional.

    Args:
        src_dir: Directory to read files from; may be absent.
        dst_dir: Existing directory that receives the copies.

    Returns:
        The number of files copied (0 when ``src_dir`` is missing or empty).
    """
    if not os.path.isdir(src_dir):
        return 0
    copied = 0
    for file in sorted(os.listdir(src_dir)):
        src_file = os.path.join(src_dir, file)
        if os.path.isfile(src_file):  # Ensure it's a file
            shutil.copy(src_file, os.path.join(dst_dir, file))
            copied += 1
    return copied


# Define source and target paths
davis_source = "/content/DAVIS_dataset/DAVIS"
davis_test = "/content/dataset/DAVIS_test"

# Ensure the target directory exists
os.makedirs(davis_test, exist_ok=True)

# Copy DAVIS test dataset (organized by sequences like "blackswan", "bmx-trees", etc.)
rgb_root = os.path.join(davis_source, "JPEGImages/480p")
for seq in sorted(os.listdir(rgb_root)):
    if not os.path.isdir(os.path.join(rgb_root, seq)):
        continue  # stray file next to the sequence folders

    seq_dir = os.path.join(davis_test, seq)
    # Target sub-folder name -> where that modality lives in the source tree
    modality_sources = (
        ("RGB", os.path.join(rgb_root, seq)),
        ("GT", os.path.join(davis_source, "Annotations/480p", seq)),
        ("FLOW", os.path.join(davis_source, "Flow", seq)),  # optional
    )
    for modality, src_dir in modality_sources:
        dst_dir = os.path.join(seq_dir, modality)
        # The target folder is created even when there is nothing to copy,
        # so every sequence ends up with RGB/, GT/ and FLOW/.
        os.makedirs(dst_dir, exist_ok=True)
        copied = _copy_flat_dir(src_dir, dst_dir)
        if modality == "GT" and copied == 0:
            print(f"Warning: no GT annotations found for sequence '{seq}'")

print("DAVIS test dataset copied successfully.")


DAVIS test dataset copied successfully.


## Step 4: Download the DUTS dataset

In [None]:
%cd /content

import os, requests
# Archives to fetch: dataset name -> download URL
urls = {
    'DUTS-TR': 'http://saliencydetection.net/duts/download/DUTS-TR.zip',
}

# Directory to save the datasets (relative to the current working directory)
save_dir = './DUTS_dataset'
os.makedirs(save_dir, exist_ok=True)

for name, url in urls.items():
    print(f'Downloading {name}...')
    file_path = os.path.join(save_dir, f'{name}.zip')
    # The context manager releases the connection; the timeout stops a stalled
    # server from hanging the cell forever.
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of saving an error page as a ".zip".
        response.raise_for_status()
        with open(file_path, 'wb') as file:
            # 1 MiB chunks: far fewer write calls than 1 KiB for a large archive.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    file.write(chunk)
    print(f'{name} downloaded and saved to {file_path}')

/content
Downloading DUTS-TR...
DUTS-TR downloaded and saved to ./DUTS_dataset/DUTS-TR.zip


In [None]:
import zipfile

# Unpack each archive <save_dir>/<name>.zip into <save_dir>/<name>/.
# `urls` and `save_dir` are not defined in this cell; they must already exist in the kernel.
for name in urls:
    file_path = os.path.join(save_dir, f'{name}.zip')
    target_dir = os.path.join(save_dir, name)
    with zipfile.ZipFile(file_path) as archive:
        archive.extractall(target_dir)
    print(f'{name} extracted.')

DUTS-TR extracted.


In [None]:
import os
import shutil

# Extracted DUTS-TR archive and the training folders it is copied into.
duts_source = "/content/DUTS_dataset/DUTS-TR/DUTS-TR"
duts_train_rgb = "/content/dataset/DUTS_train/RGB"
duts_train_gt = "/content/dataset/DUTS_train/GT"

# (sub-folder inside the archive, destination folder): images first, then masks.
copy_plan = (
    ("DUTS-TR-Image", duts_train_rgb),
    ("DUTS-TR-Mask", duts_train_gt),
)

# Make sure both destinations exist before any file is copied.
for _, target_dir in copy_plan:
    os.makedirs(target_dir, exist_ok=True)

for subfolder, target_dir in copy_plan:
    source_dir = os.path.join(duts_source, subfolder)
    for entry in os.listdir(source_dir):
        candidate = os.path.join(source_dir, entry)
        if not os.path.isfile(candidate):
            continue  # only regular files are copied
        shutil.copy(candidate, os.path.join(target_dir, entry))

print("DUTS dataset copied successfully.")


DUTS dataset copied successfully.


In [None]:
# NOTE(review): disabled cell - every statement below is commented out, so running it
# does nothing. The FileExistsError recorded under this cell comes from an earlier run:
# shutil.copytree refuses an existing destination unless dirs_exist_ok=True
# (Python 3.8+) is passed.
# import shutil

# # Source folder to upload
# source_folder = "/content/dataset"  # Change to your folder path
# # Destination folder in Google Drive
# destination_folder = "/content/drive/MyDrive/GSANet_dataset"  # Change to your Google Drive path

# shutil.copytree(source_folder, destination_folder)

FileExistsError: [Errno 17] File exists: '/content/drive/MyDrive/GSANet_dataset'

## Step 5: Run the training

In [None]:
# NOTE(review): disabled cleanup cell - every statement below is commented out, so
# running it deletes nothing and prints nothing; a "Folder deleted successfully."
# output shown under this cell is left over from an earlier run.
# import shutil

# # Specify the path to the folder you want to delete
# folder_path = "/content/slot-attention-video-segmenter-app"

# # Delete the folder and all its contents
# shutil.rmtree(folder_path)

# print("Folder deleted successfully.")

Folder deleted successfully.


In [None]:
%cd /content
!git clone https://github.com/tan-nt/slot-attention-video-segmenter-app
!pip install fast-pytorch-kmeans

/content
Cloning into 'slot-attention-video-segmenter-app'...
remote: Enumerating objects: 53, done.[K
remote: Counting objects: 100% (53/53), done.[K
remote: Compressing objects: 100% (45/45), done.[K
remote: Total 53 (delta 15), reused 34 (delta 6), pack-reused 0 (from 0)[K
Receiving objects: 100% (53/53), 17.84 MiB | 20.28 MiB/s, done.
Resolving deltas: 100% (15/15), done.


In [None]:
# Switch into the repository folder and launch its pretrain.py script from there.
%cd /content/slot-attention-video-segmenter-app
!python pretrain.py

/content/slot-attention-video-segmenter-app
2024-11-14 03:52:10.585606: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-14 03:52:10.605402: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-14 03:52:10.611356: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-14 03:52:10.625725: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Load data

In [None]:
import shutil

# Folder to package; the archive is written next to it as <folder>.zip.
repo_dir = '/content/slot-attention-video-segmenter-app'

# Positional arguments are (base_name, format, root_dir). The call stays the last
# expression of the cell so the notebook displays the returned archive path.
shutil.make_archive(repo_dir, 'zip', repo_dir)


'/content/slot-attention-video-segmenter-app.zip'

In [None]:
from google.colab import files

# Zip file to hand to Colab's download helper.
archive_path = '/content/slot-attention-video-segmenter-app.zip'
files.download(archive_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>