<a href="https://colab.research.google.com/github/praveenpankaj/spatio-temporal-segmentation/blob/main/PASTIS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <a href="https://colab.research.google.com/github/praveenpankaj/spatio-temporal-segmentation/blob/main/PASTIS.ipynb" target="_parent">
# <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
# </a>

## ⚙️ Installing Required Dependencies

In [None]:
# Move this to requirements.txt
print("⚙️ Installing dependencies...")
!pip install torchnet
!pip install torch_scatter

## 📥 Downloading PASTIS Data

In [None]:
# Uncomment the following line to download the dataset if it's not already present
!wget https://zenodo.org/records/5012942/files/PASTIS.zip


--2025-02-23 15:31:28--  https://zenodo.org/records/5012942/files/PASTIS.zip
Resolving zenodo.org (zenodo.org)... 188.185.45.92, 188.185.48.194, 188.185.43.25, ...
Connecting to zenodo.org (zenodo.org)|188.185.45.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28760245504 (27G) [application/octet-stream]
Saving to: ‘PASTIS.zip’


In [None]:
# Unzipping the dataset
print("📂 Extracting PASTIS dataset...")
!unzip -q PASTIS.zip


In [None]:
# Optional: uncomment the next line to delete the downloaded archive file
# (/content/PASTIS.zip).
# rm -r '/content/PASTIS.zip'

## 🔗 Mounting Google Drive in Colab

In [None]:
import os
import json
import numpy as np
import shutil
from google.colab import drive


# Create the 'outpath' directory (relative to the current working directory).
# exist_ok=True makes the call a no-op when the directory is already there,
# without a separate existence check that could race with its creation.
os.makedirs('outpath', exist_ok=True)

In [None]:
# Attach Google Drive to the Colab filesystem so data can be read from and written to it
drive_mount_point = '/content/drive'
print("🔗 Mounting Google Drive...")
drive.mount(drive_mount_point)

Mounted at /content/drive


## 🚚 Transferring PASTIS data from Google Colab to Google Drive

In [None]:
# Define source and destination paths
# NOTE(review): '/content/outpath' looks like the directory created (empty) by the
# earlier `outpath` cell, assuming the working directory is /content. Confirm that
# this is really where the extracted PASTIS data lives — TODO confirm.
src_path = '/content/outpath'
dst_path = '/content/drive/MyDrive/PASTIS_Data/'

# Move dataset from Colab storage to Google Drive
if os.path.exists(src_path):
    print(f"🚛 Moving dataset to Google Drive: {dst_path}")
    shutil.move(src_path, dst_path)
    print("✅ File moved successfully.")
elif os.path.exists(dst_path):
    # Source is gone but the destination exists: the move was most likely done by
    # a previous run of this cell, so there is nothing left to do.
    print(f"ℹ️ {src_path} not found and {dst_path} already exists; nothing to move.")
else:
    # Neither path exists: fail with a message that names both locations.
    raise FileNotFoundError(
        f"Nothing to move: {src_path} does not exist and {dst_path} has not been created."
    )

File moved to: /content/drive/MyDrive/PASTIS_Data/


## 📊 Sampling 100 Data Points from Each Region

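**Note:** the `TILE` names in the `metadata.geojson` file also need to be changed to the region names used in the cell below (e.g. `roi_pallab_200`) before sampling; otherwise no patches will match.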

In [None]:
# Path to the metadata file
geojson_file = '/content/drive/MyDrive/PASTIS_Data/metadata.geojson'

# Tile to sample from and the maximum number of patches to keep
target_tile = "roi_pallab_200"  # Change as needed: "roi_pallab_100", "roi_pallab_200", "roi_pallab_300"
max_patches = 100


def sample_patch_ids(features, tile, limit=100):
    """Return the ID_PATCH of the first `limit` features whose TILE equals `tile`.

    Args:
        features: iterable of GeoJSON feature dicts; each must have a
            'properties' dict. Features without a 'TILE' property are ignored.
        tile: TILE value to select.
        limit: maximum number of patch IDs to return.

    Returns:
        List of 'ID_PATCH' values in the order the features were encountered.

    Raises:
        KeyError: if a feature has no 'properties', or a matching feature has
            no 'ID_PATCH'.
    """
    selected = []
    if limit <= 0:
        return selected
    for feature in features:
        properties = feature['properties']
        if 'TILE' in properties and properties['TILE'] == tile:
            selected.append(properties['ID_PATCH'])
            if len(selected) >= limit:
                # Quota reached: no need to scan the remaining features.
                break
    return selected


# List to store selected patch IDs
file_list = []

try:
    with open(geojson_file, 'r', encoding='utf-8') as file:
        geojson_data = json.load(file)

    file_list = sample_patch_ids(geojson_data['features'], target_tile, max_patches)

    if file_list:
        print(f"✅ Sampled {len(file_list)} patches from {target_tile}")
    else:
        # An empty sample means no feature carries this TILE value.
        print(f"⚠️ No patches found for tile {target_tile}; check the TILE names in {geojson_file}")

except (OSError, ValueError, KeyError, TypeError) as e:
    # File access, JSON decoding and malformed-structure errors are reported
    # without stopping the notebook; any other exception propagates.
    print(f"❌ Error processing GeoJSON file: {e}")


File List: [10101, 10102, 10103, 10104, 10105, 10106, 10107, 10108, 10109, 10110, 10111, 10112, 10113, 10114, 10115, 10116, 10117, 10118, 10119, 10120, 10121, 10122, 10123, 10124, 10125, 10126, 10127, 10128, 10129, 10130, 10131, 10132, 10133, 10134, 10135, 10136, 10137, 10138, 10139, 10140, 10141, 10142, 10143, 10144, 10145, 10146, 10147, 10148, 10149, 10150, 10151, 10152, 10153, 10154, 10155, 10156, 10157, 10158, 10159, 10160, 10161, 10162, 10163, 10164, 10165, 10166, 10167, 10168, 10169, 10170, 10171, 10172, 10173, 10174, 10175, 10176, 10177, 10178, 10179, 10180, 10181, 10182, 10183, 10184, 10185, 10186, 10187, 10188, 10189, 10190, 10191, 10192, 10193, 10194, 10195, 10196, 10197, 10198, 10199, 10200]



## 🎛️ Noise Implementation - Adding Noise to 25% of Training Data

In [None]:
# Function to add Gaussian noise to image data
def add_gaussian_noise(data, mean=0, std=25, rng=None):
    """Return a copy of `data` with element-wise Gaussian noise added.

    Args:
        data: array (or array-like) to perturb; it is not modified.
        mean: mean of the noise distribution.
        std: standard deviation of the noise distribution.
        rng: optional `numpy.random.Generator` used to draw the noise, for
            reproducible results. When None, NumPy's global random state
            (`np.random.normal`) is used.

    Returns:
        New array with the same shape as `data`. The noise is cast to float32
        before the addition, so the result dtype follows NumPy's promotion of
        `data`'s dtype with float32.
    """
    data = np.asarray(data)
    sampler = np.random if rng is None else rng
    gauss = sampler.normal(mean, std, data.shape).astype('float32')
    return data + gauss

# Define directory containing the dataset
input_dir = '/content/drive/MyDrive/PASTIS_Data/DATA_S2'


# Define start and end indices (both inclusive) for applying noise
start_idx = 10101
end_idx = 10200

print("🖼️ Applying Gaussian noise to selected images...")

# WARNING: the selected files are overwritten in place. Running this cell again
# adds another layer of noise on top of the already-noisy data.

processed_count = 0
skipped_count = 0

# Iterate through dataset (in sorted order, for a stable processing order) and
# apply noise to selected files
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith('.npy'):
        continue

    input_path = os.path.join(input_dir, filename)

    # The index is the text between the first '_' and the following '.'.
    # Names that do not fit this pattern are skipped instead of raising.
    try:
        idx = int(filename.split('_')[1].split('.')[0])
    except (IndexError, ValueError):
        idx = None

    if idx is not None and os.path.isfile(input_path) and start_idx <= idx <= end_idx:
        data = np.load(input_path)
        noisy_data = add_gaussian_noise(data)
        np.save(input_path, noisy_data)
        processed_count += 1
        print(f"🎨 Processed with noise: {input_path}")
    else:
        skipped_count += 1

# One summary line instead of one line per skipped file keeps the output readable.
print(f"✅ Added noise to {processed_count} file(s); skipped {skipped_count} .npy file(s) outside [{start_idx}, {end_idx}] or with an unexpected name.")

## 🛠️ Cloning Required GitHub Repositories

In [None]:
# Clone the utae-paps repository into a 'utae-paps' folder in the current directory.
# NOTE(review): presumably this repository provides the train_semantic.py script
# run in the training cell — TODO confirm.
print("📥 Cloning required repositories...")
!git clone https://github.com/VSainteuf/utae-paps.git

In [None]:
# Clone the pastis-benchmark repository into a 'pastis-benchmark' folder in the current directory.
# NOTE(review): no cell shown in this notebook references this clone — confirm it is needed.
!git clone https://github.com/VSainteuf/pastis-benchmark.git

## 📂 Verifying Dataset Files in Drive

In [None]:
# Root folder of the dataset on Google Drive
data_dir = '/content/drive/MyDrive/PASTIS_Data'

# Collect the names of the entries in that folder and show them
files = os.listdir(data_dir)
print("📂 Files in dataset directory:", files)
# Example of a previous listing:
#['NORM_S2_patch.json', 'ANNOTATIONS', 'DATA_S2', 'INSTANCE_ANNOTATIONS', 'metadata.geojson', '.ipynb_checkpoints']

## 🏋️ Training and Inference

In [None]:
print("🚀 Training segmentation model...")
!python train_semantic.py --fold 1 --dataset_folder $data_dir --res_dir '/content/outpath'

print("✅ Training complete! Results saved to '/content/outpath'")