## Imports and functions

In [2]:
import os
import geopandas as gpd
import sys
import numpy as np
import cv2
import pandas as pd
import xarray as xr
sys.path.insert(0, os.path.abspath('..'))
from limosat import Keypoints, Matcher, ImageProcessor
from limosat.catalog import create_image_gdf
%matplotlib inline

## Loading data

In [None]:
# Locations of the raw imagery and of the cached image catalog (GeoJSON).
IMAGE_DIR = "/path/to/images"
METADATA_PATH = "/path/to/catalog.geojson"

# Reuse the cached catalog when it exists; otherwise build it from IMAGE_DIR
# and cache it so later runs skip the scan.
# NOTE: the cache is never invalidated here - delete METADATA_PATH to force a
# rebuild after the contents of IMAGE_DIR change.
if os.path.exists(METADATA_PATH):
    print(f"Loading existing image metadata from {METADATA_PATH}")
    images_gdf = gpd.read_file(METADATA_PATH)
    # Timestamps read back from file may not be datetimes; normalise them.
    if 'timestamp' in images_gdf.columns:
        images_gdf['timestamp'] = pd.to_datetime(images_gdf['timestamp'])
    print(f"Loaded {len(images_gdf)} image records")
else:
    print(f"Creating new image metadata for directory: {IMAGE_DIR}")
    images_gdf = create_image_gdf(
        IMAGE_DIR,
        max_workers=8
    )

    # Make sure the target directory exists before writing. dirname() is ''
    # for a bare file name, and os.makedirs('') raises FileNotFoundError,
    # so only create the directory when there is one to create.
    metadata_dir = os.path.dirname(METADATA_PATH)
    if metadata_dir:
        os.makedirs(metadata_dir, exist_ok=True)
    images_gdf.to_file(METADATA_PATH, driver='GeoJSON')
    print(f"Saved {len(images_gdf)} image records to {METADATA_PATH}")

In [None]:
# Label passed to the ImageProcessor for this run.
run_name = "limosat_drift"

# Start from an empty keypoint store so re-running this cell resets the state.
points = Keypoints()

# Empty template stack: zero trajectories, each template template_size x
# template_size pixels. Adjust template_size as needed.
template_size = 33
templates = xr.DataArray(
    dims=("trajectory_id", "height", "width"),
    coords={
        "trajectory_id": range(0),
        "height": np.arange(template_size),
        "width": np.arange(template_size),
    },
)

## Workflow

In [None]:
# ORB keypoint detector/descriptor used by the processor.
# Parameter meanings follow the OpenCV ORB documentation.
orb_detector = cv2.ORB_create(
    nfeatures=100,     # maximum number of features to retain
    scaleFactor=1.2,   # pyramid decimation ratio between levels
    nlevels=4,         # number of pyramid levels
    edgeThreshold=15,  # border width in which no features are detected
    firstLevel=0,      # pyramid level the source image is placed at
    patchSize=31,      # patch size used by the oriented BRIEF descriptor
)

# Bundle the state containers and all configuration into a single processor.
processor = ImageProcessor(
    points=points,
    templates=templates,
    model=orb_detector,
    matcher=Matcher(),
    min_correlation=0.35,
    use_interpolation=True,
    run_name=run_name,
)

In [None]:
# Run every catalogued image through the processor; image ids are 1-based.
# NOTE(review): images are processed in catalog row order - presumably this
# should be chronological; confirm images_gdf is sorted as intended.
for image_id, filename in enumerate(images_gdf['filename'], start=1):
    processor.process_image(image_id, filename)

## Plotting

In [None]:
# Plot a random 10% subsample of the tracked points, coloured by the number of
# points belonging to each point's trajectory.
SAMPLE_FRACTION = 0.1
SAMPLE_SEED = 42  # fixed seed so Restart & Run All reproduces the same figure

random_sample = processor.points.sample(frac=SAMPLE_FRACTION, random_state=SAMPLE_SEED)
# Trajectory sizes are counted on the full point set, not on the subsample.
trajectory_counts = processor.points['trajectory_id'].value_counts()
random_sample['traj_count'] = random_sample['trajectory_id'].map(trajectory_counts)
# utc=True already yields UTC-aware timestamps, so no further tz_convert is needed.
random_sample['time'] = pd.to_datetime(random_sample['time'], utc=True)
# Plot the sampled points
random_sample.plot(column='traj_count', figsize=(10, 10))