<a href="https://colab.research.google.com/github/ombuijabali/Coconut_Pedcition_Model/blob/main/Model_with_multiple_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Install package

In [1]:
!pip install geoai-py



#Import libraries

In [2]:
import torch
torch.cuda.is_available()
import os
import hashlib
import json
import geoai
import shutil

Define directories

In [3]:
out_folder = "/content/drive/MyDrive/Ombui/output"
model_dir = f"{out_folder}/models"
hash_file = os.path.join(model_dir, "data_hashes.json")
model_path = os.path.join(model_dir, "mask_rcnn_trained.pth")

# List of training raster and vector files (Add more as needed)

In [4]:

train_raster_urls = [
    "https://huggingface.co/datasets/giswqs/geospatial/resolve/main/naip_train.tif",
    "https://huggingface.co/datasets/construmgis/geospatial/resolve/main/reproject1.tif",
    "https://huggingface.co/datasets/construmgis/geospatial/resolve/main/reproject2.tif"
]
train_vector_urls = [
    "https://huggingface.co/datasets/giswqs/geospatial/resolve/main/naip_train_buildings.geojson",
    "https://huggingface.co/datasets/construmgis/geospatial/resolve/main/reproject1.geojson",
    "https://huggingface.co/datasets/construmgis/geospatial/resolve/main/reproject2.geojson"
    ]

test_raster_url = (
    "https://huggingface.co/datasets/giswqs/geospatial/resolve/main/naip_test.tif"
)

Function to compute file hash

In [5]:
def compute_file_hash(filepath):
    """Returns SHA256 hash of a file."""
    hasher = hashlib.sha256()
    with open(filepath, "rb") as f:
        while chunk := f.read(8192):
            hasher.update(chunk)
    return hasher.hexdigest()

Function to check if training data has changed

In [6]:
def has_training_data_changed(train_raster_paths, train_vector_paths):
    """Checks if the training data has changed by comparing file hashes."""
    current_hashes = {fp: compute_file_hash(fp) for fp in train_raster_paths + train_vector_paths}

    if not os.path.exists(hash_file):
        return True  # No previous hash file, assume new data

    with open(hash_file, "r") as f:
        previous_hashes = json.load(f)

    return previous_hashes != current_hashes  # Returns True if data has changed

# Download all files

In [7]:
train_raster_paths = [geoai.download_file(url) for url in train_raster_urls]
train_vector_paths = [geoai.download_file(url) for url in train_vector_urls]
test_raster_path = geoai.download_file(test_raster_url)

naip_train.tif: 100%|██████████| 12.1M/12.1M [00:00<00:00, 54.2MB/s]
reproject1.tif: 100%|██████████| 3.52M/3.52M [00:00<00:00, 32.2MB/s]
reproject2.tif: 100%|██████████| 112M/112M [00:03<00:00, 29.4MB/s]
naip_train_buildings.geojson: 100%|██████████| 456k/456k [00:00<00:00, 2.76MB/s]
reproject1.geojson: 100%|██████████| 44.7k/44.7k [00:00<00:00, 41.3MB/s]
reproject2.geojson: 100%|██████████| 8.61k/8.61k [00:00<00:00, 20.6MB/s]
naip_test.tif: 100%|██████████| 19.7M/19.7M [00:00<00:00, 33.8MB/s]


# View first vector over a raster (optional)

In [8]:
geoai.view_vector_interactive(train_vector_paths[0], tiles=train_raster_urls[0])
geoai.view_raster(test_raster_url)

# Export tiles from multiple raster/vector pairs

In [9]:
for train_raster, train_vector in zip(train_raster_paths, train_vector_paths):
    geoai.export_geotiff_tiles(
        in_raster=train_raster,
        out_folder=out_folder,
        in_class_data=train_vector,
        tile_size=512,
        stride=256,
        buffer_radius=0,
    )


Raster info for naip_train.tif:
  CRS: EPSG:26911
  Dimensions: 2503 x 1126
  Resolution: (0.5999999999999953, 0.5999999999996691)
  Bands: 4
  Bounds: BoundingBox(left=454780.8, bottom=5277567.0, right=456282.6, top=5278242.6)
Loaded 722 features from naip_train_buildings.geojson
Vector CRS: EPSG:4326
Reprojecting features from EPSG:4326 to EPSG:26911
Found 6 unique classes: ['apartments' None 'terrace' 'detached' 'house' 'shed']


Generated: 36, With features: 36: 100%|██████████| 36/36 [01:03<00:00,  1.75s/it]



------- Export Summary -------
Total tiles exported: 36
Tiles with features: 36 (100.0%)
Average feature pixels per tile: 45966.3
Output saved to: /content/drive/MyDrive/Ombui/output

------- Georeference Verification -------

Raster info for reproject1.tif:
  CRS: EPSG:26915
  Dimensions: 1180 x 780
  Resolution: (0.26040991525426194, 0.2604098717950714)
  Bands: 4
  Bounds: BoundingBox(left=269642.8991, bottom=3291203.3059, right=269950.1828, top=3291406.4256)
Loaded 102 features from reproject1.geojson
Vector CRS: EPSG:26915


Generated: 12, With features: 12: 100%|██████████| 12/12 [00:02<00:00,  5.69it/s]



------- Export Summary -------
Total tiles exported: 12
Tiles with features: 12 (100.0%)
Average feature pixels per tile: 67793.0
Output saved to: /content/drive/MyDrive/Ombui/output

------- Georeference Verification -------

Raster info for reproject2.tif:
  CRS: EPSG:26915
  Dimensions: 8452 x 4619
  Resolution: (0.0648530170373901, 0.0648530201342632)
  Bands: 3
  Bounds: BoundingBox(left=265336.7508, bottom=3285532.5448, right=265884.8885, top=3285832.1009)
Loaded 18 features from reproject2.geojson
Vector CRS: EPSG:26915


Generated: 594, With features: 307: 100%|██████████| 594/594 [03:23<00:00,  2.92it/s]


------- Export Summary -------
Total tiles exported: 594
Tiles with features: 307 (51.7%)
Average feature pixels per tile: 82232.3
Output saved to: /content/drive/MyDrive/Ombui/output

------- Georeference Verification -------





# Train the model with multiple images

In [10]:
# Step 1: Check if model exists (first time or not)
is_first_run = not os.path.exists(model_path)
model_exists = not is_first_run

# Step 2: Compute training hashes
current_hashes = {fp: compute_file_hash(fp) for fp in train_raster_paths + train_vector_paths}
previous_hashes = {}

if os.path.exists(hash_file):
    with open(hash_file, "r") as f:
        previous_hashes = json.load(f)

# Step 3: Determine if data has changed
has_new_data = current_hashes != previous_hashes

# Step 4: Train only if needed or first run
if is_first_run or has_new_data:
    print("Preparing to train the model...")

    # Before training, check if image and label count matches
    num_images = len(os.listdir(f"{out_folder}/images"))
    num_labels = len(os.listdir(f"{out_folder}/labels"))

    if num_images == 0 or num_labels == 0:
        raise ValueError("No training data found in images or labels folders.")

    if num_images != num_labels:
        raise ValueError(f"Image-label mismatch: {num_images} images vs {num_labels} labels")

    print("Training model (first time run)" if is_first_run else "Training model due to new data...")

    geoai.train_MaskRCNN_model(
        images_dir=f"{out_folder}/images",
        labels_dir=f"{out_folder}/labels",
        output_dir=model_dir,
        num_channels=3,
        pretrained=True,
        batch_size=4,
        num_epochs=10,
        learning_rate=0.0005,
        val_split=0.2,
        pretrained_model_path=model_path if model_exists else None
    )

    # Check if training output exists
    trained_model_path = os.path.join(model_dir, "best_model.pth")
    if os.path.exists(trained_model_path):
        shutil.copy(trained_model_path, model_path)
        with open(hash_file, "w") as f:
            json.dump(current_hashes, f)
        print(f"Model saved to {model_path}")
    else:
        raise FileNotFoundError("Training failed — 'best_model.pth' was not created.")
else:
    print(f"Using existing trained model from {model_path}")

Preparing to train the model...
Training model (first time run)
Using device: cuda
Found 594 image files and 594 label files
Training on 475 images, validating on 119 images


Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100%|██████████| 170M/170M [00:01<00:00, 148MB/s]


Epoch: 0, Batch: 0/119, Loss: 2.4950, Time: 6.26s
Epoch: 0, Batch: 10/119, Loss: 0.8767, Time: 8.02s
Epoch: 0, Batch: 20/119, Loss: 0.4677, Time: 8.25s
Epoch: 0, Batch: 30/119, Loss: 0.8192, Time: 7.77s
Epoch: 0, Batch: 40/119, Loss: 0.5139, Time: 8.06s
Epoch: 0, Batch: 50/119, Loss: 0.3352, Time: 7.94s
Epoch: 0, Batch: 60/119, Loss: 0.6592, Time: 8.19s
Epoch: 0, Batch: 70/119, Loss: 0.5648, Time: 8.19s
Epoch: 0, Batch: 80/119, Loss: 0.4459, Time: 8.22s
Epoch: 0, Batch: 90/119, Loss: 0.2490, Time: 8.47s
Epoch: 0, Batch: 100/119, Loss: 0.3302, Time: 8.45s
Epoch: 0, Batch: 110/119, Loss: 0.5959, Time: 8.75s
Epoch 1/10: Train Loss: 0.6384, Val Loss: inf, Val IoU: 0.3175
Saving best model with IoU: 0.3175
Epoch: 1, Batch: 0/119, Loss: 1.1133, Time: 2.47s
Epoch: 1, Batch: 10/119, Loss: 0.2772, Time: 8.88s
Epoch: 1, Batch: 20/119, Loss: 0.4053, Time: 8.70s
Epoch: 1, Batch: 30/119, Loss: 0.1578, Time: 8.61s
Epoch: 1, Batch: 40/119, Loss: 0.4483, Time: 8.51s
Epoch: 1, Batch: 50/119, Loss: 0.38

# Object detection on test raster

In [11]:
masks_path = "naip_test_prediction.tif"
model_path = f"{out_folder}/models/best_model.pth"

geoai.object_detection(
    test_raster_path,
    masks_path,
    model_path,
    window_size=512,
    overlap=256,
    confidence_threshold=0.5,
    batch_size=4,
    num_channels=3,
)

Processing 65 windows with size 512x512 and overlap 256...


84it [00:09,  8.99it/s]

Inference completed in 9.38 seconds
Saved prediction to naip_test_prediction.tif





# Convert predicted masks to GeoJSON

In [12]:
output_path = "naip_test_prediction.geojson"
gdf = geoai.orthogonalize(masks_path, output_path, epsilon=2)

Processing 31 features...


Converting features: 100%|██████████| 31/31 [00:01<00:00, 30.28shape/s]

Saving to naip_test_prediction.geojson...
Done!





# View results

In [13]:
geoai.view_vector_interactive(output_path, tiles=test_raster_url)

# Create a split map for comparison

In [14]:
geoai.create_split_map(
    left_layer=output_path,
    right_layer=test_raster_url,
    left_args={"style": {"color": "red", "fillOpacity": 0.2}},
    basemap=test_raster_url,
)

Map(center=[20, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text…