In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os

cache_dir = '/content/drive/MyDrive/Bachelorarbeit/samgeo_package'

if os.path.exists(cache_dir):
    import sys
    sys.path.append(cache_dir)
    print("Using cached version of segment-geospatial.")

Using cached version of segment-geospatial.


In [3]:
import torch
from samgeo import SamGeo, SamGeo2, regularize
import geopandas as gpd
from samgeo.text_sam import LangSAM
from samgeo.hq_sam import SamGeo as HQSamGeo
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from skimage import data
import tifffile
import rasterio
from rasterio import Affine
from rasterio.enums import Compression
from pathlib import Path
import csv

In [4]:
# Hilfsfunktion, um alle Bilder zu segmentieren
def iterate_folders(base_dir: str, function):
    base = Path(base_dir)
    for sub in base.iterdir():
        if not sub.is_dir():
            continue
        print(f"Processing folder: {sub.name}")
        tifs = [p for p in sub.glob("*.tif") if not p.name.endswith(".aux.xml")]
        for tif in tifs:
          function(tif)

# Samgeo2 automatic

In [None]:
def run_samgeo2_raw(src_path: Path):

  out_dir = "/content/drive/MyDrive/Bachelorarbeit/results"

  sam_2 = SamGeo2(
      device='cuda',
      apply_postprocessing=False,
      points_per_side=32,
      points_per_batch=64,
      pred_iou_thresh=0.8,
      stability_score_thresh=0.95,
      stability_score_offset=1,
      crop_n_layers=0,
      box_nms_thresh=0.7,
      crop_n_points_downscale_factor=1,
      min_mask_region_area=0,
      use_m2m=False
      )

  tiff_mask = os.path.join(out_dir, f"{src_path.stem}_sam_raw.tif")
  sam_2.generate(str(src_path), tiff_mask)

  vector_mask = os.path.join(out_dir, f"{src_path.stem}_sam_raw.gpkg")
  sam_2.raster_to_vector(tiff_mask, vector_mask)

In [None]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_cir", run_samgeo2_raw)

Processing folder: Wohngegend


sam2_hiera_large.pt:   0%|          | 0.00/898M [00:00<?, ?B/s]

Processing folder: Land
Processing folder: Stadt
Processing folder: Gewerbegebiet


# Samgeo2 point prompt

In [12]:
def run_samgeo2_point_prompt(src_path: Path):

    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results/SAMGeo/Sam_2_point_prompt/mimsize1000_thresh2000"

    csv_files = list(src_path.parent.glob("*.csv"))
    csv_file = csv_files[0]

    coords = []
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            x = float(row[0])
            y = float(row[1])
            coords.append([x, y])

    sam_2 = SamGeo2(device='cuda', automatic=False)
    sam_2.set_image(str(src_path))
    tiff_mask = os.path.join(out_dir, f"a{src_path.stem}_sam_point_prompt.tif")

    sam_2.predict_by_points(
        point_coords_batch=coords,
        point_crs="EPSG:25832",
        output=tiff_mask,
        multimask_output=False,
        )

    vector_mask = os.path.join(out_dir, f"a{src_path.stem}_minsize1000_thresh2000.gpkg")
    tiff_mask_regioned = os.path.join(out_dir, f"a{src_path.stem}_sam_point_prompt_regioned.tif")

    array, gdf = sam_2.region_groups(
        tiff_mask,
        min_size=1000,
        out_vector=vector_mask,
        out_image=tiff_mask_regioned
        )

    vector_mask_regularized = os.path.join(out_dir, f"a{src_path.stem}_regularized.gpkg")
    regularize(vector_mask, vector_mask_regularized, crs="EPSG:25832")

In [13]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_rgb", run_samgeo2_point_prompt)

Processing folder: Land
Processing folder: Stadt
Processing folder: Wohngegend
Processing folder: Gewerbegebiet


# Samgeo2 text prompt

In [17]:
def run_samgeo2_text_prompt(src_path: Path):

    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results/SAMGeo/sam_streets"
    text_prompt = "streets"

    lang_sam = LangSAM(model_type="sam2-hiera-large")

    tiff_mask = os.path.join(out_dir, f"{src_path.stem}_sam2_text_prompt.tif")

    lang_sam.predict(
        image=str(src_path),
        output=tiff_mask,
        text_prompt=text_prompt,
        box_threshold=0.2,
        text_threshold=0.4)

    vector_mask = os.path.join(out_dir, f"{src_path.stem}_sam2_text_prompt.gpkg")
    lang_sam.raster_to_vector(tiff_mask, vector_mask)

    vector_mask_regularized = os.path.join(out_dir, f"{src_path.stem}__sam2_regularized.gpkg")
    regularize(vector_mask, vector_mask_regularized)

In [18]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_rgb", run_samgeo2_text_prompt)

Processing folder: Land
final text_encoder_type: bert-base-uncased
Processing folder: Stadt
final text_encoder_type: bert-base-uncased
Processing folder: Wohngegend
final text_encoder_type: bert-base-uncased
Processing folder: Gewerbegebiet
final text_encoder_type: bert-base-uncased


# HQSam automatic

In [None]:
def run_hq_sam(src_path: Path):

    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results"

    hq_sam = HQSamGeo(model_type="vit_h", device="cuda", hq=True)

    tiff_mask = os.path.join(out_dir, f"{src_path.stem}_hq_sam.tif")
    hq_sam.generate(str(src_path), tiff_mask)

    vector_mask = os.path.join(out_dir, f"{src_path.stem}_hq_sam.gpkg")
    hq_sam.raster_to_vector(tiff_mask, vector_mask)

In [None]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_cir", run_hq_sam)

Processing folder: Land
<All keys matched successfully>
Processing folder: Stadt
<All keys matched successfully>
Processing folder: Wohngegend
<All keys matched successfully>
Processing folder: Gewerbegebiet
<All keys matched successfully>


# HQSam point prompt

In [6]:
def run_hq_sam_point_prompt(src_path: Path):

    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results/SAMGeo/HQ_Sam_point_prompt"
    csv_files = list(src_path.parent.glob("*.csv"))
    csv_file = csv_files[0]

    coords = []
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            x = float(row[0])
            y = float(row[1])
            coords.append([x, y])

    hq_sam = HQSamGeo(model_type="vit_h", device="cuda", automatic=False)
    hq_sam.set_image(str(src_path))
    tiff_mask = os.path.join(out_dir, f"{src_path.stem}_hq_sam_point_prompt_2.tif")

    hq_sam.predict(
        point_coords=coords,
        point_crs="EPSG:25832",
        output=tiff_mask
        )

    vector_mask = os.path.join(out_dir, f"{src_path.stem}_hq_sam_point_prompt_2.gpkg")
    hq_sam.raster_to_vector(tiff_mask, vector_mask)

    vector_mask_regularized = os.path.join(out_dir, f"{src_path.stem}_regularized_2.gpkg")
    regularize(vector_mask, vector_mask_regularized, crs="EPSG:25832")

In [7]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_rgb", run_hq_sam_point_prompt)

Processing folder: Land
Model checkpoint for vit_h not found.


Downloading...
From: https://github.com/opengeos/datasets/releases/download/models/sam_hq_vit_h.zip
To: /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.zip
100%|██████████| 1.02G/1.02G [01:47<00:00, 9.48MB/s]
Downloading...
From: https://github.com/opengeos/datasets/releases/download/models/sam_hq_vit_h.z01
To: /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.z01
100%|██████████| 1.36G/1.36G [02:19<00:00, 9.77MB/s]
INFO patool: Extracting /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.zip ...
INFO:patool:Extracting /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.zip ...
INFO patool: running /usr/bin/7z x -aou -o/root/.cache/torch/hub/checkpoints -- /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.zip
INFO:patool:running /usr/bin/7z x -aou -o/root/.cache/torch/hub/checkpoints -- /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.zip
INFO patool: ... /root/.cache/torch/hub/checkpoints/sam_hq_vit_h.zip extracted to `/root/.cache/torch/hub/checkpoints'.
INFO:patool:... /root/.cache/torch/hub/c

<All keys matched successfully>
Processing folder: Stadt
<All keys matched successfully>
Processing folder: Wohngegend
<All keys matched successfully>
Processing folder: Gewerbegebiet
<All keys matched successfully>


# Samgeo1 automatic

In [None]:
def run_samgeo1_automatic(src_path: Path):

    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results"
    sam_kwargs = {
        "points_per_side": 32,
        "pred_iou_thresh": 0.88,
        "stability_score_thresh": 0.95,
        "crop_n_layers": 0,
        "crop_n_points_downscale_factor": 1,
        "min_mask_region_area": 0,
        }

    sam = SamGeo(model_type="vit_h", device="cuda", sam_kwargs=sam_kwargs)

    tiff_mask = os.path.join(out_dir, f"{src_path.stem}_sam_raw.tif")
    sam.generate(str(src_path), tiff_mask)

    vector_mask = os.path.join(out_dir, f"{src_path.stem}_sam_raw.gpkg")
    sam.raster_to_vector(tiff_mask, vector_mask)

In [None]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_cir", run_samgeo1_automatic)

NameError: name 'iterate_folders' is not defined

# Samgeo1 point prompt

In [None]:
def run_samgeo1_point_prompt(src_path: Path):
    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results/SAMGeo/Sam_1_point_prompt"
    csv_files = list(src_path.parent.glob("*.csv"))
    csv_file = csv_files[0]

    coords = []
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            x = float(row[0])
            y = float(row[1])
            coords.append([x, y])

    sam = SamGeo(model_type="vit_h", device="cuda", automatic=False)
    sam.set_image(str(src_path))
    tiff_mask = os.path.join(out_dir, f"{src_path.stem}_sam1_point_prompt.tif")

    sam.predict(
        point_coords=coords,
        point_crs="EPSG:25832",
        output=tiff_mask
        )

    vector_mask = os.path.join(out_dir, f"{src_path.stem}_sam1_point_prompt.gpkg")
    sam.raster_to_vector(tiff_mask, vector_mask)

    vector_mask_regularized = os.path.join(out_dir, f"{src_path.stem}_regularized.gpkg")
    regularize(vector_mask, vector_mask_regularized)

In [None]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_rgb", run_samgeo1_point_prompt)

Downloading...
From: https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
To: /root/.cache/torch/hub/checkpoints/sam_vit_h_4b8939.pth


Processing folder: Land
Model checkpoint for vit_h not found.


100%|██████████| 2.56G/2.56G [00:43<00:00, 59.4MB/s]


Processing folder: Stadt
Processing folder: Wohngegend
Processing folder: Gewerbegebiet


# Samgeo1 text prompt

In [14]:
def run_samgeo1_text_prompt(src_path: Path):

    out_dir = "/content/drive/MyDrive/Bachelorarbeit/results/SAMGeo/sam_streets"
    text_prompt = "streets"

    lang_sam = LangSAM(model_type="vit_h")

    tiff_mask = os.path.join(out_dir, f"{src_path.stem}_sam1_text_prompt.tif")

    lang_sam.predict(
         image=str(src_path),
         output=tiff_mask,
          text_prompt=text_prompt,
          box_threshold=0.2,
          text_threshold=0.4)

    vector_mask = os.path.join(out_dir, f"{src_path.stem}_sam1_text_prompt.gpkg")
    lang_sam.raster_to_vector(tiff_mask, vector_mask)

    vector_mask_regularized = os.path.join(out_dir, f"{src_path.stem}_sam1_regularized.gpkg")
    regularize(vector_mask, vector_mask_regularized)

In [15]:
iterate_folders("/content/drive/MyDrive/Bachelorarbeit/data/szenarien_rgb", run_samgeo1_text_prompt)

Processing folder: Land


GroundingDINO_SwinB.cfg.py: 0.00B [00:00, ?B/s]

final text_encoder_type: bert-base-uncased


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

groundingdino_swinb_cogcoor.pth:   0%|          | 0.00/938M [00:00<?, ?B/s]

Downloading: "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth" to /root/.cache/torch/hub/checkpoints/sam_vit_h_4b8939.pth
100%|██████████| 2.39G/2.39G [00:27<00:00, 93.1MB/s]


Processing folder: Stadt
final text_encoder_type: bert-base-uncased
Processing folder: Wohngegend
final text_encoder_type: bert-base-uncased
Processing folder: Gewerbegebiet
final text_encoder_type: bert-base-uncased
