In [37]:
import geopandas as gpd
import folium
import leafmap.leafmap as leafmap


In [38]:
# Region whose tile metadata is inspected; later cells reuse `region` to build paths.
region = 'sylhet'
meta_data_path = f"../data/processed_data/{region}/metadata.geojson"

# Read the metadata GeoJSON and show its first two rows.
region_meta_data = gpd.read_file(meta_data_path)
region_preview = region_meta_data.head(2)
display(region_preview)


Unnamed: 0,x_idx,y_idx,x,y,geometry
0,23936,28544,10230945,2779247,"POLYGON ((10229417.009 2780776.7, 10232474.49 ..."
1,23360,28544,10228194,2779247,"POLYGON ((10226665.276 2780776.7, 10229722.757..."


In [39]:
# Reproject the metadata geometries to EPSG:3857 and preview the first two rows.
region_meta_data = region_meta_data.to_crs(epsg=3857)
reprojected_preview = region_meta_data.head(2)
display(reprojected_preview)

Unnamed: 0,x_idx,y_idx,x,y,geometry
0,23936,28544,10230945,2779247,"POLYGON ((10229417.009 2780776.7, 10232474.49 ..."
1,23360,28544,10228194,2779247,"POLYGON ((10226665.276 2780776.7, 10229722.757..."


In [40]:
# Draw every metadata geometry in black and highlight the first one in red.
first_geometry = region_meta_data.iloc[[0]]
m = leafmap.Map()
m.add_basemap("HYBRID")
# `zoom_to_layer` is documented by leafmap as a boolean flag, so pass True rather
# than a zoom-level-looking integer.
m.add_gdf(region_meta_data, layer_name="Shape", style={"color": "black"}, zoom_to_layer=True)
# Add the first geometry exactly once (previously it was also added a second time
# as an unnamed, default-styled layer underneath this one). It is added last so
# the map ends up zoomed to it.
m.add_gdf(first_geometry, layer_name="First Geometry", style={"color": "red"}, zoom_to_layer=True)
m




Map(center=[20, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text…

In [41]:
from os.path import join
from glob import glob
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
from time import time
from joblib import Parallel, delayed
from tqdm import tqdm

In [42]:
# region = ''
# Input and output locations for the selected region (relative paths).
image_dir = f"../data/processed_data/{region}/images/"
prediction_dir = f"../data/predict/processed_labels/{region}/"
metadata_path = f"../data/processed_data/{region}/metadata.geojson"
save_dir = f"../data/predict/processed_labels/{region}"

# Echo the resolved paths so a wrong region is easy to spot.
for path_label, path_value in (
    ("Image dir", image_dir),
    ("Prediction dir", prediction_dir),
    ("Metadata path", metadata_path),
    ("Save dir", save_dir),
):
    print(f"{path_label}: {path_value}")




Image dir: ../data/processed_data/sylhet/images/
Prediction dir: ../data/predict/processed_labels/sylhet/
Metadata path: ../data/processed_data/sylhet/metadata.geojson
Save dir: ../data/predict/processed_labels/sylhet


In [43]:
print(f"Image directory: {image_dir}")
print(f"Prediction directory: {prediction_dir}")

# Every entry in the image directory counts as one image.
image_paths = glob(join(image_dir, "*"))
n_images = len(image_paths)
print(f"Number of images: {n_images}")

# Label files live in the "labels" sub-folder of the prediction directory.
prediction_path = join(prediction_dir, "labels", "*")
print("Reading predictions from", prediction_path)
prediction_paths = glob(prediction_path)
n_label_files = len(prediction_paths)
# Both lines below report the number of label files found by the glob.
print(f"Number of predictions: {n_label_files}")
print(f"Number of images with at least one prediction: {n_label_files}")

# Metadata that the predictions are joined onto in the following cells.
metadata_gdf = gpd.read_file(metadata_path)
n_metadata_rows = len(metadata_gdf)
print(f"Number of metadata entries: {n_metadata_rows}")
# target_gdf = gpd.read_file(target_geojson_path)
# print(f"Number of ground truth labels: {len(target_gdf)}")
# print(f"Time taken to read files: {time() - init:.2f}s")

Image directory: ../data/processed_data/sylhet/images/
Prediction directory: ../data/predict/processed_labels/sylhet/
Number of images: 1782
Reading predictions from ../data/predict/processed_labels/sylhet/labels/*
Number of predictions: 163
Number of images with at least one prediction: 163
Number of metadata entries: 1782


In [44]:
# Show the first two rows of the frame the predictions will be attached to.
tile_preview = metadata_gdf.head(2)
print("Predictions head")
print(tile_preview)

Predictions head
   x_idx  y_idx         x        y  \
0  23936  28544  10230945  2779247   
1  23360  28544  10228194  2779247   

                                            geometry  
0  POLYGON ((10229417.009 2780776.7, 10232474.49 ...  
1  POLYGON ((10226665.276 2780776.7, 10229722.757...  


In [45]:
# Class names indexed by the integer class id stored in the label files.
classes = ["CFCBK", "FCBK", "Zigzag"]
# Minimum confidence a prediction needs to be kept.
conf_threshold = 0.25
# IoU at or above which two predictions are treated as duplicates.
nms_iou = 0.5
# Label layout; "obb" rows have 10 columns, anything else is assumed to have 6.
task = "obb"


def obb_load_prediction(row, task=task, classes=classes, label_dir=None):
    """Load the label file of one metadata row and scale it to the row's bounds.

    The label file is ``<label_dir>/<row['x']>_<row['y']>.txt``. Each line is read
    as: class id, alternating x/y values, confidence (last column). The x values
    are scaled by the width of ``row['geometry'].bounds`` and offset by ``min_x``;
    the y values are flipped (``1 - y``) before being scaled by the height and
    offset by ``min_y`` (presumably label y grows downward while the map y grows
    upward -- TODO confirm).

    Parameters
    ----------
    row : mapping-like
        Must provide ``row['x']``, ``row['y']`` and ``row['geometry']``, where the
        geometry exposes ``.bounds`` as ``(min_x, min_y, max_x, max_y)``.
    task : str
        ``"obb"`` selects the 10-column layout for the empty fallback, any other
        value the 6-column one.
        NOTE(review): the alternating x/y scaling below is applied regardless of
        ``task``; confirm it is appropriate for non-OBB label layouts.
    classes : sequence of str
        Class names indexed by class id.
    label_dir : str, optional
        Directory containing the label ``.txt`` files. Defaults to
        ``join(prediction_dir, "labels")`` using the notebook-level
        ``prediction_dir``; pass it explicitly to avoid relying on that global.

    Returns
    -------
    tuple
        ``(box, class_names, confidence, original_prediction)`` where ``box`` is a
        2-D array with one row of scaled coordinates per prediction,
        ``class_names`` and ``confidence`` are lists of the same length, and
        ``original_prediction`` holds each unscaled label row joined with ``"_"``.

    Raises
    ------
    IndexError
        If a class id in the file is outside the range of ``classes``.
    """
    if label_dir is None:
        # Backward-compatible default: same location the notebook always used.
        label_dir = join(prediction_dir, "labels")
    n_columns = 10 if task == "obb" else 6

    try:
        prediction = np.loadtxt(join(label_dir, f"{row['x']}_{row['y']}.txt"), ndmin=2)
    except FileNotFoundError:
        # No label file means no predictions for this row.
        prediction = np.zeros((0, n_columns))
    if prediction.shape[0] == 0:
        # A label file without rows may not come back with the expected number of
        # columns; normalise it to the same shape as the missing-file case.
        prediction = np.zeros((0, n_columns))

    # Preserve the original (unscaled) prediction rows for later.
    original_prediction = ["_".join(map(str, values)) for values in prediction.tolist()]

    # Scale predictions into the coordinate range of the row's geometry bounds.
    min_x, min_y, max_x, max_y = row["geometry"].bounds
    prediction[:, 1:-1:2] = prediction[:, 1:-1:2] * (max_x - min_x) + min_x
    prediction[:, 2:-1:2] = (1 - prediction[:, 2:-1:2]) * (max_y - min_y) + min_y

    class_names = [classes[int(cls_id)] for cls_id in prediction[:, 0]]
    confidence = prediction[:, -1].tolist()

    # Everything between the class id and the confidence is the box itself.
    box = prediction[:, 1:-1]
    return box, class_names, confidence, original_prediction



In [46]:
# Columns produced by obb_load_prediction, in the order it returns them.
prediction_columns = ["box", "class_name", "confidence", "yolo_label"]

# Step 1: load the predictions of every metadata row (one list-like per column).
init = time()
loaded_predictions = metadata_gdf.apply(obb_load_prediction, axis=1, result_type="expand")
metadata_gdf[prediction_columns] = loaded_predictions
print("Predictions head after getting box, class_name, confidence")
print(f"Time taken to load predictions: {time() - init:.2f}s")

# Step 2: turn the list-like cells into one row per prediction, then discard
# the rows that had no prediction at all (their `box` is NaN after exploding).
init = time()
print("Length before explode: ", len(metadata_gdf))
exploded = metadata_gdf.apply(pd.Series.explode)
metadata_gdf = exploded.reset_index(drop=True)
print("Length after explode: ", len(metadata_gdf))
with_predictions = metadata_gdf.dropna(subset=["box"])
metadata_gdf = with_predictions.reset_index(drop=True)
print("Length after dropping NaN: ", len(metadata_gdf))
print(f"Time taken to explode predictions: {time() - init:.2f}s")

# Step 3: keep only predictions at or above the confidence threshold.
print("Length before conf filtering: ", len(metadata_gdf))
is_confident = metadata_gdf["confidence"] >= float(conf_threshold)
metadata_gdf = metadata_gdf[is_confident]
print("Length after conf filtering: ", len(metadata_gdf))

init = time()

Predictions head after getting box, class_name, confidence
Time taken to load predictions: 0.51s
Length before explode:  1782
Length after explode:  1862
Length after dropping NaN:  243
Time taken to explode predictions: 0.01s
Length before conf filtering:  243
Length after conf filtering:  243


In [47]:
# Convert every prediction box into a shapely Polygon stored in `label_geometry`.
init = time()
if task == "obb":
    # Each `box` is a flat array of coordinates; reshape it into (x, y) vertex
    # pairs, which is the form Polygon accepts.
    metadata_gdf["label_geometry"] = metadata_gdf["box"].apply(lambda x: Polygon(x.reshape(-1, 2)))
else:
    # Without this branch `label_geometry` would never be created and the
    # following steps would fail with an unrelated-looking error.
    raise NotImplementedError(
        f"Only task='obb' is supported when building label geometries, got task={task!r}"
    )
print(metadata_gdf.head(2))
print(f"Time taken to convert predictions to geometry: {time() - init:.2f}s")

# Replace the original `geometry` column with the per-prediction polygons.
# Capture the CRS first, because the column that carries it is dropped next.
crs = metadata_gdf.crs
# The steps below all mutate `metadata_gdf` in place and depend on this order:
# drop the helper columns and the old geometry, ...
metadata_gdf.drop(columns=["box", "x_idx", "y_idx", "geometry"], inplace=True)
# ... give the prediction polygons the standard `geometry` name, ...
metadata_gdf.rename(columns={"label_geometry": "geometry"}, inplace=True)
# ... make that column the active geometry, ...
metadata_gdf.set_geometry("geometry", inplace=True)
# ... and re-attach the CRS captured above.
metadata_gdf.crs = crs

# ---- Overlap removal: find candidate pairs of intersecting predictions ----
# Renumber rows 0..n-1 so that index labels identify predictions in the join.
metadata_gdf.reset_index(drop=True, inplace=True)
print(f"{type(metadata_gdf)=}")
print(f"{metadata_gdf.columns=}")
print(f"{metadata_gdf.crs=}")

# Self-join: one row per (left, right) pair of intersecting predictions.
all_pairs = gpd.sjoin(metadata_gdf, metadata_gdf, predicate="intersects")
# Keeping only left < right drops self-matches and the mirrored copy of each pair.
left_before_right = all_pairs.index < all_pairs.index_right
intersection_gdf = (
    all_pairs.loc[left_before_right, ["index_right"]]
    .reset_index(drop=False)
    .rename(columns={"index": "index_left"})
)
print(f"{intersection_gdf.head(2)=}")
def get_iou(row):
    """Return the intersection-over-union of the two predictions named by ``row``.

    ``row`` must expose ``index_left`` and ``index_right`` attributes holding index
    labels of ``metadata_gdf``. Returns 0.0 when the union has zero area, so
    degenerate polygons do not trigger a division by zero.
    """
    geometry_left = metadata_gdf.loc[row.index_left, "geometry"]
    geometry_right = metadata_gdf.loc[row.index_right, "geometry"]
    union_area = geometry_left.union(geometry_right).area
    if union_area == 0:
        return 0.0
    return geometry_left.intersection(geometry_right).area / union_area


# Iterate over the pairs explicitly instead of using DataFrame.apply(axis=1):
# on an empty pair table (no overlapping predictions) apply returns an empty
# DataFrame rather than a Series, and assigning that to a single column fails.
intersection_gdf["iou"] = pd.Series(
    [get_iou(pair) for pair in intersection_gdf.itertuples(index=False)],
    index=intersection_gdf.index,
    dtype=float,
)


def get_remove_indices(row):
    """Return the index label to remove for a pair, or None to keep both.

    A pair is considered a duplicate when ``row.iou >= nms_iou``; in that case the
    prediction with the larger area is removed (the right one on ties).
    NOTE(review): this keeps the smaller polygon rather than, e.g., the more
    confident one -- confirm this is the intended rule.
    """
    if row.iou >= nms_iou:
        left_area = metadata_gdf.loc[row.index_left, "geometry"].area
        right_area = metadata_gdf.loc[row.index_right, "geometry"].area
        return row.index_left if left_area > right_area else row.index_right
    return None


# None (keep both) becomes NaN in the float column and is dropped just below.
intersection_gdf["index_remove"] = pd.Series(
    [get_remove_indices(pair) for pair in intersection_gdf.itertuples(index=False)],
    index=intersection_gdf.index,
    dtype=float,
)
intersection_gdf.dropna(subset=["index_remove"], inplace=True)
print("Size before NMS: ", len(metadata_gdf))
# Drop by integer label, once per prediction, instead of by repeated float labels.
metadata_gdf.drop(index=intersection_gdf["index_remove"].astype(int).unique(), inplace=True)
print("Size after NMS: ", len(metadata_gdf))

# Sanity check: count predictions left without a class name.
print(f"{metadata_gdf.class_name.isnull().sum()=}")

# Store confidence as a float column before writing the file.
confidence_as_float = metadata_gdf["confidence"].astype(float)
metadata_gdf["confidence"] = confidence_as_float
print(f"{metadata_gdf.dtypes=}")

# Write the predictions as GeoJSON; the file name records the threshold used.
init = time()
output_path = join(save_dir, f"predictions_{conf_threshold}.geojson")
metadata_gdf.to_file(output_path, driver="GeoJSON")
print(f"Time taken to save predictions: {time() - init:.2f}s")
# print(f"Total time taken: {time() - overall_init:.2f}s")

   x_idx  y_idx         x        y  \
0  22784  27392  10225442  2784751   
1  22208  25664  10222690  2793006   

                                            geometry  \
0  POLYGON ((10223913.543 2786280.166, 10226971.0...   
1  POLYGON ((10221161.81 2794535.365, 10224219.29...   

                                                 box class_name confidence  \
0  [10224998.446929628, 2783223.852839994, 102250...     Zigzag   0.782646   
1  [10223238.13885824, 2793805.119330983, 1022328...     Zigzag   0.832081   

                                          yolo_label  \
0  2.0_0.354836_0.999618_0.368349_0.99992_0.36882...   
1  2.0_0.679098_0.238839_0.692999_0.241333_0.6984...   

                                      label_geometry  
0  POLYGON ((10224998.446929628 2783223.852839994...  
1  POLYGON ((10223238.13885824 2793805.119330983,...  
Time taken to convert predictions to geometry: 0.01s
type(metadata_gdf)=<class 'geopandas.geodataframe.GeoDataFrame'>
metadata_gdf.columns=Index(['

In [48]:
# Final check: first two saved predictions and the total count.
saved_preview = metadata_gdf.head(2)
print("Predictions head after saving")
print(saved_preview)
n_saved_predictions = len(metadata_gdf)
print(f"Number of predictions: {n_saved_predictions}")

Predictions head after saving
          x        y class_name  confidence  \
0  10225442  2784751     Zigzag    0.782646   
1  10222690  2793006     Zigzag    0.832081   

                                          yolo_label  \
0  2.0_0.354836_0.999618_0.368349_0.99992_0.36882...   
1  2.0_0.679098_0.238839_0.692999_0.241333_0.6984...   

                                            geometry  
0  POLYGON ((10224998.447 2783223.853, 10225039.7...  
1  POLYGON ((10223238.139 2793805.119, 10223280.6...  
Number of predictions: 202
