In [5]:
import pandas as pd
import matplotlib.pyplot as plt


In [11]:
import pandas as pd

path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1.csv"

df = pd.read_csv(path)

# replace all NaN with 0000
df = df.fillna(0000)

# overwrite the same CSV
df.to_csv(path, index=False)

df.tail()

Unnamed: 0,Index,Type,Lat,Lon,Year made,fcb to zigzag,Circular FCB to Zigzag,circular FCB to Oval FCB,ZigZag to FCB,Year Demolished
791,791,1,28.99912,77.44388,2009,2018,0.0,0.0,0.0,0
792,792,1,28.23366,77.41032,2015,2018,0.0,0.0,0.0,0
793,793,1,28.22596,77.41453,2018,2018,0.0,0.0,0.0,0
794,794,1,28.74257,77.43613,2010,2017,0.0,0.0,0.0,0
795,795,1,28.73612,77.44105,2010,2020,0.0,0.0,0.0,0


In [9]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import geemap

# paths
points_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1.csv"
polys_path  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed.csv"

# ---------- load points ----------
df = pd.read_csv(points_path)

# ensure numeric
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")

# drop rows with invalid coords
df = df.dropna(subset=["Lat", "Lon"]).copy()

gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Lon"], df["Lat"]),
    crs="EPSG:4326"
)

# ---------- load polygons (CSV with WKT geometry) ----------
df_polys = pd.read_csv(polys_path)

# parse WKT -> shapely geometry
df_polys["geometry"] = df_polys["geometry"].apply(wkt.loads)

gdf_polys = gpd.GeoDataFrame(df_polys, geometry="geometry", crs="EPSG:4326")

# ---------- spatial join ----------
joined = gpd.sjoin(
    gdf_points,
    gdf_polys[["class_name", "Name", "geometry"]],
    how="left",
    predicate="within"
)

# ---------- counts ----------
count_by_name = (
    joined.groupby("Name", dropna=False)
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

count_by_class = (
    joined.groupby("class_name", dropna=False)
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

print("Count by polygon Name (NaN = outside all polygons):")
print(count_by_name.to_string(index=False))

print("\nCount by class_name (NaN = outside all polygons):")
print(count_by_class.to_string(index=False))

# ---------- geemap plot ----------
inside = joined[~joined["Name"].isna()].copy()
outside = joined[joined["Name"].isna()].copy()

m = geemap.Map(center=[28.6, 77.2], zoom=9)
m.add_basemap("SATELLITE")

m.add_gdf(
    gdf_polys,
    layer_name="Delhi airshed polygons",
    style={"color": "yellow", "fillColor": "#00000000", "weight": 2}
)

m.add_gdf(
    inside,
    layer_name="Kilns inside airshed",
    style={"color": "red", "pointSize": 5}
)

m.add_gdf(
    outside,
    layer_name="Kilns outside airshed",
    style={"color": "cyan", "pointSize": 4}
)

m

Count by polygon Name (NaN = outside all polygons):
         Name  count
delhi_airshed    718
          NaN     80

Count by class_name (NaN = outside all polygons):
class_name  count
    Zigzag    684
       NaN     80
      FCBK     34


Map(center=[28.6, 77.2], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright', …

In [10]:
# number of points that do NOT fall inside any polygon
num_outside = joined["Name"].isna().sum()

# total points
total_points = len(joined)

print(f"Total markers          : {total_points}")
print(f"Markers outside polygons: {num_outside}")
print(f"Markers inside polygons : {total_points - num_outside}")

Total markers          : 798
Markers outside polygons: 80
Markers inside polygons : 718


In [20]:
# pick a unique polygon id column
poly_id = "index" if "index" in gdf_polys.columns else "__poly_id__"
if poly_id == "__poly_id__":
    gdf_polys[poly_id] = gdf_polys.index

# spatial join with polygon id
joined2 = gpd.sjoin(
    gdf_points,
    gdf_polys[[poly_id, "class_name", "Name", "geometry"]],
    how="left",
    predicate="within"
)

# markers per polygon (only inside)
poly_marker_counts = (
    joined2.dropna(subset=[poly_id])
    .groupby(poly_id)
    .size()
    .rename("marker_count")
    .reset_index()
    .sort_values("marker_count", ascending=False)
)

print("Top polygons by marker_count:")
print(poly_marker_counts.head(20).to_string(index=False))

# how many polygons have >=2 markers (same polygon)
polys_with_multiple = poly_marker_counts[poly_marker_counts["marker_count"] >= 2]
print("\nPolygons with >=2 markers:", len(polys_with_multiple))

# how many markers are in those multi-marker polygons (total duplicates mass)
markers_in_multi_polys = polys_with_multiple["marker_count"].sum()
extra_markers_due_to_duplicates = (polys_with_multiple["marker_count"] - 1).sum()

print("Total markers inside polygons:", len(joined2.dropna(subset=[poly_id])))
print("Markers that lie in multi-marker polygons:", int(markers_in_multi_polys))
print("Extra markers beyond 1 per polygon:", int(extra_markers_due_to_duplicates))

# optional: attach counts back to polygons for mapping/inspection
gdf_polys_counts = gdf_polys.merge(poly_marker_counts, on=poly_id, how="left")
gdf_polys_counts["marker_count"] = gdf_polys_counts["marker_count"].fillna(0).astype(int)

# list polygons with multiple markers with metadata
cols = [poly_id] + [c for c in ["Name", "class_name"] if c in gdf_polys_counts.columns] + ["marker_count"]
print("\nPolygons having multiple markers:")
print(gdf_polys_counts[gdf_polys_counts["marker_count"] >= 2][cols].sort_values("marker_count", ascending=False).to_string(index=False))

Top polygons by marker_count:
 __poly_id__  marker_count
       633.0             2
       576.0             2
       588.0             2
       587.0             2
        79.0             2
       267.0             2
        71.0             2
       162.0             2
       336.0             2
        67.0             2
        64.0             2
       150.0             2
        61.0             2
        59.0             2
       261.0             2
       434.0             2
       258.0             2
        54.0             2
       347.0             2
       569.0             2

Polygons with >=2 markers: 66
Total markers inside polygons: 718
Markers that lie in multi-marker polygons: 132
Extra markers beyond 1 per polygon: 66

Polygons having multiple markers:
 __poly_id__          Name class_name  marker_count
           3 delhi_airshed     Zigzag             2
         462 delhi_airshed     Zigzag             2
         305 delhi_airshed     Zigzag             2
        

In [21]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import geemap

points_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1.csv"
polys_path  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed.csv"

# points
df = pd.read_csv(points_path)
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")
df = df.dropna(subset=["Lat", "Lon"]).copy()

gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Lon"], df["Lat"]),
    crs="EPSG:4326"
)

# polygons (CSV WKT)
df_polys = pd.read_csv(polys_path)
df_polys["geometry"] = df_polys["geometry"].apply(wkt.loads)
gdf_polys = gpd.GeoDataFrame(df_polys, geometry="geometry", crs="EPSG:4326")

# unique polygon id
poly_id = "index" if "index" in gdf_polys.columns else "__poly_id__"
if poly_id == "__poly_id__":
    gdf_polys[poly_id] = gdf_polys.index

# join
joined = gpd.sjoin(
    gdf_points,
    gdf_polys[[poly_id, "geometry"]],
    how="left",
    predicate="within"
)

# counts per polygon
counts = (
    joined.dropna(subset=[poly_id])
    .groupby(poly_id)
    .size()
    .rename("marker_count")
    .reset_index()
)

# polygons with >=2 markers
polys_multi = gdf_polys.merge(counts, on=poly_id, how="left")
polys_multi["marker_count"] = polys_multi["marker_count"].fillna(0).astype(int)
polys_multi = polys_multi[polys_multi["marker_count"] >= 2]

# markers that fall in those polygons
multi_ids = set(polys_multi[poly_id].tolist())
markers_multi = joined[joined[poly_id].isin(multi_ids)].copy()

print("Polygons with >=2 markers:", len(polys_multi))
print("Markers inside those polygons:", len(markers_multi))

# geemap plot (only these)
m = geemap.Map(center=[28.6, 77.2], zoom=9)
m.add_basemap("SATELLITE")

m.add_gdf(
    polys_multi,
    layer_name="Polygons with >=2 markers",
    style={"color": "lime", "fillColor": "#00000000", "weight": 3}
)

m.add_gdf(
    markers_multi,
    layer_name="Markers in those polygons",
    style={"color": "red", "pointSize": 6}
)

m

Polygons with >=2 markers: 66
Markers inside those polygons: 132


Map(center=[28.6, 77.2], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright', …

In [22]:
import pandas as pd

path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1.csv"

df = pd.read_csv(path)

# ensure numeric
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")

df = df.dropna(subset=["Lat", "Lon"]).copy()

# count occurrences of each (Lat, Lon)
latlon_counts = (
    df.groupby(["Lat", "Lon"])
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

# only common (duplicates)
duplicates = latlon_counts[latlon_counts["count"] > 1]

print("Total rows:", len(df))
print("Unique Lat–Lon pairs:", len(latlon_counts))
print("Duplicate Lat–Lon pairs:", len(duplicates))
print("Total duplicate rows (extra beyond 1):", int((duplicates["count"] - 1).sum()))

print("\nDuplicate Lat–Lon pairs:")
print(duplicates.to_string(index=False))

Total rows: 796
Unique Lat–Lon pairs: 792
Duplicate Lat–Lon pairs: 4
Total duplicate rows (extra beyond 1): 4

Duplicate Lat–Lon pairs:
     Lat      Lon  count
28.29928 77.05538      2
28.89475 77.25475      2
28.78600 77.35718      2
28.93150 76.91468      2


In [None]:
# ---- polygons with >=2 markers (which polygons) ----
cols_poly = [poly_id]
if "Name" in gdf_polys.columns: cols_poly.append("Name")
if "class_name" in gdf_polys.columns: cols_poly.append("class_name")
cols_poly.append("marker_count")

polys_multi_table = polys_multi[cols_poly].sort_values("marker_count", ascending=False)
print("Polygons with >=2 markers (sorted):")
print(polys_multi_table.to_string(index=False))

print("\nHow many polygons have >=2 markers:", len(polys_multi_table))

# ---- corresponding rows from your point CSV (which markers fall in those polygons) ----
# keep original point columns (drop geometry)
point_cols = [c for c in df.columns if c not in ["geometry"]]

# markers_multi currently is the joined GeoDataFrame; get its point rows + polygon id
markers_multi_rows = markers_multi[point_cols + [poly_id]].copy()

# sort by polygon then by lat/lon for readability
markers_multi_rows = markers_multi_rows.sort_values([poly_id, "Lat", "Lon"])

print("\nPoint-CSV rows that belong to polygons with >=2 markers:")
print(markers_multi_rows.to_string(index=False))

# # optional: save both outputs
# out_poly = points_path.replace(".csv", "_polygons_ge2_markers.csv")
# out_pts  = points_path.replace(".csv", "_points_in_polygons_ge2_markers.csv")
# polys_multi_table.to_csv(out_poly, index=False)
# markers_multi_rows.to_csv(out_pts, index=False)
# print("\nSaved:")
# print(out_poly)
# print(out_pts)

Polygons with >=2 markers (sorted):
 __poly_id__          Name class_name  marker_count
           3 delhi_airshed     Zigzag             2
         462 delhi_airshed     Zigzag             2
         305 delhi_airshed     Zigzag             2
         313 delhi_airshed     Zigzag             2
         314 delhi_airshed     Zigzag             2
         317 delhi_airshed     Zigzag             2
         336 delhi_airshed     Zigzag             2
         347 delhi_airshed     Zigzag             2
         370 delhi_airshed     Zigzag             2
         388 delhi_airshed     Zigzag             2
         398 delhi_airshed     Zigzag             2
         400 delhi_airshed     Zigzag             2
         412 delhi_airshed     Zigzag             2
         423 delhi_airshed       FCBK             2
         434 delhi_airshed     Zigzag             2
         454 delhi_airshed       FCBK             2
         469 delhi_airshed     Zigzag             2
           5 delhi_airshed  

In [29]:
# work on the joined dataframe so polygon id is available
df_dedup = markers_multi.copy()

# ensure year is numeric
df_dedup["Year made"] = pd.to_numeric(df_dedup["Year made"], errors="coerce")

# sort to make keep-first deterministic (optional)
df_dedup = df_dedup.sort_values([poly_id, "Year made", "Lat", "Lon"])

# drop duplicates: same polygon + same year
df_dedup = df_dedup.drop_duplicates(
    subset=[poly_id, "Year made"],
    keep="first"
)

print("Rows after removing same-year duplicates per polygon:", len(df_dedup))

print(df_dedup.head())
print(df_dedup.to_string(index=False))


Rows after removing same-year duplicates per polygon: 71
     Index  Type       Lat       Lon  Year made  fcb to zigzag  \
527    527     1  28.94546  76.91489       2015           2017   
668    668     1  28.89266  77.02469       2009           2015   
592    592     1  28.93150  76.91468       2011           2017   
560    560     1  28.25142  77.40510       2010           2019   
265    265     1  28.92216  76.88463       2011           2019   

     Circular FCB to Zigzag  circular FCB to Oval FCB  ZigZag to FCB  \
527                     0.0                       0.0            0.0   
668                     0.0                       0.0            0.0   
592                     0.0                       0.0            0.0   
560                     0.0                       0.0            0.0   
265                     0.0                       0.0            0.0   

    Year Demolished                   geometry  index_right  __poly_id__  
527               0  POINT (76.91489 2

In [30]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

points_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1.csv"
polys_path  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed.csv"

# ---------------- points ----------------
df = pd.read_csv(points_path)
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")
df = df.dropna(subset=["Lat", "Lon"]).copy()

gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Lon"], df["Lat"]),
    crs="EPSG:4326"
)

# ---------------- polygons ----------------
df_polys = pd.read_csv(polys_path)
df_polys["geometry"] = df_polys["geometry"].apply(wkt.loads)
gdf_polys = gpd.GeoDataFrame(df_polys, geometry="geometry", crs="EPSG:4326")

poly_id = "index" if "index" in gdf_polys.columns else "__poly_id__"
if poly_id == "__poly_id__":
    gdf_polys[poly_id] = gdf_polys.index

# ---------------- join + identify duplicate polygons ----------------
joined = gpd.sjoin(
    gdf_points,
    gdf_polys[[poly_id, "geometry"]],
    how="left",
    predicate="within"
)

# polygons that have >=2 markers (ignore outside polygons)
poly_counts = joined.dropna(subset=[poly_id]).groupby(poly_id).size()
dup_poly_ids = poly_counts[poly_counts >= 2].index.tolist()

# rows in those polygons
dup_rows = joined[joined[poly_id].isin(dup_poly_ids)].copy()

# ---------------- drop "second one" (keep first per polygon) ----------------
# deterministic ordering: keep the first after sorting
dup_rows = dup_rows.sort_values([poly_id, "Lat", "Lon"])

keep_first_idx = dup_rows.groupby(poly_id, sort=False).head(1).index
drop_idx = dup_rows.index.difference(keep_first_idx)

print("Duplicate polygons:", len(dup_poly_ids))
print("Rows to drop (second+ in each duplicate polygon):", len(drop_idx))

# drop from original df using the same indices (joined kept original index)
df_clean = df.drop(index=drop_idx)

# ---------------- save ----------------
out_path = points_path.replace(".csv", "_dedup_one_marker_per_polygon.csv")
df_clean.to_csv(out_path, index=False)
print("Saved:", out_path)

Duplicate polygons: 66
Rows to drop (second+ in each duplicate polygon): 65
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1_dedup_one_marker_per_polygon.csv


In [32]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import geemap

# paths
points_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1_dedup_one_marker_per_polygon.csv"
polys_path  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed.csv"

# ---------- load points ----------
df = pd.read_csv(points_path)

# ensure numeric
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")

# drop rows with invalid coords
df = df.dropna(subset=["Lat", "Lon"]).copy()

gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Lon"], df["Lat"]),
    crs="EPSG:4326"
)

# ---------- load polygons (CSV with WKT geometry) ----------
df_polys = pd.read_csv(polys_path)

# parse WKT -> shapely geometry
df_polys["geometry"] = df_polys["geometry"].apply(wkt.loads)

gdf_polys = gpd.GeoDataFrame(df_polys, geometry="geometry", crs="EPSG:4326")

# ---------- spatial join ----------
joined = gpd.sjoin(
    gdf_points,
    gdf_polys[["class_name", "Name", "geometry"]],
    how="left",
    predicate="within"
)

# ---------- counts ----------
count_by_name = (
    joined.groupby("Name", dropna=False)
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

count_by_class = (
    joined.groupby("class_name", dropna=False)
    .size()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

print("Count by polygon Name (NaN = outside all polygons):")
print(count_by_name.to_string(index=False))

print("\nCount by class_name (NaN = outside all polygons):")
print(count_by_class.to_string(index=False))

# ---------- geemap plot ----------
inside = joined[~joined["Name"].isna()].copy()
outside = joined[joined["Name"].isna()].copy()

m = geemap.Map(center=[28.6, 77.2], zoom=9)
m.add_basemap("SATELLITE")

m.add_gdf(
    gdf_polys,
    layer_name="Delhi airshed polygons",
    style={"color": "yellow", "fillColor": "#00000000", "weight": 2}
)

m.add_gdf(
    inside,
    layer_name="Kilns inside airshed",
    style={"color": "red", "pointSize": 5}
)

m.add_gdf(
    outside,
    layer_name="Kilns outside airshed",
    style={"color": "cyan", "pointSize": 4}
)

m

Count by polygon Name (NaN = outside all polygons):
         Name  count
delhi_airshed    652
          NaN     80

Count by class_name (NaN = outside all polygons):
class_name  count
    Zigzag    621
       NaN     80
      FCBK     31


Map(center=[28.6, 77.2], controls=(WidgetControl(options=['position', 'transparent_bg'], position='topright', …

In [33]:
# number of points that do NOT fall inside any polygon
num_outside = joined["Name"].isna().sum()

# total points
total_points = len(joined)

print(f"Total markers          : {total_points}")
print(f"Markers outside polygons: {num_outside}")
print(f"Markers inside polygons : {total_points - num_outside}")

Total markers          : 732
Markers outside polygons: 80
Markers inside polygons : 652


In [34]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

points_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1_dedup_one_marker_per_polygon.csv"
polys_path  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed.csv"

# ---------- points ----------
df = pd.read_csv(points_path)
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")
df = df.dropna(subset=["Lat", "Lon"]).copy()

gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Lon"], df["Lat"]),
    crs="EPSG:4326"
)

# ---------- polygons (CSV WKT) ----------
df_polys = pd.read_csv(polys_path)
df_polys["geometry"] = df_polys["geometry"].apply(wkt.loads)
gdf_polys = gpd.GeoDataFrame(df_polys, geometry="geometry", crs="EPSG:4326")

# choose unique polygon id
poly_id = "index" if "index" in gdf_polys.columns else "__poly_id__"
if poly_id == "__poly_id__":
    gdf_polys[poly_id] = gdf_polys.index

# ---------- spatial join (polygon id only) ----------
joined = gpd.sjoin(
    gdf_points,
    gdf_polys[[poly_id, "geometry"]],
    how="left",
    predicate="within"
)

# polygons that have >=1 marker
hit_ids = set(joined.dropna(subset=[poly_id])[poly_id].unique().tolist())

# split polygons
gdf_with = gdf_polys[gdf_polys[poly_id].isin(hit_ids)].copy()
gdf_without = gdf_polys[~gdf_polys[poly_id].isin(hit_ids)].copy()

print("Total polygons:", len(gdf_polys))
print("Polygons WITH markers:", len(gdf_with))
print("Polygons WITHOUT markers:", len(gdf_without))

# ---------- save as CSV (keep WKT geometry) ----------
df_with = gdf_with.copy()
df_without = gdf_without.copy()
df_with["geometry"] = df_with["geometry"].apply(lambda g: g.wkt)
df_without["geometry"] = df_without["geometry"].apply(lambda g: g.wkt)

out_with = polys_path.replace(".csv", "_WITH_markers.csv")
out_without = polys_path.replace(".csv", "_WITHOUT_markers.csv")

df_with.to_csv(out_with, index=False)
df_without.to_csv(out_without, index=False)

print("Saved:", out_with)
print("Saved:", out_without)

Total polygons: 753
Polygons WITH markers: 652
Polygons WITHOUT markers: 101
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers.csv
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers.csv


In [None]:
import pandas as pd

report_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/download_report_all.csv"
polys_path  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers.csv"
/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers.csv

YEAR = 2025

# ------------------ load ------------------
rep = pd.read_csv(report_path)
pol = pd.read_csv(polys_path)

# ------------------ normalize polygon centroids to filename format ------------------
pol["center_lat_6"] = pd.to_numeric(pol["center_lat"], errors="coerce").map(lambda x: f"{x:.6f}")
pol["center_lon_6"] = pd.to_numeric(pol["center_lon"], errors="coerce").map(lambda x: f"{x:.6f}")

pol["expected_fname"] = pol["center_lat_6"] + "_" + pol["center_lon_6"] + f"_{YEAR}.png"

# report already has full relative path; extract basename
rep["fname"] = rep["path"].astype(str).str.split("/").str[-1]

# ------------------ mapping (polygon row -> image row) ------------------
mapped = pol.merge(
    rep[["idx", "center_lat", "center_lon", "year", "status", "crop_size", "path", "fname"]],
    left_on="expected_fname",
    right_on="fname",
    how="left",
    indicator=True
)

# ------------------ diagnostics ------------------
print("Total polygons:", len(pol))
print("Matched to images:", (mapped["_merge"] == "both").sum())
print("No match:", (mapped["_merge"] != "both").sum())
print("\nStatus counts among matched:")
print(mapped.loc[mapped["_merge"] == "both", "status"].value_counts(dropna=False))

# ------------------ save mapping ------------------
out_path = polys_path.replace(".csv", "_mapped_to_download_report.csv")
mapped.to_csv(out_path, index=False)
print("Saved:", out_path)

# optional: only unmatched polygons
out_unmatched = polys_path.replace(".csv", "_unmatched_to_download_report.csv")
mapped[mapped["_merge"] != "both"].to_csv(out_unmatched, index=False)
print("Saved:", out_unmatched)

Total polygons: 652
Matched to images: 618
No match: 34

Status counts among matched:
ok    618
Name: status, dtype: int64
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers_mapped_to_download_report.csv
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers_unmatched_to_download_report.csv


In [45]:
import pandas as pd

report_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/download_report_all.csv"
with_path   = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers.csv"
without_path= "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers.csv"

YEAR = 2025

rep = pd.read_csv(report_path)
rep["fname"] = rep["path"].astype(str).str.split("/").str[-1]

def map_polygons(poly_csv_path: str, tag: str):
    pol = pd.read_csv(poly_csv_path)

    pol["center_lat_6"] = pd.to_numeric(pol["center_lat"], errors="coerce").map(lambda x: f"{x:.6f}")
    pol["center_lon_6"] = pd.to_numeric(pol["center_lon"], errors="coerce").map(lambda x: f"{x:.6f}")
    pol["expected_fname"] = pol["center_lat_6"] + "_" + pol["center_lon_6"] + f"_{YEAR}.png"
    pol["marker_flag"] = tag  # WITH or WITHOUT

    mapped = pol.merge(
        rep[["idx", "center_lat", "center_lon", "year", "status", "crop_size", "path", "fname"]],
        left_on="expected_fname",
        right_on="fname",
        how="left",
        indicator=True
    )

    out_all = poly_csv_path.replace(".csv", "_mapped_to_download_report.csv")
    out_unmatched = poly_csv_path.replace(".csv", "_unmatched_to_download_report.csv")

    mapped.to_csv(out_all, index=False)
    mapped[mapped["_merge"] != "both"].to_csv(out_unmatched, index=False)

    print(f"\n[{tag}] Total polygons:", len(pol))
    print(f"[{tag}] Matched to images:", (mapped['_merge'] == 'both').sum())
    print(f"[{tag}] No match:", (mapped['_merge'] != 'both').sum())

    return mapped, out_all, out_unmatched

mapped_with, out_with_all, out_with_unmatched = map_polygons(with_path, "WITH")
mapped_without, out_wo_all, out_wo_unmatched  = map_polygons(without_path, "WITHOUT")

# combined mapping file (both CSVs together)
combined = pd.concat([mapped_with, mapped_without], ignore_index=True)
combined_out = report_path.replace("download_report_all.csv", "polygons_WITH_AND_WITHOUT_mapped_to_download_report.csv")
combined.to_csv(combined_out, index=False)

print("\nSaved combined:", combined_out)
print("Saved WITH:", out_with_all)
print("Saved WITH unmatched:", out_with_unmatched)
print("Saved WITHOUT:", out_wo_all)
print("Saved WITHOUT unmatched:", out_wo_unmatched)


[WITH] Total polygons: 652
[WITH] Matched to images: 618
[WITH] No match: 34

[WITHOUT] Total polygons: 101
[WITHOUT] Matched to images: 88
[WITHOUT] No match: 13

Saved combined: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_WITH_AND_WITHOUT_mapped_to_download_report.csv
Saved WITH: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers_mapped_to_download_report.csv
Saved WITH unmatched: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers_unmatched_to_download_report.csv
Saved WITHOUT: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers_mapped_to_download_report.csv
Saved WITHOUT unmatched: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers_unmatched_to_download_repor

In [49]:
print(pd.read_csv(with_path, nrows=1).columns.tolist())

['class_name', 'max_lon', 'min_lon', 'max_lat', 'min_lat', 'center_lat', 'center_lon', 'country', 'geometry', 'index_right', 'Name', '__poly_id__']


In [54]:
import pandas as pd

report_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/download_report_all.csv"
with_path   = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers.csv"
without_path= "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers.csv"

YEAR = 2025

# ---------------- report ----------------
rep = pd.read_csv(report_path)
rep["fname"] = rep["path"].astype(str).str.split("/").str[-1]

# 5-digit fallback keys
rep["lat5"] = pd.to_numeric(rep["center_lat"], errors="coerce").round(5)
rep["lon5"] = pd.to_numeric(rep["center_lon"], errors="coerce").round(5)

rep["ok_rank"] = (rep["status"] == "ok").astype(int)
rep5 = (
    rep.sort_values(["ok_rank"], ascending=False)
       .drop_duplicates(subset=["lat5", "lon5"], keep="first")
       .copy()
)

def match_with_fallback(poly_csv_path: str, tag: str):
    pol = pd.read_csv(poly_csv_path)

    # stable polygon coordinates
    pol["poly_center_lat"] = pd.to_numeric(pol["center_lat"], errors="coerce")
    pol["poly_center_lon"] = pd.to_numeric(pol["center_lon"], errors="coerce")

    # -------- 6dp exact filename match --------
    pol["lat6"] = pol["poly_center_lat"].round(6)
    pol["lon6"] = pol["poly_center_lon"].round(6)
    pol["expected_fname_6"] = (
        pol["lat6"].map(lambda x: f"{x:.6f}") + "_" +
        pol["lon6"].map(lambda x: f"{x:.6f}") + f"_{YEAR}.png"
    )

    m6 = pol.merge(
        rep[["idx","center_lat","center_lon","year","status","crop_size","path","fname"]],
        left_on="expected_fname_6",
        right_on="fname",
        how="left",
        indicator="merge_6dp",
        suffixes=("", "_rep")
    )

    hit6  = m6[m6["merge_6dp"] == "both"].copy()
    miss6 = m6[m6["merge_6dp"] != "both"].copy()

    # -------- 5dp fallback (only for 6dp failures) --------
    miss6["lat5"] = miss6["poly_center_lat"].round(5)
    miss6["lon5"] = miss6["poly_center_lon"].round(5)

    m5 = miss6.merge(
        rep5[["idx","center_lat","center_lon","year","status","crop_size","path","lat5","lon5"]],
        on=["lat5","lon5"],
        how="left",
        indicator="merge_5dp",
        suffixes=("", "_rep5")
    )

    out = pd.concat([hit6, m5], ignore_index=True)
    out["marker_flag"] = tag
    out["match_level"] = "none"
    out.loc[out["merge_6dp"] == "both", "match_level"] = "6dp"
    out.loc[(out["match_level"] == "none") & (out["merge_5dp"] == "both"), "match_level"] = "5dp"

    print(f"\n[{tag}] total:", len(out))
    print(f"[{tag}] matched 6dp:", (out["match_level"] == "6dp").sum())
    print(f"[{tag}] matched 5dp:", (out["match_level"] == "5dp").sum())
    print(f"[{tag}] still unmatched:", (out["match_level"] == "none").sum())

    out_all = poly_csv_path.replace(".csv", "_mapped_6dp_then_5dp.csv")
    out_none = poly_csv_path.replace(".csv", "_still_unmatched_after_5dp.csv")

    out.to_csv(out_all, index=False)
    out[out["match_level"] == "none"].to_csv(out_none, index=False)

    print("Saved:", out_all)
    print("Saved:", out_none)

    return out

mapped_with = match_with_fallback(with_path, "WITH")
mapped_without = match_with_fallback(without_path, "WITHOUT")

combined = pd.concat([mapped_with, mapped_without], ignore_index=True)
combined_out = report_path.replace(
    "download_report_all.csv",
    "polygons_mapped_6dp_then_5dp_combined.csv"
)
combined.to_csv(combined_out, index=False)
print("\nSaved combined:", combined_out)


[WITH] total: 652
[WITH] matched 6dp: 618
[WITH] matched 5dp: 34
[WITH] still unmatched: 0
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers_mapped_6dp_then_5dp.csv
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITH_markers_still_unmatched_after_5dp.csv

[WITHOUT] total: 101
[WITHOUT] matched 6dp: 85
[WITHOUT] matched 5dp: 16
[WITHOUT] still unmatched: 0
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers_mapped_6dp_then_5dp.csv
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/kilns_in_delhi_airshed_WITHOUT_markers_still_unmatched_after_5dp.csv

Saved combined: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_mapped_6dp_then_5dp_combined.csv


In [58]:
import pandas as pd

in_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_mapped_6dp_then_5dp_combined.csv"

df = pd.read_csv(in_path)

# choose the correct filename column
# prefer fname (6dp), else extract from path_rep5 (5dp fallback)
df["filename"] = df["fname"]

mask_5dp = df["filename"].isna() & df["path_rep5"].notna()
df.loc[mask_5dp, "filename"] = df.loc[mask_5dp, "path_rep5"].astype(str).str.split("/").str[-1]

# keep only rows that actually matched to an image
df_clean = df[df["filename"].notna()][["filename", "geometry"]].copy()

out_path = in_path.replace(".csv", "_filename_geometry_only.csv")
df_clean.to_csv(out_path, index=False)

print("Saved:", out_path)
print("Rows:", len(df_clean))

Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_mapped_6dp_then_5dp_combined_filename_geometry_only.csv
Rows: 753


In [60]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

markers_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1.csv"
poly_map_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_mapped_6dp_then_5dp_combined_filename_geometry_only.csv"  # <- set your real filename, must have filename,geometry

# -------- markers (points) --------
df = pd.read_csv(markers_path)
df["Lat"] = pd.to_numeric(df["Lat"], errors="coerce")
df["Lon"] = pd.to_numeric(df["Lon"], errors="coerce")
df = df.dropna(subset=["Lat", "Lon"]).copy()

gdf_points = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Lon"], df["Lat"]),
    crs="EPSG:4326"
)

# -------- polygons with filename --------
pm = pd.read_csv(poly_map_path)
pm["geometry"] = pm["geometry"].apply(wkt.loads)
gdf_polys = gpd.GeoDataFrame(pm, geometry="geometry", crs="EPSG:4326")

# -------- join: each marker -> which polygon/filename it falls in --------
joined = gpd.sjoin(
    gdf_points,
    gdf_polys[["filename", "geometry"]],
    how="left",
    predicate="within"
)

inside = joined.dropna(subset=["filename"]).copy()

print("Total markers:", len(gdf_points))
print("Markers inside any polygon with filename:", len(inside))
print("Markers outside polygons:", len(gdf_points) - len(inside))

# -------- enforce one marker -> one filename -> one geometry (dedup) --------
# if multiple markers land in same polygon, keep only first
inside_unique = inside.sort_values(["filename", "Lat", "Lon"]).drop_duplicates(subset=["filename"], keep="first")

print("\nUnique filename assignments (one marker per filename):", len(inside_unique))
print("Extra markers landing in already-used filename polygons:", len(inside) - len(inside_unique))

# -------- save the one-to-one mapping --------
out = inside_unique.drop(columns=["geometry", "index_right"], errors="ignore").copy()
# keep a clean set of columns (marker info + filename)
keep_cols = ["filename"] + [c for c in df.columns if c in out.columns and c not in ["geometry"]]
out = out[keep_cols]

out_path = markers_path.replace(".csv", "_markers_mapped_to_filename_one_to_one.csv")
out.to_csv(out_path, index=False)
print("Saved:", out_path)

Total markers: 796
Markers inside any polygon with filename: 718
Markers outside polygons: 78

Unique filename assignments (one marker per filename): 652
Extra markers landing in already-used filename polygons: 66
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1_markers_mapped_to_filename_one_to_one.csv


In [None]:
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

# paths
img_dir = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/panels_with_polygon"
mapped_csv = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1_markers_mapped_to_filename_one_to_one.csv"

# filenames that HAVE markers
mapped = pd.read_csv(mapped_csv)
files_with_markers = set(mapped["filename"].astype(str))

# all images
all_images = sorted(f for f in os.listdir(img_dir) if f.endswith(".png"))

# images WITHOUT markers
no_marker_images = [f for f in all_images if f not in files_with_markers]

print("Images without markers:", len(no_marker_images))

# plot only images without markers
for i, fname in enumerate(no_marker_images, 1):
    img_path = os.path.join(img_dir, fname)
    img = Image.open(img_path)

    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    plt.axis("off")
    plt.title(f"No marker ({i}/{len(no_marker_images)})\n{fname}")
    plt.show()

In [2]:
import pandas as pd

# paths
poly_map_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_mapped_6dp_then_5dp_combined_filename_geometry_only.csv"
mapped_marker_path = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/brick_kiln_delhi_v1_markers_mapped_to_filename_one_to_one.csv"

# -------- load --------
polys = pd.read_csv(poly_map_path)          # has: filename, geometry
mapped = pd.read_csv(mapped_marker_path)    # has: filename + marker info

# -------- filenames --------
all_filenames = set(polys["filename"].astype(str))
used_filenames = set(mapped["filename"].astype(str))

# -------- filenames with NO markers --------
no_marker_filenames = sorted(all_filenames - used_filenames)

print("Total polygons/images:", len(all_filenames))
print("Images with markers:", len(used_filenames))
print("Images without markers:", len(no_marker_filenames))

# -------- save clean CSV --------
out_df = pd.DataFrame({"filename": no_marker_filenames})

out_path = poly_map_path.replace(
    ".csv",
    "_filenames_without_markers.csv"
)
out_df.to_csv(out_path, index=False)

print("Saved:", out_path)

Total polygons/images: 753
Images with markers: 652
Images without markers: 101
Saved: /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/temporal-analysis/data/delhi_airshed_y_2025_z_17_buf_25m/polygons_mapped_6dp_then_5dp_combined_filename_geometry_only_filenames_without_markers.csv
