In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import leafmap

In [2]:
# Parallel sequences of state names and EPSG codes; the resulting lookup is
# used further down to pick the CRS that the kiln layer is reprojected to.
state_epsg_pairs = (
    ["uttar_pradesh", "bihar", "west_bengal", "haryana", "punjab"],
    [7775, 7759, 7787, 7762, 7773],
)
state_epsg_mapping = {name: code for name, code in zip(*state_epsg_pairs)}
state_epsg_mapping

In [3]:
# Zone label associated with each supported state.
state_zone_mapping = dict(
    uttar_pradesh="central-zone",
    bihar="eastern-zone",
    west_bengal="eastern-zone",
    haryana="northern-zone",
    punjab="northern-zone",
)

In [4]:
# Load the hospital table from a CSV in the working directory. The cleaning
# code below reads its 'lat' and 'lon' columns.
hospital_data_gov = pd.read_csv("India_Hospital_Data.csv")


def try_except(x):
    """Convert ``x`` to ``float``, falling back to ``np.nan`` when it cannot be parsed.

    Parameters
    ----------
    x : object
        Any value accepted by ``float()`` (numbers, numeric strings, ...).

    Returns
    -------
    float
        ``float(x)``, or ``np.nan`` if the conversion raises ``TypeError``
        (e.g. ``None``), ``ValueError`` (e.g. non-numeric text) or
        ``OverflowError`` (e.g. an integer too large for a float).

    Only conversion failures are swallowed; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate, so a long-running ``apply`` can still be
    interrupted.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return np.nan

# Parse both coordinate columns to floats (values that cannot be parsed
# become NaN) and keep only the rows that have both a latitude and a longitude.
hospital_data_gov = hospital_data_gov.assign(
    lat=hospital_data_gov["lat"].apply(try_except),
    lon=hospital_data_gov["lon"].apply(try_except),
).dropna(subset=["lat", "lon"])

# Number of hospitals left after cleaning, followed by a two-row preview.
print(hospital_data_gov.shape[0])
hospital_data_gov.head(2)

In [5]:
# Turn the cleaned hospital table into a point layer: x = longitude,
# y = latitude, tagged as EPSG:4326.
hospital_data_gov_gdf = gpd.GeoDataFrame(
    hospital_data_gov,
    geometry=gpd.points_from_xy(hospital_data_gov["lon"], hospital_data_gov["lat"]),
    crs="EPSG:4326",
)
hospital_data_gov_gdf.head(2)

## State-wise

In [6]:
# State processed by the cells of this section, with its EPSG code and zone
# looked up from the mappings defined at the top of the notebook.
state = "haryana"
epsg, zone = state_epsg_mapping[state], state_zone_mapping[state]
print(state, epsg, zone)

In [7]:
# Buildings layer, read from a shapefile in the working directory.
# NOTE(review): the path does not depend on `state` or `zone` - confirm the
# working directory holds the extract for the selected state.
building_gdf = gpd.read_file("gis_osm_buildings_a_free_1.shp")

In [8]:
# The 50 most frequent building types.
building_gdf["type"].value_counts().iloc[:50]

In [9]:
# Land-use layer and its 50 most frequent feature classes.
lu_gdf = gpd.read_file("gis_osm_landuse_a_free_1.shp")
lu_gdf["fclass"].value_counts().head(50)

In [10]:
# Water-area layer and its 50 most frequent feature classes.
water_gdf = gpd.read_file("gis_osm_water_a_free_1.shp")
water_gdf["fclass"].value_counts().head(50)

In [11]:
# Waterways layer and its 50 most frequent feature classes.
waterways_gdf = gpd.read_file("gis_osm_waterways_free_1.shp")
waterways_gdf["fclass"].value_counts().head(50)

In [12]:
# Roads layer and its 50 most frequent feature classes.
roads_gdf = gpd.read_file("gis_osm_roads_free_1.shp")
roads_gdf["fclass"].value_counts().head(50)

In [13]:
# Railways layer and its 50 most frequent feature classes.
railways_gdf = gpd.read_file("gis_osm_railways_free_1.shp")
railways_gdf["fclass"].value_counts().head(50)

In [14]:
# How often each name occurs among the land-use features classed as orchards.
lu_gdf.loc[lu_gdf["fclass"] == "orchard", "name"].value_counts()

In [15]:
# Kiln labels for the selected state, reprojected to the state's EPSG code.
kilns_gdf = gpd.read_file(f"labels/{state}.geojson").to_crs(epsg)

# One selector per rule, shared by all states. Each entry is a zero-argument
# callable so that only the layers needed for the selected state are filtered.
# `na=False` makes roads without a `ref` value never match a prefix.
rule_layers = {
    "Habitation": lambda: lu_gdf[lu_gdf.fclass == "residential"],
    "Orchard": lambda: lu_gdf[lu_gdf.fclass == "orchard"],
    "School": lambda: building_gdf[building_gdf["type"] == "school"],
    "Hospital_OSM": lambda: building_gdf[building_gdf["type"] == "hospital"],
    "Hospital_Gov": lambda: hospital_data_gov_gdf,
    "Religious places": lambda: building_gdf[
        building_gdf["type"].isin(["temple", "church", "mosque"])
    ],
    "Nature reserve": lambda: lu_gdf[lu_gdf.fclass == "nature_reserve"],
    "River": lambda: waterways_gdf[waterways_gdf.fclass == "river"],
    "Wetland": lambda: water_gdf[water_gdf.fclass == "wetland"],
    "National highway": lambda: roads_gdf[
        roads_gdf.ref.str.startswith("NH", na=False)
        | roads_gdf.ref.str.startswith("NE", na=False)
    ],
    "State highway": lambda: roads_gdf[roads_gdf.ref.str.startswith("SH", na=False)],
    "District highway": lambda: roads_gdf[roads_gdf.ref.str.startswith("MDR", na=False)],
    "Railway": lambda: railways_gdf,
    "Inter kiln": lambda: kilns_gdf.copy(),
}

# Rules applied in each state and their distance thresholds (used as the
# `distance` of the `dwithin` spatial join in the next cell). Insertion order
# is the order of the rule columns in the exported file.
# NOTE(review): for Bihar the original cell carried commented-out
# "Nature reserve" and "District highway" entries; they remain excluded.
state_rule_thresholds = {
    "bihar": {
        "Habitation": 800,
        "Orchard": 800,
        "School": 800,
        "Hospital_OSM": 800,
        "Hospital_Gov": 800,
        "River": 500,
        "Wetland": 500,
        "National highway": 300,
        "State highway": 200,
        "Railway": 200,
        "Inter kiln": 1000,
    },
    "uttar_pradesh": {
        "Habitation": 1000,
        "Orchard": 800,
        "School": 1000,
        "Hospital_OSM": 1000,
        "Hospital_Gov": 1000,
        "Religious places": 1000,
        "Nature reserve": 5000,
        "Railway": 200,
        "National highway": 300,
        "State highway": 300,
        "District highway": 100,
        "Inter kiln": 800,
    },
    "west_bengal": {
        "Habitation": 800,
        "Orchard": 800,
        "Inter kiln": 300,
        "Hospital_OSM": 1000,
        "Hospital_Gov": 1000,
        "School": 1000,
        "Religious places": 1000,
        "Nature reserve": 5000,
        "Railway": 200,
        "National highway": 200,
        "State highway": 200,
        "River": 200,
    },
    "haryana": {
        "Habitation": 800,
        "Orchard": 800,
        "Inter kiln": 1000,
        "Hospital_OSM": 1000,
        "Hospital_Gov": 1000,
        "School": 1000,
        "Nature reserve": 1000,
    },
    "punjab": {
        "Habitation": 500,
        "Orchard": 800,
        "Inter kiln": 1000,
        "State highway": 100,
    },
}

if state not in state_rule_thresholds:
    raise ValueError("State not supported")

# rule name -> (layer to measure distance to, distance threshold)
osm_dict = {
    rule: (rule_layers[rule](), threshold)
    for rule, threshold in state_rule_thresholds[state].items()
}

# Sanity check: number of features available for each rule.
for key, value in osm_dict.items():
    print(key, len(value[0]))

In [16]:
# For every rule, add a boolean column to `kilns_gdf` marking the kilns that
# lie within the rule's threshold distance of the rule's layer, plus a
# `<rule>_threshold` column recording the distance that was used.
#
# The joins only need geometries, so geometry-only frames are used; this avoids
# carrying the flag columns added by earlier iterations through every join.
kiln_geoms = kilns_gdf[[kilns_gdf.geometry.name]]

for key, (gdf, threshold) in tqdm(osm_dict.items()):
    if key == "Inter kiln":
        # Kiln-to-kiln join. Keeping only pairs with left index < right index
        # removes self-matches and mirrored duplicates, so of two kilns that
        # are too close only the one with the larger index is flagged.
        # NOTE(review): confirm that flagging one kiln per pair is intended.
        intersection_gdf = gpd.sjoin(kiln_geoms, kiln_geoms, predicate="dwithin", distance=threshold)
        intersection_gdf = intersection_gdf[intersection_gdf.index < intersection_gdf.index_right]
    else:
        # Bring the layer into the kiln CRS so `distance` is measured in the same units.
        gdf = gdf[[gdf.geometry.name]].to_crs(kilns_gdf.crs).reset_index(drop=True)
        intersection_gdf = gpd.sjoin(gdf, kiln_geoms, predicate="dwithin", distance=threshold)
    # `index_right` holds the index of the matched kiln.
    kilns_gdf[key] = kilns_gdf.index.isin(intersection_gdf.index_right)
    kilns_gdf[f"{key}_threshold"] = threshold

# A kiln is non-compliant as soon as it violates at least one rule.
kilns_gdf["non_compliant"] = kilns_gdf[list(osm_dict.keys())].any(axis=1)

In [17]:
# Number of compliant (False) versus non-compliant (True) kilns.
kilns_gdf["non_compliant"].value_counts()

In [18]:
# Total number of kilns in the selected state.
kilns_gdf.shape[0]

In [19]:
# Write the kiln attributes, one flag column and one threshold column per rule,
# and the overall verdict to the per-state compliance file.
rules = [*osm_dict]
thresholds = [rule + "_threshold" for rule in rules]
kilns_gdf[
    ["class_name", "center_lat", "center_lon", "geometry"] + rules + thresholds + ["non_compliant"]
].to_file(f"../compliance/{state}.geojson", driver="GeoJSON")

## Combine all

In [26]:
# Build the per-state summary table of rule violations and print it as LaTeX.
name_mapping = {
    "uttar_pradesh": "Uttar Pradesh",
    "bihar": "Bihar",
    "west_bengal": "West Bengal",
    "haryana": "Haryana",
    "punjab": "Punjab",
}

# Columns of the compliance files that describe the kiln rather than a rule.
non_rule_columns = ["class_name", "center_lat", "center_lon", "geometry"]
# The two hospital sources are reported as a single "Hospital" criterion.
hospital_sources = ["Hospital_OSM", "Hospital_Gov"]

result_df = pd.DataFrame(columns=list(name_mapping.values()))

# Each state's file is read once; everything needed from it is collected here.
for state in ["uttar_pradesh", "bihar", "west_bengal", "haryana", "punjab"]:
    state_label = name_mapping[state]
    compliance_gdf = gpd.read_file(f"../compliance/{state}.geojson")

    for col in compliance_gdf:
        if col in non_rule_columns or col in hospital_sources or col.endswith("_threshold"):
            continue
        # Flag columns are boolean, so the sum is the number of flagged kilns.
        result_df.loc[col, state_label] = compliance_gdf[col].sum()

    # Count a kiln once even if both hospital sources flag it: adding the two
    # per-source counts would count such kilns twice.
    present_sources = [col for col in hospital_sources if col in compliance_gdf]
    if present_sources:
        result_df.loc["Hospital", state_label] = compliance_gdf[present_sources].any(axis=1).sum()

    result_df.loc["Brick Kiln count", state_label] = len(compliance_gdf)

result_df = result_df.rename(index={"non_compliant": "Non compliant"})

# Numeric working copy; NaN marks a rule that is not applied in a state.
counts = result_df.astype(float)
counts["Total"] = counts.sum(axis=1)

# Row order: criteria by descending total, then the two summary rows, then the
# percentage of non-compliant kilns (also computed for the "Total" column).
summary_rows = ["Non compliant", "Brick Kiln count"]
criteria = counts.drop(index=summary_rows).sort_values("Total", ascending=False)
counts = pd.concat([criteria, counts.loc[summary_rows]])
counts.loc["Percentage violations"] = counts.loc["Non compliant"] / counts.loc["Brick Kiln count"] * 100

# Render every cell as a rounded integer string; missing entries become "-".
result_df = counts.apply(
    lambda column: column.map(lambda x: "-" if pd.isna(x) else f"{int(round(x))}")
)

# add a multi-level column header for states
result_df.columns = pd.MultiIndex.from_product([["States"], result_df.columns])
# add a multi-level row header for classes
result_df.index = pd.MultiIndex.from_product([["Criterion"], result_df.index])

latex_code = result_df.to_latex()
# Post-process the generated LaTeX: right-align the value columns, centre the
# group headers, drop the rule under the row header, and separate the summary
# rows from the criteria with a \midrule.
print(
    latex_code.replace("llllllll", "llrrrrrr")
    .replace("{6}{r}", "{5}{c}")
    .replace("[t]{16}", "[c]{13}")
    .replace("\\cline{1-8}", "")
    .replace("& Non compliant", "\\midrule\n & Non compliant")
)

In [32]:
# Table of the distance threshold used for each rule in each state.
distance_df = pd.DataFrame(columns=list(name_mapping.values()))

for state in ["uttar_pradesh", "bihar", "west_bengal", "haryana", "punjab"]:
    compliance_gdf = gpd.read_file(f"../compliance/{state}.geojson")
    threshold_columns = [col for col in compliance_gdf if col.endswith("_threshold")]
    for col in threshold_columns:
        # The threshold is constant within a file, so the first row is enough.
        rule = col.replace("_threshold", "")
        distance_df.loc[rule, name_mapping[state]] = compliance_gdf[col].iloc[0].item()

# Use the same row order as the summary table, skipping rows without a threshold.
ordered_rules = [key for key in result_df.index.get_level_values(1) if key in distance_df.index]
distance_df = (
    distance_df.loc[ordered_rules]
    .fillna("-")
    .rename_axis("Criterion")
    .reset_index()
)
print(distance_df.to_latex(index=False).replace("llllll", "lrrrrr"))