In [1]:
import os
import json
from pprint import pprint
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
import ast
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import re
from pyproj import Proj, transform

In [2]:
# Name of the state this notebook run processes.
state_name = "Odisha"

# Per-state metadata: a numeric code and a portal link for each state name.
states = {
    "Odisha": {
        "code": 21,
        "link": "https://bhunakshaodisha.nic.in/",
    },
}

In [3]:
def _extent_to_polygon(extent):
    """Build a rectangle from an extent list, or return None if it is unusable.

    The first two entries are used as x coordinates and the next two as
    y coordinates. A missing extent, one with fewer than four entries, or one
    with a None among its first four entries yields None.
    """
    if extent is None or len(extent) < 4:
        return None
    if any(value is None for value in extent[:4]):
        return None
    x0, x1, y0, y1 = extent[:4]
    return Polygon([(x0, y0), (x0, y1), (x1, y1), (x1, y0)])


def compile_extent_shapefile(state, file_path):
    """Flatten the nested village-extent JSON into a GeoDataFrame and save it.

    The JSON at ``file_path`` is read as a five-level nested mapping
    (district -> taluk -> ri -> village -> sheet) whose leaves are dicts with
    ``giscode``, ``extent`` and ``last_updated`` keys. Every leaf becomes one
    row of the result.

    Parameters
    ----------
    state : str
        Currently unused; kept so existing callers keep working.
    file_path : str
        Path to the UTF-8 encoded extents JSON file.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``district``, ``taluk``, ``ri``, ``village``, ``sheet``,
        ``giscode``, ``last_updated`` and ``geometry``. ``geometry`` is None
        for rows whose extent could not be turned into a rectangle.

    Side effects
    ------------
    Writes ``./districts/village_extents.shp``, creating ``./districts`` first
    if it does not exist.
    """
    with open(file_path, "r", encoding="utf8") as extent_file:
        state_dict = json.load(extent_file)

    # Walk the nesting with .items() so each level is looked up only once.
    flat_villages = []
    for district, taluks in tqdm(state_dict.items()):
        for taluk, ris in taluks.items():
            for ri, villages in ris.items():
                for village, sheets in villages.items():
                    for sheet, village_obj in sheets.items():
                        flat_villages.append([
                            district, taluk, ri, village, sheet,
                            village_obj["giscode"],
                            village_obj["extent"],
                            village_obj["last_updated"],
                        ])

    df = pd.DataFrame(
        flat_villages,
        columns=["district", "taluk", "ri", "village", "sheet", "giscode", "extent", "last_updated"],
    )
    df["geometry"] = df["extent"].apply(_extent_to_polygon)

    # The raw extent list is dropped once the rectangle has been derived from it.
    # NOTE(review): EPSG:4326 is assigned, not reprojected to - this assumes the
    # extent values are already lon/lat; confirm against the data source.
    df = df.set_geometry("geometry").drop(columns=["extent"]).set_crs("epsg:4326")

    # Writing fails if the target directory is missing, so create it up front.
    os.makedirs("./districts", exist_ok=True)
    df.to_file("./districts/village_extents.shp")

    return df


In [4]:
# Nested extents JSON that compile_extent_shapefile flattens.
extent_json = "./village_extents.json"

# Build ./districts/village_extents.shp (one rough bounding rectangle per village
# sheet), then reload it from disk so `extents` holds what was actually written.
extents = compile_extent_shapefile(state=state_name, file_path=extent_json)
extents = gpd.read_file("./districts/village_extents.shp")

100%|██████████| 30/30 [00:00<00:00, 51.30it/s]
  df.to_file("./districts/village_extents.shp")
  ogr_write(


In [5]:
def compile_plots_shapefile(state, file_list, edf):
    """Combine per-village plot JSON files into one CSV per district.

    Each name in ``file_list`` is loaded from ``./villages/<name>`` and its
    ``plots`` mapping becomes a table with one row per plot. The tables are
    then grouped by district (the first three characters of ``edf["district"]``),
    left-joined to ``edf`` on ``giscode`` and written to
    ``./districts/<district prefix>.csv``.

    Parameters
    ----------
    state : str
        Currently unused; kept so existing callers keep working.
    file_list : iterable of str
        File names relative to ``./villages``.
    edf : pandas.DataFrame or geopandas.GeoDataFrame
        Village extents table. Must provide ``district``, ``giscode`` and
        ``geometry`` columns.

    Returns
    -------
    None
        Results are written to disk; a summary of skipped files is printed.

    Notes
    -----
    Plot rows are expected to carry ``extent``, ``owner_plots`` and ``link``
    fields, since those columns are dropped or parsed below.
    """
    frames, noplots, no204s = [], [], []
    for path in tqdm(file_list):

        # Only I/O and parse failures are treated as "failed to open"; a bare
        # except here would also swallow Ctrl-C in the middle of a long batch.
        try:
            with open(f"./villages/{path}", "r", encoding="utf8") as village_file:
                village_dict = json.load(village_file)
        except (OSError, ValueError):
            print(f"Failed to open {path}")
            continue

        # A file without a usable "plots" mapping is recorded as having no
        # plots, so one bad file does not abort the whole run.
        plots_by_id = village_dict.get("plots") if isinstance(village_dict, dict) else None
        if not plots_by_id:
            noplots.append(path.split(".")[0])
            continue

        plots = gpd.GeoDataFrame.from_dict(plots_by_id, orient="index")
        # ignore_index renumbers the rows 0..n-1 in place of the plot keys.
        df = pd.concat([plots], ignore_index=True)
        if df.empty:
            noplots.append(path.split(".")[0])
            continue

        try:
            # The raw extent value is carried as "geometry"; conversion to a real
            # geometry column and CRS assignment (epsg:32643) are disabled.
            df["geometry"] = df["extent"]
            # NOTE(review): this stores the file name including its extension,
            # while the skip lists above use the name without it - confirm which
            # form edf["giscode"] uses, otherwise no district will match.
            df["giscode"] = path
        except Exception:
            # Best-effort: files lacking the expected fields are reported, not fatal.
            no204s.append(path.split(".")[0])
            continue

        frames.append((path, df))

    print(f"No plots found for {len(noplots)} files: {noplots}")
    print(f"204 errors for {len(no204s)} files: {no204s}")

    # to_csv does not create missing directories.
    os.makedirs("./districts", exist_ok=True)

    # Computed once and reused as both the district list and the row mask.
    district_prefixes = edf["district"].apply(lambda name: name[:3])

    for district in tqdm(district_prefixes.unique()):

        district_edf = edf[district_prefixes == district]
        # Set membership avoids scanning the giscode column once per frame.
        district_codes = set(district_edf["giscode"])
        district_dfs = [df for code, df in frames if code in district_codes]

        if not district_dfs:
            print(f"No plots found for {district}")
            continue

        villages = pd.concat(district_dfs, ignore_index=True).drop(["owner_plots", "extent"], axis=1)

        # Both sides have a "geometry" column, so the merge yields geometry_x
        # (plots) and geometry_y (extents); the plot-level one is kept.
        merged = pd.merge(villages, district_edf, on="giscode", how="left")
        merged["geometry"] = merged["geometry_x"]
        merged = merged.drop(["geometry_x", "geometry_y"], axis=1)

        # Keep the text between 'href="' and '>Map', minus its final character.
        merged["link"] = merged["link"].apply(lambda x: x.split('href="')[1].split('>Map')[0].strip()[:-1])
        merged.to_csv(f"./districts/{district.split(',')[0]}.csv")

    # NOTE: per-district .shp output and a combined ./districts/villages.shp
    # export (reprojected to epsg:4326, returning the combined frame) were
    # sketched here previously but are disabled; only the CSVs are written.


In [6]:
# Per-village plot JSON files. Sorted because os.listdir returns entries in
# arbitrary order, which would make the processing order differ between runs.
plot_files = sorted(file for file in os.listdir("./villages") if file.endswith(".json"))

# Writes one ./districts/<district prefix>.csv per district with the plot-level
# rows joined to the village extents (currently disabled).
# compile_plots_shapefile(state_name, plot_files, extents)