In [1]:
import os
import json
from pprint import pprint
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
import ast
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import re
from pyproj import Proj, transform

In [None]:
# State processed by this notebook; used as the `state` argument of the
# compile_* helpers below.
state_name = "Odisha"

# Per-state metadata keyed by state name.
# NOTE(review): "code" is presumably the numeric state code used by the
# Bhunaksha portal referenced in "link" - confirm.
states = {
    "Odisha": {
        "code": 21,
        "link": "https://bhunakshaodisha.nic.in/",
    },
}

In [25]:
def compile_extent_shapefile(state, file_path, out_path="./districts/village_extents.shp"):
    """Flatten the nested village-extent JSON into a GeoDataFrame and save it.

    The JSON at ``file_path`` is expected to be nested as
    district -> taluk -> ri -> village -> sheet, where each sheet entry is a
    dict with the keys ``giscode``, ``extent`` and ``last_updated``.

    Args:
        state: Name of the state. Currently unused; kept so existing callers
            keep working.
        file_path: Path of the UTF-8 encoded extents JSON file.
        out_path: Where the shapefile is written. Defaults to the location
            this function has always used.

    Returns:
        A GeoDataFrame (EPSG:4326) with one row per sheet and a rectangular
        ``geometry`` built from the sheet's extent. Rows whose extent is
        missing or incomplete get a null geometry.
    """

    def extent_to_polygon(extent):
        # The extent is indexed as [x0, x1, y0, y1]. All four values are
        # needed, so bail out if the extent is absent, too short, or has a
        # hole in it (the original only checked the last value).
        if not extent or len(extent) < 4 or any(value is None for value in extent[:4]):
            return None
        x0, x1, y0, y1 = extent[:4]
        return Polygon([(x0, y0), (x0, y1), (x1, y1), (x1, y0)])

    with open(file_path, "r", encoding="utf8") as extent_file:
        state_dict = json.load(extent_file)

    # Walk the five nesting levels once, emitting one flat record per sheet.
    flat_villages = []
    for district, taluks in tqdm(state_dict.items()):
        for taluk, circles in taluks.items():
            for ri, villages in circles.items():
                for village, sheets in villages.items():
                    for sheet, village_obj in sheets.items():
                        flat_villages.append([
                            district,
                            taluk,
                            ri,
                            village,
                            sheet,
                            village_obj["giscode"],
                            village_obj["extent"],
                            village_obj["last_updated"],
                        ])

    df = pd.DataFrame(
        flat_villages,
        columns=["district", "taluk", "ri", "village", "sheet", "giscode", "extent", "last_updated"],
    )
    df["geometry"] = df["extent"].apply(extent_to_polygon)

    # The raw extent list cannot be stored in a shapefile field, so it is
    # dropped once the polygon has been derived from it.
    df = gpd.GeoDataFrame(df.drop(columns=["extent"]), geometry="geometry", crs="epsg:4326")

    # Make sure the target directory exists before writing.
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
    # NOTE: shapefile field names are limited to 10 characters, so
    # "last_updated" is truncated on disk; the returned frame keeps the full name.
    df.to_file(out_path)

    return df


In [30]:
extent_json = "./village_extents.json"

# Populates ./districts/village_extents.shp which is a map of all villages by rough extent.
# The frame returned by the call is not kept: `extents` is whatever is read
# back from the shapefile that was just written.
compile_extent_shapefile(state_name, extent_json)
extents = gpd.read_file("./districts/village_extents.shp")

100%|██████████| 30/30 [00:00<00:00, 37.08it/s]
  df.to_file("./districts/village_extents.shp")
  ogr_write(


In [6]:
def compile_plots_shapefile(state, file_list, edf, source_crs="epsg:32643"):
    """Build one plot-level shapefile per district from per-village JSON files.

    Each file in ``file_list`` is read from ``./villages/`` and must contain a
    ``plots`` mapping whose values carry a WKT ``geometry``. Plots are tagged
    with the village giscode (the file name without ``.json``), grouped by
    district via ``edf``, reprojected to EPSG:4326, joined with the village
    attributes in ``edf`` and written to ``./districts/<district>.shp``.

    Args:
        state: Name of the state. Currently unused; kept for compatibility.
        file_list: File names (not paths) of village JSON files in ``./villages``.
        edf: Village extents GeoDataFrame with at least the columns
            ``district``, ``giscode`` and ``geometry``.
        source_crs: CRS the plot WKT coordinates are expressed in. The default
            (WGS 84 / UTM zone 43N) is the value that used to be hard-coded.
            NOTE(review): confirm it is the right zone for the state being
            processed.

    Returns:
        None. Results are written to disk; skipped files are reported on stdout.
    """

    village_frames, noplots, no204s = [], [], []
    for path in tqdm(file_list):

        giscode = os.path.splitext(path)[0]

        try:
            with open(f"./villages/{path}", "r", encoding="utf8") as village_file:
                village_dict = json.load(village_file)
        except (OSError, ValueError):
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"Failed to open {path}")
            continue

        # A file without a usable "plots" mapping is reported like an empty one
        # instead of aborting the whole run with a KeyError.
        plots = village_dict.get("plots") if isinstance(village_dict, dict) else None
        if not plots:
            noplots.append(giscode)
            continue

        df = gpd.GeoDataFrame.from_dict(plots, orient="index").reset_index(drop=True)
        if df.empty:
            noplots.append(giscode)
            continue

        try:
            df["geometry"] = gpd.GeoSeries.from_wkt(df["geometry"])
            df = df.set_geometry('geometry')
            df = df.set_crs(source_crs)
            df["giscode"] = giscode
        except Exception:
            # Best effort: plots without parseable geometry are reported below.
            no204s.append(giscode)
            continue

        village_frames.append((giscode, df))

    print(f"No plots found for {len(noplots)} files: {noplots}")
    print(f"204 errors for {len(no204s)} files: {no204s}")

    os.makedirs("./districts", exist_ok=True)

    # Districts are grouped on the first three characters of the label;
    # compute that key once instead of on every iteration.
    district_keys = edf["district"].apply(lambda label: label[:3])

    for district in tqdm(district_keys.unique()):

        district_edf = edf[district_keys == district]

        # Set membership keeps the per-district scan linear in the number of
        # village frames while preserving their original (file list) order.
        district_codes = set(district_edf["giscode"])
        district_dfs = [frame for code, frame in village_frames if code in district_codes]

        if not district_dfs:
            continue

        villages = pd.concat(district_dfs, ignore_index=True)
        villages = villages.drop(columns=["owner_plots", "extent"], errors="ignore")
        villages = villages.to_crs('epsg:4326')

        # Both sides carry a geometry column, so the merge suffixes them;
        # keep the plot geometry (left side) and discard the village extent.
        merged = pd.merge(villages, district_edf, on="giscode", how="left")
        merged["geometry"] = merged["geometry_x"]
        merged = merged.set_geometry('geometry')
        merged = merged.drop(["geometry_x", "geometry_y"], axis=1)

        # Keep only the text between 'href="' and '>Map', minus its last character.
        merged["link"] = merged["link"].apply(lambda x: x.split('href="')[1].split('>Map')[0].strip()[:-1])
        merged.to_file(f"./districts/{district.split(',')[0]}.shp")


    # villages = pd.concat(gdf, ignore_index=True).drop(["owner_plots", "extent"], axis=1)
    # villages = villages.to_crs('epsg:4326')

    # villages["link"] = villages["link"].apply(lambda x: x.split('href="')[1].split('>Map')[0].strip()[:-1])
    # villages.to_file("./districts/villages.shp")

    # return villages


In [6]:
plot_files = [name for name in os.listdir("./villages") if name.endswith(".json")]

# Writes one ./districts/<district>.shp per district, mapping all landholdings by exact polygons
compile_plots_shapefile(state_name, plot_files, extents)

 30%|██▉       | 12183/41168 [02:57<15:31, 31.12it/s]

Failed to open RVM1005271000050121770000.json


 42%|████▏     | 17214/41168 [05:15<07:12, 55.37it/s]  

Failed to open RVM1404271400040172970000.json


 86%|████████▋ | 35544/41168 [14:11<02:40, 34.93it/s]  

Failed to open RVM3006273000060371200000.json


100%|██████████| 41168/41168 [17:45<00:00, 38.66it/s]


No plots found for 360 files: ['RVM0101270100010001000000', 'RVM0101270100010001280000', 'RVM0101270100010001300000', 'RVM0102270100020002090000', 'RVM0104270100040004500000', 'RVM0201270200010009590000', 'RVM0201270200010009710000', 'RVM0201270200010009840000', 'RVM0202270200020012160000', 'RVM0203270200030012850000', 'RVM0203270200030013070000', 'RVM0203270200030013260000', 'RVM0203270200030013420000', 'RVM0203270200030013460000', 'RVM0203270200030013650000', 'RVM0203270200030013720000', 'RVM0203270200030013760000', 'RVM0203270200030013780000', 'RVM0203270200030013790000', 'RVM0203270200030013820000', 'RVM0203270200030013830000', 'RVM0203270200030013850000', 'RVM0203270200030013920000', 'RVM0203270200030014060000', 'RVM0203270200030014080000', 'RVM0203270200030014150000', 'RVM0203270200030014360000', 'RVM0204270200040015140000', 'RVM0304270300040020390000', 'RVM0304270300040020400000', 'RVM0314270300140028980000', 'RVM0315270300150031340000', 'RVM0402270400020033850000', 'RVM04072704

Total Area : 0.9500
Pot kharaba : 0.0200
Owner Name : रामधन मुकाजी सपकाळ
Khata No. : 179
---------------------------------
Survey No. : 11/अ
Total Area : 0.0000
Pot kharaba : 0.2200
Owner Name : वानप्रकल्प पाटबंधारे विभाग
Khata No. : 294
---------------------------------
Survey No. : 11/अ
Total Area : 0.4100
Pot kharaba : 0.0200
Owner Name : गोर्वधन मुकाजी सपकाळ
Khata No. : 377
---------------------------------
Survey No. : 11/अ/1
Total Area : 0.6500
Pot kharaba : 0.0000
Owner Name : मुरलीधर वासुदेव बाजारे
Khata No. : 150
---------------------------------
Survey No. : 11/अ/1
Total Area : 0.3400
Pot kharaba : 0.0000
Owner Name : रामधन मुकाजी सपकाळ
Khata No. : 179
---------------------------------
Survey No. : 11/अ/1
Total Area : 0.0000
Pot kharaba : 0.3600
Owner Name : वानप्रकल्प पाटबंधारे विभाग
Khata No. : 294
---------------------------------
Survey No. : 11/अ/1
Total Area : 1.0200
Pot kharaba : 0.0500
Owner Name : रुखमाबाई संजु डोमाळे (लग्ना पुर्वीचे नांव रुखमाबाई पिता रामा मोरे)
Kha

In [44]:
def shift_geometry(geometry):
    """Re-express the coordinates of a WKT-like string from UTM zone 44 in UTM zone 43.

    Every ``"<x> <y>"`` number pair found in ``geometry`` is read as a UTM
    zone 44 (WGS84) position, converted to longitude/latitude and projected
    again into UTM zone 43. All other text is left untouched.

    Args:
        geometry: String containing space-separated coordinate pairs.

    Returns:
        The same string with every coordinate pair replaced.

    Raises:
        TypeError: If ``geometry`` is not a string (for example ``None``).
    """

    # Build the projections once per call; creating them inside the regex
    # callback repeated the work for every single coordinate pair.
    zone44 = Proj(proj='utm', zone=44, ellps='WGS84')
    zone43 = Proj(proj='utm', zone=43, ellps='WGS84')

    def translate_zones(match):
        x, y = float(match.group(1)), float(match.group(2))

        lon, lat = zone44(x, y, inverse=True)
        x, y = zone43(lon, lat)
        return f"{x} {y}"

    # The optional leading minus keeps the sign attached to its number, so
    # negative coordinates are converted as such instead of losing the sign.
    return re.sub(r'(-?[\d.]+) (-?[\d.]+)', translate_zones, geometry)


In [46]:
# District labels start with "<number>,"; select the villages whose district
# number lies strictly between 7 and 15, i.e. 8 through 14 inclusive.
district_numbers = extents["district"].apply(lambda label: int(label.split(",")[0]))
mislabelled = extents[district_numbers.between(8, 14)]
mislabelled

Unnamed: 0,category,district,taluk,village,giscode,geometry
7891,"R,Rural","12,????????","02,??????","271200020134770000,??????",RVM1202271200020134770000,"POLYGON ((73.97943 20.49524, 73.97943 20.47198..."
7892,"R,Rural","12,????????","02,??????","271200020135480000,?????? ??",RVM1202271200020135480000,"POLYGON ((80.10031 20.16628, 80.10031 20.12647..."
7893,"R,Rural","12,????????","02,??????","271200020134790000,??????",RVM1202271200020134790000,"POLYGON ((74.02823 20.51827, 74.02823 20.43443..."
7894,"R,Rural","12,????????","02,??????","271200020134730000,?????",RVM1202271200020134730000,"POLYGON ((74.02223 20.51593, 74.02223 20.48916..."
7895,"R,Rural","12,????????","02,??????","271200020135360000,????????",RVM1202271200020135360000,"POLYGON ((80.03297 20.41331, 80.03297 20.39029..."
...,...,...,...,...,...,...
43576,"U,Urban","14,Yawatmal","01,Yawatmal CTSO","000000000000001410,Aarni",UCM1401000000000000001410,"POLYGON ((77.06986 21.10691, 77.06986 21.07416..."
43577,"U,Urban","14,Yawatmal","01,Yawatmal CTSO","000000000000001406,Digras",UCM1401000000000000001406,"POLYGON ((77.72695 20.10946, 77.72695 20.09215..."
43578,"U,Urban","14,Yawatmal","01,Yawatmal CTSO","000000000000001407,Pusad",UCM1401000000000000001407,"POLYGON ((77.58574 19.92846, 77.58574 19.89523..."
43579,"U,Urban","14,Yawatmal","01,Yawatmal CTSO","000000000000001416,Wani",UCM1401000000000000001416,"POLYGON ((78.96685 20.07014, 78.96685 20.04039..."


In [54]:
# Villages whose extent starts west of this longitude are treated as mislabelled.
# NOTE(review): threshold taken from the original code - confirm its origin.
WEST_LIMIT_LON = 76.5
# Degrees of longitude added to the extents; one UTM zone is 6 degrees wide,
# which matches the zone 44 -> zone 43 re-projection done by shift_geometry.
LON_SHIFT = 6

os.makedirs("./villages_backup", exist_ok=True)
os.makedirs("./villages_new", exist_ok=True)

counter = 0
for giscode in tqdm(mislabelled["giscode"]):

    source_path = f"./villages/{giscode}.json"
    fixed_path = f"./villages_new/{giscode}.json"

    # Skip villages that were never downloaded or were already fixed, which
    # also makes the cell safe to re-run after an interruption.
    if (not os.path.exists(source_path)) or os.path.exists(fixed_path):
        continue

    try:
        with open(source_path, "r", encoding="utf8") as file:
            village_dict = json.load(file)
    except (OSError, ValueError):
        counter += 1
        # Without this, the previous iteration's village_dict would be
        # processed again and written out under the wrong giscode.
        continue

    if village_dict["extent"][0] and village_dict["extent"][0] < WEST_LIMIT_LON:

        # Keep an untouched copy before modifying anything.
        with open(f"./villages_backup/{giscode}.json", "w", encoding="utf8") as file:
            json.dump(village_dict, file)

        village_dict["extent"][0] += LON_SHIFT
        village_dict["extent"][1] += LON_SHIFT

        for plot in village_dict["plots"].values():

            # Work out every new value first so a failure leaves the plot
            # completely unchanged rather than half shifted.
            try:
                new_geometry = shift_geometry(plot["geometry"])
                new_x0 = plot["extent"][0] + LON_SHIFT
                new_x1 = plot["extent"][1] + LON_SHIFT
            except Exception:
                counter += 1
                continue

            plot["extent"][0] = new_x0
            plot["extent"][1] = new_x1
            plot["geometry"] = new_geometry

        with open(fixed_path, "w", encoding="utf8") as file:
            json.dump(village_dict, file)

print(f"Failed to update {counter} plots")

100%|██████████| 10630/10630 [52:25<00:00,  3.38it/s] 

Failed to update 71 plots



