In [1]:
import glob
import json
import os
import shutil
import subprocess

import geopandas as gpd
import pandas as pd
import requests

In [2]:
import urllib3
# Silence urllib3's InsecureRequestWarning, which is emitted for requests made
# with verify=False (as the map-service requests in this notebook are).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [3]:
# Public map page; sent as the "referer" header on the map-service requests.
map_url = "https://grammanchitra.gov.in/#"

In [4]:
# Site JavaScript config file; the "dlayertoken" value is parsed out of its text
# and used as the Token parameter of the map-service requests.
config_url = "https://grammanchitra.gov.in/js/config.js"

In [5]:
r = requests.get(config_url)
# Stop here on an HTTP error rather than trying to parse a token out of an error page.
r.raise_for_status()

In [6]:
# The token is embedded in config.js as:  dlayertoken = "<value>"
_TOKEN_MARKER = 'dlayertoken = "'
if _TOKEN_MARKER not in r.text:
    # Without this guard a changed config.js surfaces as a bare IndexError.
    raise ValueError(f"Could not find {_TOKEN_MARKER!r} in {config_url}; its layout may have changed")
token = r.text.split(_TOKEN_MARKER)[1].split('"')[0].strip()

In [7]:
# Display the extracted token as a quick sanity check.
token

'Swk5CQCB-pnToLpxQlCjkPshu0eLdlAEE6dHaV7CeMT--7V02xPX0kvbHJG-1iaS59xv8Vev63mhjQ-nsmpYTQ..'

In [8]:
# Headers sent with the map-service requests: the map page as referer plus a
# desktop-browser User-Agent string.
mod_headers = {
    "referer": map_url,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36 Edg/86.0.622.63"
}

In [9]:
# MapServer endpoints to scrape; every layer listed by each server is downloaded.
layers = [
    "https://mapservice.gov.in/mapserviceserv176/rest/services/Panchayat/panchayat_admin/MapServer"
]

In [10]:
# Query-string parameters for the server/layer metadata requests: the access
# token and "pjson" as the requested response format.
params = {
    "Token": token,
    "f": "pjson"
}

In [11]:
# Per-layer override of how many features are requested per query, keyed by
# layer id. Layers not listed here fall back to the maxRecordCount reported in
# the layer's metadata.
max_record_count_mapping = {
    0: 1,
    1: 50,
    2: 100,
    3: 300,
#     7: 300
}

In [12]:
# Root folder for all outputs: scrape() writes under <output_folder>/scrape and
# merge() writes under <output_folder>/shapefiles.
output_folder = "output"

In [13]:
def make_folder(folderpath):
    """Create ``folderpath`` together with any missing parent directories.

    An empty path is ignored, and calling this for a directory that already
    exists is a no-op.

    Args:
        folderpath: Directory path to create.

    Raises:
        FileExistsError: if ``folderpath`` exists but is not a directory.
    """
    if folderpath == "":
        return
    # os.makedirs creates the parents itself and, with exist_ok=True, tolerates
    # the directory appearing between an existence check and the creation.
    os.makedirs(folderpath, exist_ok=True)

In [14]:
def scrape(layer_url):
    """Download every feature of one MapServer layer into JSON chunk files.

    The layer metadata is fetched first to learn its name, id, page size and
    whether it supports pagination. Features are then requested in chunks and
    each chunk is written to
    ``<output_folder>/scrape/<layer name>/<start>_<start + chunk size>.json``.
    Chunks whose file already exists are skipped, so an interrupted run can be
    resumed by calling the function again.

    Relies on the notebook-level ``params``, ``token``, ``mod_headers``,
    ``output_folder``, ``max_record_count_mapping`` and ``make_folder``.

    Args:
        layer_url: URL of the layer, e.g. ``.../MapServer/0``.

    Returns:
        Absolute path of the folder holding the chunk files.

    Raises:
        requests.HTTPError: if the server answers with an HTTP error status.
        RuntimeError: if a response body is a JSON object with an ``error`` member.
    """
    query_url = layer_url + '/query'

    def _checked_json(response):
        # Fail loudly instead of saving an error payload as if it were a chunk:
        # chunk files that already exist are never downloaded again.
        response.raise_for_status()
        payload = response.json()
        if isinstance(payload, dict) and 'error' in payload:
            raise RuntimeError(f"Request to {response.url} failed: {payload['error']}")
        return payload

    metadata = _checked_json(requests.get(layer_url, params=params, verify=False, headers=mod_headers))
    # Make Folder
    folderpath = os.path.join(os.path.abspath(output_folder), 'scrape', metadata['name'].replace(' ', '_'))
    make_folder(folderpath)
    # Scrape Now
    # A per-layer override wins over the page size advertised by the server.
    _max_record_count = max_record_count_mapping.get(metadata['id'], metadata['maxRecordCount'])
    print("-- Using max_record_count: ", _max_record_count)
    gparams = {
        "Token": token,
        "f": "json",
        "where": "1=1",
        "returnGeometry": 'true',
        "returnTrueCurves": 'true',
        "spatialRel": "",
        "geometry": "",
        "geometryType": "",
        "esriGeometryEnvelope": "",
        "outFields": "*"
    }
    # Metadata without pagination info is treated as "no pagination" and
    # handled through the object-id path below.
    supports_pagination = (metadata.get('advancedQueryCapabilities') or {}).get('supportsPagination', False)
    if supports_pagination:
        # Get Count
        r = requests.get(query_url, params={**gparams, 'returnCountOnly': 'true'}, verify=False, headers=mod_headers)
        count = _checked_json(r)['count']
    else:
        # Get OIDs (a null "objectIds" is treated as an empty layer)
        r = requests.get(query_url, params={**gparams, 'returnIdsOnly': 'true'}, verify=False, headers=mod_headers)
        oids = _checked_json(r)["objectIds"] or []
        count = len(oids)
    # Iterate
    print("-- Total Features: ", count)
    for i in range(0, count, _max_record_count):
        print(f'-- Scrapping: Progress {(i*100/count):0.2f} %', end='\r')
        feature_path = os.path.join(folderpath, f"{i}_{i+_max_record_count}.json")
        if os.path.exists(feature_path):
            continue
        if supports_pagination:
            chunk_params = {**gparams, "resultOffset": i, "resultRecordCount": _max_record_count}
            r = requests.get(query_url, params=chunk_params, verify=False, headers=mod_headers)
        else:
            # Ids go in the POST body as "id, id, id"; a long id list would not fit in a URL.
            chunk_params = {**gparams, "objectIds": ", ".join(map(str, oids[i:i+_max_record_count]))}
            r = requests.post(query_url, data=chunk_params, verify=False, headers=mod_headers)
        features = _checked_json(r)
        # Write to a temporary name and rename, so an interrupted write never
        # leaves a truncated file that a later run would treat as complete.
        partial_path = feature_path + '.part'
        with open(partial_path, 'w') as f:
            json.dump(features, f)
        os.replace(partial_path, feature_path)
    #
    print(f'-- Scrapping: Progress 100 %               ', end='\n')
    return folderpath

In [15]:
def merge(scrapped_folder):
    """Merge the JSON chunk files of one scraped layer into a single dataset.

    Every ``*.json`` file in ``scrapped_folder`` is read with
    ``geopandas.read_file``, the frames are concatenated in chunk order, and
    the result is written with ``to_file`` to
    ``<output_folder>/shapefiles/<name of scrapped_folder>``.

    Relies on the notebook-level ``output_folder`` and ``make_folder``.

    Args:
        scrapped_folder: Folder of chunk files, as returned by ``scrape``.

    Returns:
        Absolute path of the output folder.

    Raises:
        ValueError: if ``scrapped_folder`` contains no ``*.json`` files.
    """
    def _chunk_order(path):
        # Chunk files are named "<start>_<end>.json"; sort numerically by <start>
        # so "100_200" comes before "1000_1100". Other names go last, by path.
        head = os.path.basename(path).split('_', 1)[0]
        return (0, int(head), path) if head.isdigit() else (1, 0, path)

    folder_name = os.path.basename(scrapped_folder)
    outfolderpath = os.path.join(os.path.abspath(output_folder), 'shapefiles', folder_name)
    # glob returns files in arbitrary order; sorting makes the merged feature
    # order reproducible between runs.
    json_files = sorted(glob.glob(os.path.join(scrapped_folder, '*.json')), key=_chunk_order)
    print("-- Merging ", len(json_files), " Files")
    if not json_files:
        raise ValueError(f"No JSON chunk files found in {scrapped_folder}")
    store = [gpd.read_file(file) for file in json_files]
    #
    _df = pd.concat(store).reset_index(drop=True)
    make_folder(outfolderpath)
    _df.to_file(outfolderpath)
    return outfolderpath

In [16]:
def zipit(shapefile_folder):
    """Zip a folder into ``<name>.ZIP`` next to it, then delete the folder.

    Runs the external ``zip`` tool with ``-s 50m`` from the folder's parent
    directory, so the archive stores paths relative to that parent. The folder
    is removed only after ``zip`` has exited successfully.

    Args:
        shapefile_folder: Path of the folder to archive, as returned by ``merge``.

    Raises:
        FileNotFoundError: if the ``zip`` executable is not available.
        subprocess.CalledProcessError: if ``zip`` exits with a non-zero status.
    """
    print("-- Zipping ", shapefile_folder)
    # normpath drops a trailing separator, which would otherwise make the name empty.
    parent, name = os.path.split(os.path.normpath(shapefile_folder))
    # An argument list (no shell) keeps unusual characters in the name from being
    # interpreted, and cwd= avoids changing the notebook's working directory.
    subprocess.run(
        ["zip", "-s", "50m", f"{name}.ZIP", name, "-r"],
        cwd=parent or None,
        check=True,
    )
    # Reached only when zip succeeded, so a failed archive never costs the data.
    shutil.rmtree(shapefile_folder, True)

In [None]:
# For every configured server: list its layers, then scrape, merge and zip each one.
for server_url in layers:
    r = requests.get(server_url, params=params, verify=False, headers=mod_headers)
    # Fail early on an HTTP error instead of on a missing 'layers' key below.
    r.raise_for_status()
    for lyr in r.json()['layers']:
        msg = f"\n-- Scrapping {lyr['name']}"
        lyr_url = server_url+'/'+str(lyr['id'])
        print(msg, lyr_url)
        # scrape() fetches the layer metadata itself, so no separate metadata
        # request is made here.
        fp = scrape(lyr_url)
        shp_fldr = merge(fp)
        zipit(shp_fldr)


-- Scrapping State https://mapservice.gov.in/mapserviceserv176/rest/services/Panchayat/panchayat_admin/MapServer/0
-- Using max_record_count:  1
-- Total Features:  37
-- Scrapping: Progress 100 %               
-- Merging  37  Files
-- Zipping  /home/sandy/codebase/scrape_grammanchitra.gov.in/output/shapefiles/State

-- Scrapping District https://mapservice.gov.in/mapserviceserv176/rest/services/Panchayat/panchayat_admin/MapServer/1
-- Using max_record_count:  50
-- Total Features:  727
-- Scrapping: Progress 100 %               
-- Merging  15  Files
-- Zipping  /home/sandy/codebase/scrape_grammanchitra.gov.in/output/shapefiles/District

-- Scrapping Block https://mapservice.gov.in/mapserviceserv176/rest/services/Panchayat/panchayat_admin/MapServer/2
-- Using max_record_count:  100
-- Total Features:  6666
-- Scrapping: Progress 100 %               
-- Merging  67  Files
-- Zipping  /home/sandy/codebase/scrape_grammanchitra.gov.in/output/shapefiles/Block

-- Scrapping GP https://map

In [66]:
# Hand-built query parameters for a manual check of one 300-feature chunk
# (object ids 16500-16799), mirroring what scrape() sends for layers without
# pagination support.
a = {
    # Reuse the token parsed from config.js instead of a pasted copy.
    'Token': token,
    'f': 'json',
    'where': '1=1',
    'returnGeometry': 'true',
    'returnTrueCurves': 'true',
    'spatialRel': '',
    'geometry': '',
    'geometryType': '',
    'esriGeometryEnvelope': '',
    'outFields': '*',
    # Same "id, id, id" string as the spelled-out list it replaces.
    'objectIds': str(list(range(16500, 16800)))[1:-1]
}

In [67]:
# Manual check: POST the hand-built parameters in `a` straight to the query
# endpoint of layer 3.
r = requests.post(
    'https://mapservice.gov.in/mapserviceserv176/rest/services/Panchayat/panchayat_admin/MapServer/3/query',
    data=a,
    headers=mod_headers
)

In [68]:
# Number of features returned for the requested object ids.
len(r.json()['features'])

300

In [69]:
# Inspect the returned features (each has 'attributes' and 'geometry').
r.json()['features']

[{'attributes': {'dtname': 'TAMKUR',
   'stname': 'KARNATAKA',
   'stcode11': '29',
   'dtcode11': '571',
   'sdtcode11': '05537',
   'FID': 16500,
   'objectid': 232548,
   'blkcode11': ' ',
   'blkname': 'TUMAKURU',
   'blklgdcode': '5913',
   'gpcode': '220381',
   'gpname': 'Chikkaseebi',
   'sdtname': 'TAMKUR',
   'st_area_sh': 4580363.89971,
   'st_length_': 8102.25779782,
   'Tot_Pop': 7483},
  'geometry': {'rings': [[[8595609.2421, 1491792.4041999988],
     [8595836.773400001, 1491790.865600001],
     [8595993.2962, 1491792.267000001],
     [8596096.921799999, 1491806.0837000012],
     [8596173.600000001, 1491803.7919000015],
     [8596173.603999998, 1491803.7917999998],
     [8596189.9225, 1491609.365699999],
     [8596160.562600002, 1491391.2393000014],
     [8596094.2, 1491110.0604000017],
     [8596094.201900002, 1491110.056400001],
     [8596144.769000001, 1490956.3308999985],
     [8596186.033100002, 1490830.8896000013],
     [8596329.1767, 1490412.6605000012],
     [8596