# Peek at the GeoJSON Files

In [1]:
import os
import json
# import urllib
import numpy as np
import geopandas as gpd
import pandas as pd
from urllib.request import urlopen

## The geo data is downloaded from Aliyun

Thanks to the website [https://geo.datav.aliyun.com/areas_v2] for providing the data.

In [2]:
# Directory used as the on-disk cache for downloaded GeoJSON files and for
# the exported main.json. Set the GEO_DATA_DIR environment variable to use a
# different location; when it is unset the original hard-coded path is used.
local_dir = os.environ.get('GEO_DATA_DIR', 'C:\\Sync\\GeoData')

def full_url(adcode):
    """Build the download URL of the `<adcode>_full.json` boundary file.

    Args:
        adcode: Administrative code; it is interpolated into the file name
            as-is.

    Returns:
        The URL string pointing at the file under the aliyun `bound` folder.
    """
    base = 'https://geo.datav.aliyun.com/areas_v2/bound'
    return '{}/{}_full.json'.format(base, adcode)

def fetch_adcode(adcode=100000):
    """Return the GeoJSON object of [adcode], caching it inside [local_dir].

    The file name of the cache entry is the base name of the download URL.
    When that file already exists it is loaded from disk and no request is
    made; otherwise the JSON is requested from the website, stored in
    [local_dir] and returned.

    Args:
        adcode: Administrative code to fetch, defaults to 100000.

    Returns:
        The parsed JSON object.

    Raises:
        Whatever `urlopen`, `json.load` or the file operations raise; no
        error is swallowed here.
    """
    url = full_url(adcode)

    local = os.path.join(local_dir, os.path.basename(url))
    if os.path.isfile(local):
        print(f'Found file in {local}')
        # Use a context manager so the handle is closed deterministically
        with open(local, encoding='utf-8') as f:
            return json.load(f)

    with urlopen(url) as response:
        print(f'Requiring json from {url}')
        obj = json.load(response)

    # The cache folder may not exist yet on a fresh machine
    os.makedirs(local_dir, exist_ok=True)

    # Write to a temporary name first and rename afterwards, so an interrupted
    # write can never leave a truncated file that later counts as a cache hit
    partial = local + '.tmp'
    with open(partial, 'w', encoding='utf-8') as f:
        json.dump(obj, f)
    os.replace(partial, local)

    return obj

def parse_features(geojson):
    """Turn the features section of [geojson] into a plain DataFrame.

    The `features` entry is supposed to hold the Geo features. They are read
    through geopandas, converted into a regular pandas DataFrame, and the
    `geometry` column is overwritten with the placeholder '--' so that only
    the properties remain.

    Args:
        geojson: Parsed GeoJSON object containing a 'features' entry.

    Returns:
        A pandas DataFrame with one row per feature.
    """
    geo_frame = gpd.GeoDataFrame.from_features(geojson['features'])
    table = pd.DataFrame(geo_frame)
    # The geometry itself is not needed in the quick-to-check table
    table['geometry'] = '--'
    return table

## Building a quick-to-check DataFrame of Chinese Geo Data
The DataFrame `main_df` will store the features of every available province, city and district in China.

The DataFrame starts from the collection of provinces and is then filled iteratively, based on the features it already contains.

In [3]:
# Start from the top-level file (adcode 100000 is the default of fetch_adcode)
# and tabulate its features
geojson = fetch_adcode(adcode=100000)
main_df = parse_features(geojson)
main_df

Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/100000_full.json


Unnamed: 0,geometry,adcode,name,center,centroid,childrenNum,level,parent,subFeatureIndex,acroutes,adchar
0,--,110000,北京市,"[116.405285, 39.904989]","[116.41995, 40.18994]",16.0,province,{'adcode': 100000},0.0,[100000],
1,--,120000,天津市,"[117.190182, 39.125596]","[117.347019, 39.28803]",16.0,province,{'adcode': 100000},1.0,[100000],
2,--,130000,河北省,"[114.502461, 38.045474]",,11.0,province,{'adcode': 100000},2.0,[100000],
3,--,140000,山西省,"[112.549248, 37.857014]","[112.304761, 37.618555]",11.0,province,{'adcode': 100000},3.0,[100000],
4,--,150000,内蒙古自治区,"[111.670801, 40.818311]","[114.077404, 44.331072]",12.0,province,{'adcode': 100000},4.0,[100000],
5,--,210000,辽宁省,"[123.429096, 41.796767]","[122.605251, 41.299975]",14.0,province,{'adcode': 100000},5.0,[100000],
6,--,220000,吉林省,"[125.3245, 43.886841]","[126.171249, 43.70394]",9.0,province,{'adcode': 100000},6.0,[100000],
7,--,230000,黑龙江省,"[126.642464, 45.756967]","[127.693016, 48.04047]",13.0,province,{'adcode': 100000},7.0,[100000],
8,--,310000,上海市,"[121.472644, 31.231706]","[121.438734, 31.07256]",16.0,province,{'adcode': 100000},8.0,[100000],
9,--,320000,江苏省,"[118.767413, 32.041544]","[119.486395, 32.983908]",13.0,province,{'adcode': 100000},9.0,[100000],


In [4]:
# Walk the administrative tree breadth-first, starting from the top-level
# file. Every fetched table is kept in `frames` and all of them are
# concatenated once at the end, instead of re-copying a growing DataFrame on
# every fetch. Rows end up in the same order as appending children to the end
# of one big table: level by level, children grouped by parent.
geojson = fetch_adcode()
frames = [parse_features(geojson)]

known_adcodes = set()
cursor = 0
# `frames` grows while it is being walked; the loop ends once every collected
# table has been scanned, so a repeated adcode cannot keep it running forever
while cursor < len(frames):
    # reindex() yields NaN for a column that is absent from this table
    pending = frames[cursor].reindex(columns=['adcode', 'childrenNum'])
    cursor += 1

    for adcode, children_num in pending.itertuples(index=False, name=None):
        if adcode in known_adcodes:
            continue
        known_adcodes.add(adcode)

        # A NaN childrenNum compares False here, so such rows are not expanded
        if children_num > 0:
            frames.append(parse_features(fetch_adcode(adcode)))

# ignore_index=True renumbers the rows 0..n-1
main_df = pd.concat(frames, axis=0, ignore_index=True)

main_df.to_json(os.path.join(local_dir, 'main.json'))
main_df

Found file in C:\Sync\GeoData\100000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/110000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/120000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/130000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/140000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/150000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/210000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/220000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/230000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/310000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/330000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/boun

Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/231200_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/232700_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320100_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320200_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320300_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320400_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320500_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320600_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320700_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320800_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/320900_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/321000_full.json
Requiring json from https://

Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/420900_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/421000_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/421100_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/421200_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/421300_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/422800_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/430100_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/430200_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/430300_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/430400_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/430500_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/430600_full.json
Requiring json from https://

Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/540100_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/540200_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/540300_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/540400_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/540500_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/540600_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/542500_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/610100_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/610200_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/610300_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/610400_full.json
Requiring json from https://geo.datav.aliyun.com/areas_v2/bound/610500_full.json
Requiring json from https://

Unnamed: 0,geometry,adcode,name,center,centroid,childrenNum,level,parent,subFeatureIndex,acroutes,adchar
0,--,110000,北京市,"[116.405285, 39.904989]","[116.41995, 40.18994]",16.0,province,{'adcode': 100000},0.0,[100000],
1,--,120000,天津市,"[117.190182, 39.125596]","[117.347019, 39.28803]",16.0,province,{'adcode': 100000},1.0,[100000],
2,--,130000,河北省,"[114.502461, 38.045474]",,11.0,province,{'adcode': 100000},2.0,[100000],
3,--,140000,山西省,"[112.549248, 37.857014]","[112.304761, 37.618555]",11.0,province,{'adcode': 100000},3.0,[100000],
4,--,150000,内蒙古自治区,"[111.670801, 40.818311]","[114.077404, 44.331072]",12.0,province,{'adcode': 100000},4.0,[100000],
...,...,...,...,...,...,...,...,...,...,...,...
3236,--,654322,富蕴县,"[89.524993, 46.993106]","[89.386618, 46.532364]",0.0,district,{'adcode': 654300},2.0,"[100000, 650000, 654300]",
3237,--,654323,福海县,"[87.494569, 47.113128]","[88.046601, 46.362515]",0.0,district,{'adcode': 654300},3.0,"[100000, 650000, 654300]",
3238,--,654324,哈巴河县,"[86.418964, 48.059284]","[86.402485, 48.310203]",0.0,district,{'adcode': 654300},4.0,"[100000, 650000, 654300]",
3239,--,654325,青河县,"[90.381561, 46.672446]","[90.39768, 46.263028]",0.0,district,{'adcode': 654300},5.0,"[100000, 650000, 654300]",


In [5]:
# main_df['adcode'] = main_df.adcode.map(str)
# main_df = main_df.sort_values(by='adcode')
# main_df

In [6]:
# Read the exported table back from disk to check the round trip
main_json_path = os.path.join(local_dir, 'main.json')
df = pd.read_json(main_json_path)
df

Unnamed: 0,geometry,adcode,name,center,centroid,childrenNum,level,parent,subFeatureIndex,acroutes,adchar
0,--,110000,北京市,"[116.405285, 39.904989]","[116.41995, 40.18994]",16.0,province,{'adcode': 100000},0.0,[100000],
1,--,120000,天津市,"[117.190182, 39.125596]","[117.347019, 39.28803]",16.0,province,{'adcode': 100000},1.0,[100000],
2,--,130000,河北省,"[114.502461, 38.045474]",,11.0,province,{'adcode': 100000},2.0,[100000],
3,--,140000,山西省,"[112.549248, 37.857014]","[112.304761, 37.618555]",11.0,province,{'adcode': 100000},3.0,[100000],
4,--,150000,内蒙古自治区,"[111.670801, 40.818311]","[114.077404, 44.331072]",12.0,province,{'adcode': 100000},4.0,[100000],
...,...,...,...,...,...,...,...,...,...,...,...
3236,--,654322,富蕴县,"[89.524993, 46.993106]","[89.386618, 46.532364]",0.0,district,{'adcode': 654300},2.0,"[100000, 650000, 654300]",
3237,--,654323,福海县,"[87.494569, 47.113128]","[88.046601, 46.362515]",0.0,district,{'adcode': 654300},3.0,"[100000, 650000, 654300]",
3238,--,654324,哈巴河县,"[86.418964, 48.059284]","[86.402485, 48.310203]",0.0,district,{'adcode': 654300},4.0,"[100000, 650000, 654300]",
3239,--,654325,青河县,"[90.381561, 46.672446]","[90.39768, 46.263028]",0.0,district,{'adcode': 654300},5.0,"[100000, 650000, 654300]",
