In [1]:
# https://kodepos.nomor.net/_kodepos.php?_i=provinsi-kodepos
# document.body.onselectstart = function() {return true;};

In [2]:
import re
import json

In [3]:
def flatten(l):
    if not isinstance(l, list):
        return [l]
    out = []
    for v in l:
        out += flatten(v)
    return out

In [4]:
from collections import UserDict
class Map(UserDict):
    def __init__(self, kv_iter):
        d = {}
        if isinstance(kv_iter, dict) or isinstance(kv_iter, UserDict):
            kv_iter = kv_iter.items()
        for k, v in kv_iter:
            if k not in d:
                d[k] = []
            if not isinstance(v, list):
                v = [v]
            d[k] += v
        super().__init__(d)
    def mapk(self, transform_or_dict):
        out = {}
        if isinstance(transform_or_dict, dict) or isinstance(transform_or_dict, UserDict):
            transform = lambda k: transform_or_dict[k]
        else:
            transform = transform_or_dict
        for k, v in self.data.items():
            nks = transform(k)
            if not isinstance(nks, list):
                nks = [nks]
            for nk in nks:
                if nk not in out:
                    out[nk] = []
                if not isinstance(v, list):
                    v = [v]
                out[nk] += v
        for k, v in out.items():
            out[k] = list(set(flatten(v)))
        return Map(out.items())
    def mapv(self, transform_or_dict):
        out = {}
        if isinstance(transform_or_dict, dict) or isinstance(transform_or_dict, UserDict):
            transform = lambda k: transform_or_dict[k]
        else:
            transform = transform_or_dict
        for k, vs in self.data.items():
            for v in vs:
                nvs = transform(v)
                if not isinstance(nvs, list):
                    nvs = [nvs]
                if k not in out:
                    out[k] = []
                out[k] += nvs
        for k, v in out.items():
            out[k] = list(set(flatten(v)))
        return Map(out.items())
    def json(self):
        return json.dumps(self.data)
    def bj(self):
        failed = False
        for k, v in self.data.items():
            if len(v) != 1:
                print({k: v})
                failed = True
        return not failed
        
    def reverse(self):
        out = {}
        for k, vs in self.data.items():
            for v in vs:
                if v not in out:
                    out[v] = []
                out[v].append(k)
        for k, v in out.items():
            out[k] = list(set(flatten(v)))
        return Map(out.items())
    def __contains__(self, key):
        if isinstance(key, list):
            keys = key
        elif isinstance(key, dict) or isinstance(key, UserDict):
            keys = key.keys()
        else:
            keys = [key]
        return all([key in self.data for key in keys])
    def __iadd__(self, other):
        other = Map(other)
        for k, vs in other.items():
            if k not in self.data:
                self.data[k] = []
            self.data[k] += vs
        for k, v in self.data.items():
            self.data[k] = list(set(flatten(v)))
        return self
    def __radd__(self, other):
        out = Map(other)
        out += self
        return out
    def __add__(self, other):
        return self.__radd__(other)
    def asdict(self):
        if not self.bj():
            raise RuntimeError('map is not bijective')
        return {k: vs[0] for k, vs in self.data.items()}

    

In [5]:
c2p = [row.split('\t') for row in open('raw_c2p.txt', 'r').read().strip().split('\n')]

In [6]:
d2c = [row.split('\t') for row in open('raw_d2c.txt', 'r').read().strip().split('\n')]

In [7]:
mc2p = Map(c2p)
mc2p

{'Yogyakarta': ['DI Yogyakarta'], 'Yalimo': ['Papua'], 'Yahukimo': ['Papua'], 'Wonosobo': ['Jawa Tengah'], 'Wonogiri': ['Jawa Tengah'], 'Way Kanan': ['Lampung'], 'Waropen': ['Papua'], 'Wakatobi': ['Sulawesi Tenggara'], 'Wajo': ['Sulawesi Selatan'], 'Tulungagung': ['Jawa Timur'], 'Tulang Bawang Barat': ['Lampung'], 'Tulang Bawang': ['Lampung'], 'Tuban': ['Jawa Timur'], 'Tual': ['Maluku'], 'Trenggalek': ['Jawa Timur'], 'Toraja Utara': ['Sulawesi Selatan'], 'Tomohon': ['Sulawesi Utara'], 'Tolikara': ['Papua'], 'Toli-Toli': ['Sulawesi Tengah'], 'Tojo Una-Una': ['Sulawesi Tengah'], 'Toba Samosir': ['Sumatera Utara'], 'Timor Tengah Utara': ['Nusa Tenggara Timur (NTT)'], 'Timor Tengah Selatan': ['Nusa Tenggara Timur (NTT)'], 'Tidore Kepulauan': ['Maluku Utara'], 'Ternate': ['Maluku Utara'], 'Temanggung': ['Jawa Tengah'], 'Teluk Wondama': ['Papua Barat'], 'Teluk Bintuni': ['Papua Barat'], 'Tegal': ['Jawa Tengah', 'Jawa Tengah'], 'Tebo': ['Jambi'], 'Tebing Tinggi': ['Sumatera Utara'], 'Tasikmal

In [8]:
def stransform(k):
    o1 = k.strip().lower()
    o2 = re.sub(r'[^a-z0-9]', '', o1)
    return o2

In [9]:
# transform names, handles cases like Name ( AlternateName / AlternateName2 )
def gtransform(k):
    o1 = k.strip().lower()
    keys = re.split(r'\(([^)]*)\)', o1)
    if len(keys) > 1:
        return flatten([gtransform(key) for key in keys if len(key) > 0])
    keys = o1.split('/')
    if len(keys) > 1:
        return flatten([gtransform(key) for key in keys if len(key) > 0])
    o2 = re.sub(r'[^a-z0-9]', '', o1)
    return o2

In [10]:
[[k, stransform(k)] for k, v in c2p]

[['Yogyakarta', 'yogyakarta'],
 ['Yalimo', 'yalimo'],
 ['Yahukimo', 'yahukimo'],
 ['Wonosobo', 'wonosobo'],
 ['Wonogiri', 'wonogiri'],
 ['Way Kanan', 'waykanan'],
 ['Waropen', 'waropen'],
 ['Wakatobi', 'wakatobi'],
 ['Wajo', 'wajo'],
 ['Tulungagung', 'tulungagung'],
 ['Tulang Bawang Barat', 'tulangbawangbarat'],
 ['Tulang Bawang', 'tulangbawang'],
 ['Tuban', 'tuban'],
 ['Tual', 'tual'],
 ['Trenggalek', 'trenggalek'],
 ['Toraja Utara', 'torajautara'],
 ['Tomohon', 'tomohon'],
 ['Tolikara', 'tolikara'],
 ['Toli-Toli', 'tolitoli'],
 ['Tojo Una-Una', 'tojounauna'],
 ['Toba Samosir', 'tobasamosir'],
 ['Timor Tengah Utara', 'timortengahutara'],
 ['Timor Tengah Selatan', 'timortengahselatan'],
 ['Tidore Kepulauan', 'tidorekepulauan'],
 ['Ternate', 'ternate'],
 ['Temanggung', 'temanggung'],
 ['Teluk Wondama', 'telukwondama'],
 ['Teluk Bintuni', 'telukbintuni'],
 ['Tegal', 'tegal'],
 ['Tegal', 'tegal'],
 ['Tebo', 'tebo'],
 ['Tebing Tinggi', 'tebingtinggi'],
 ['Tasikmalaya', 'tasikmalaya'],
 ['T

In [11]:
mc2p = mc2p.mapk(stransform)
mc2p

{'yogyakarta': ['DI Yogyakarta'], 'yalimo': ['Papua'], 'yahukimo': ['Papua'], 'wonosobo': ['Jawa Tengah'], 'wonogiri': ['Jawa Tengah'], 'waykanan': ['Lampung'], 'waropen': ['Papua'], 'wakatobi': ['Sulawesi Tenggara'], 'wajo': ['Sulawesi Selatan'], 'tulungagung': ['Jawa Timur'], 'tulangbawangbarat': ['Lampung'], 'tulangbawang': ['Lampung'], 'tuban': ['Jawa Timur'], 'tual': ['Maluku'], 'trenggalek': ['Jawa Timur'], 'torajautara': ['Sulawesi Selatan'], 'tomohon': ['Sulawesi Utara'], 'tolikara': ['Papua'], 'tolitoli': ['Sulawesi Tengah'], 'tojounauna': ['Sulawesi Tengah'], 'tobasamosir': ['Sumatera Utara'], 'timortengahutara': ['Nusa Tenggara Timur (NTT)'], 'timortengahselatan': ['Nusa Tenggara Timur (NTT)'], 'tidorekepulauan': ['Maluku Utara'], 'ternate': ['Maluku Utara'], 'temanggung': ['Jawa Tengah'], 'telukwondama': ['Papua Barat'], 'telukbintuni': ['Papua Barat'], 'tegal': ['Jawa Tengah'], 'tebo': ['Jambi'], 'tebingtinggi': ['Sumatera Utara'], 'tasikmalaya': ['Jawa Barat'], 'tarakan':

In [12]:
mc2p.bj()

{'banjar': ['Kalimantan Selatan', 'Jawa Barat']}


False

In [13]:
mc2p['banjar'] = ["Jawa Barat"]

In [14]:
mc2p.bj()

True

In [15]:
c_synonyms = """
KOTA JAMBI -> jambi
KABUPATEN BOGOR -> bogor
KABUPATEN KARAWANG -> karawang
KABUPATEN BOGOR -> bogor
KOTA BANDUNG -> bandung
KABUPATEN BANDUNG BARAT -> bandungbarat
KABUPATEN SUKABUMI -> sukabumi
KADUPATEN GROBOGAN -> grobogan
KABUPATEN MOJOKERTO -> mojokerto
KABUPATEN JOMBANG -> jombang
KABUPATEN SAMPANG -> sampang
KABUPATEN SUKOHARJO -> sukoharjo
KOTA SALATIGA -> salatiga
KABUPATEN NGANJUK -> nganjuk
KABUPATEN KARANGASEM -> karangasem
KABUPATEN PANDEGLANG -> pandeglang
KOTA SAMARINDA -> samarinda
KABUPATEN KUTAI KARTANEGARA -> kutaikartanegara
KOTA BATAM -> batam
KOTA PALEMBANG -> palembang
KABUPATEN LOMBOK BARAT -> lombokbarat
KOTA PEKANBARU -> pekanbaru
KABUPATEN SIAK -> siak
KOTA SOLOK -> solok
KOTA MANADO -> manado
KOTA PADANG -> padang
KOTA TEBING TINGGI -> tebingtinggi
KABUPATEN MUARA ENIM -> muaraenim
KOTA TANGERANG -> tangerang
KABUPATEN LUMAJANG -> lumajang
"""

In [16]:
c_synonyms = Map([row.split(' -> ') for row in c_synonyms.strip().split('\n')]).mapk(stransform)
c_synonyms

{'kotajambi': ['jambi'], 'kabupatenbogor': ['bogor'], 'kabupatenkarawang': ['karawang'], 'kotabandung': ['bandung'], 'kabupatenbandungbarat': ['bandungbarat'], 'kabupatensukabumi': ['sukabumi'], 'kadupatengrobogan': ['grobogan'], 'kabupatenmojokerto': ['mojokerto'], 'kabupatenjombang': ['jombang'], 'kabupatensampang': ['sampang'], 'kabupatensukoharjo': ['sukoharjo'], 'kotasalatiga': ['salatiga'], 'kabupatennganjuk': ['nganjuk'], 'kabupatenkarangasem': ['karangasem'], 'kabupatenpandeglang': ['pandeglang'], 'kotasamarinda': ['samarinda'], 'kabupatenkutaikartanegara': ['kutaikartanegara'], 'kotabatam': ['batam'], 'kotapalembang': ['palembang'], 'kabupatenlombokbarat': ['lombokbarat'], 'kotapekanbaru': ['pekanbaru'], 'kabupatensiak': ['siak'], 'kotasolok': ['solok'], 'kotamanado': ['manado'], 'kotapadang': ['padang'], 'kotatebingtinggi': ['tebingtinggi'], 'kabupatenmuaraenim': ['muaraenim'], 'kotatangerang': ['tangerang'], 'kabupatenlumajang': ['lumajang']}

In [17]:
c_synonyms.bj()

True

In [18]:
c_synonyms.reverse() in mc2p

True

In [19]:
mc2p += c_synonyms.mapv(mc2p)

In [20]:
md2c = Map(d2c)
md2c

{'2 X 11 Enam Lingkuang': ['Padang Pariaman'], '2 X 11 Kayu Tanam': ['Padang Pariaman'], 'Abab': ['Penukal Abab Lematang Ilir'], 'Abang': ['Karangasem'], 'Abeli': ['Kendari'], 'Abenaho': ['Yalimo'], 'Abepura': ['Jayapura'], 'Abiansemal': ['Badung'], 'Aboy': ['Pegunungan Bintang'], 'Abuki': ['Konawe'], 'Abun': ['Tambrauw'], 'Abung Barat': ['Lampung Utara'], 'Abung Kunang': ['Lampung Utara'], 'Abung Pekurun': ['Lampung Utara'], 'Abung Selatan': ['Lampung Utara'], 'Abung Semuli': ['Lampung Utara'], 'Abung Surakarta': ['Lampung Utara'], 'Abung Tengah': ['Lampung Utara'], 'Abung Timur': ['Lampung Utara'], 'Abung Tinggi': ['Lampung Utara'], 'Adian Koting': ['Tapanuli Utara'], 'Adiluwih (Adi Luwih)': ['Pringsewu'], 'Adimulyo': ['Kebumen'], 'Adipala': ['Cilacap'], 'Adiwerna': ['Tegal'], 'Adonara': ['Flores Timur'], 'Adonara Barat': ['Flores Timur'], 'Adonara Tengah': ['Flores Timur'], 'Adonara Timur': ['Flores Timur'], 'Aek Bilah': ['Tapanuli Selatan'], 'Aek Kuasan': ['Asahan'], 'Aek Kuo': ['L

In [21]:
md2c = md2c.mapk(gtransform)
md2c

{'2x11enamlingkuang': ['Padang Pariaman'], '2x11kayutanam': ['Padang Pariaman'], 'abab': ['Penukal Abab Lematang Ilir'], 'abang': ['Karangasem'], 'abeli': ['Kendari'], 'abenaho': ['Yalimo'], 'abepura': ['Jayapura'], 'abiansemal': ['Badung'], 'aboy': ['Pegunungan Bintang'], 'abuki': ['Konawe'], 'abun': ['Tambrauw'], 'abungbarat': ['Lampung Utara'], 'abungkunang': ['Lampung Utara'], 'abungpekurun': ['Lampung Utara'], 'abungselatan': ['Lampung Utara'], 'abungsemuli': ['Lampung Utara'], 'abungsurakarta': ['Lampung Utara'], 'abungtengah': ['Lampung Utara'], 'abungtimur': ['Lampung Utara'], 'abungtinggi': ['Lampung Utara'], 'adiankoting': ['Tapanuli Utara'], 'adiluwih': ['Pringsewu'], 'adimulyo': ['Kebumen'], 'adipala': ['Cilacap'], 'adiwerna': ['Tegal'], 'adonara': ['Flores Timur'], 'adonarabarat': ['Flores Timur'], 'adonaratengah': ['Flores Timur'], 'adonaratimur': ['Flores Timur'], 'aekbilah': ['Tapanuli Selatan'], 'aekkuasan': ['Asahan'], 'aekkuo': ['Labuhanbatu Utara'], 'aekledong': ['A

In [22]:
md2p = md2c.mapv(stransform).mapv(mc2p)

In [23]:
md2p

{'2x11enamlingkuang': ['Sumatera Barat'], '2x11kayutanam': ['Sumatera Barat'], 'abab': ['Sumatera Selatan'], 'abang': ['Bali'], 'abeli': ['Sulawesi Tenggara'], 'abenaho': ['Papua'], 'abepura': ['Papua'], 'abiansemal': ['Bali'], 'aboy': ['Papua'], 'abuki': ['Sulawesi Tenggara'], 'abun': ['Papua Barat'], 'abungbarat': ['Lampung'], 'abungkunang': ['Lampung'], 'abungpekurun': ['Lampung'], 'abungselatan': ['Lampung'], 'abungsemuli': ['Lampung'], 'abungsurakarta': ['Lampung'], 'abungtengah': ['Lampung'], 'abungtimur': ['Lampung'], 'abungtinggi': ['Lampung'], 'adiankoting': ['Sumatera Utara'], 'adiluwih': ['Lampung'], 'adimulyo': ['Jawa Tengah'], 'adipala': ['Jawa Tengah'], 'adiwerna': ['Jawa Tengah'], 'adonara': ['Nusa Tenggara Timur (NTT)'], 'adonarabarat': ['Nusa Tenggara Timur (NTT)'], 'adonaratengah': ['Nusa Tenggara Timur (NTT)'], 'adonaratimur': ['Nusa Tenggara Timur (NTT)'], 'aekbilah': ['Sumatera Utara'], 'aekkuasan': ['Sumatera Utara'], 'aekkuo': ['Sumatera Utara'], 'aekledong': ['S

In [24]:
md2c.bj()

{'airhitam': ['Sarolangun', 'Lampung Barat']}
{'alama': ['Mimika', 'Nduga']}
{'ambalau': ['Sintang', 'Buru Selatan']}
{'ambarawa': ['Pringsewu', 'Semarang']}
{'ivnagari': ['Sijunjung', 'Agam']}
{'ampelgading': ['Pemalang', 'Malang']}
{'angsana': ['Pandeglang', 'Tanah Bumbu']}
{'arjasa': ['Jember', 'Sumenep', 'Situbondo']}
{'badau': ['Kapuas Hulu', 'Belitung']}
{'balusu': ['Barru', 'Toraja Utara']}
{'bandar': ['Batang', 'Pacitan', 'Simalungun', 'Bener Meriah']}
{'bandung': ['Serang', 'Tulungagung']}
{'bangko': ['Rokan Hilir', 'Merangin']}
{'pusako': ['Rokan Hilir', 'Siak']}
{'bangunpurba': ['Deli Serdang', 'Rokan Hulu']}
{'banjar': ['Banjar', 'Buleleng', 'Pandeglang']}
{'banjaran': ['Majalengka', 'Bandung']}
{'banjarsari': ['Ciamis', 'Surakarta', 'Lebak']}
{'banyumas': ['Pringsewu', 'Banyumas']}
{'banyuputih': ['Batang', 'Situbondo']}
{'baros': ['Serang', 'Sukabumi']}
{'batang': ['Jeneponto', 'Batang']}
{'batuampar': ['Seruyan', 'Batam', 'Kutai Timur', 'Kubu Raya', 'Tanah Laut']}
{'batu

False

In [25]:
mp2p = Map(json.loads(open('province-to-province.json', 'r').read()).items())

In [26]:
mp2p.bj()

True

In [27]:
md2p = md2p.mapv(mp2p)

In [28]:
mp2p

{'Nusa Tenggara Barat (NTB)': ['nusa tenggara barat'], 'Sumatera Barat': ['sumatera barat'], 'DI Yogyakarta': ['di yogyakarta'], 'DKI Jakarta': ['dki jakarta'], 'Sumatera Selatan': ['sumatera selatan'], 'Kalimantan Tengah': ['kalimantan tengah'], 'Bengkulu': ['bengkulu'], 'Gorontalo': ['gorontalo'], 'Nusa Tenggara Timur (NTT)': ['nusa tenggara timur'], 'Maluku Utara': ['maluku utara'], 'Aceh (NAD)': ['nanggroe aceh darussalam'], 'Jawa Timur': ['jawa timur'], 'Lampung': ['lampung'], 'Jambi': ['jambi'], 'Banten': ['banten'], 'Bali': ['bali'], 'Kalimantan Timur': ['kalimantan timur'], 'Sulawesi Utara': ['sulawesi utara'], 'Sulawesi Selatan': ['sulawesi selatan'], 'Jawa Barat': ['jawa barat'], 'Sulawesi Tenggara': ['sulawesi tenggara'], 'Sulawesi Barat': ['sulawesi barat'], 'Maluku': ['maluku'], 'Sumatera Utara': ['sumatera utara'], 'Kepulauan Bangka Belitung': ['bangka belitung'], 'Kepulauan Riau': ['kepulauan riau'], 'Papua Barat': ['papua barat'], 'Jawa Tengah': ['jawa tengah'], 'Sulawe

In [29]:
mc2p = mc2p.mapv(mp2p)

In [30]:
mp2p.reverse()

{'nusa tenggara barat': ['Nusa Tenggara Barat (NTB)'], 'sumatera barat': ['Sumatera Barat'], 'di yogyakarta': ['DI Yogyakarta'], 'dki jakarta': ['DKI Jakarta'], 'sumatera selatan': ['Sumatera Selatan'], 'kalimantan tengah': ['Kalimantan Tengah'], 'bengkulu': ['Bengkulu'], 'gorontalo': ['Gorontalo'], 'nusa tenggara timur': ['Nusa Tenggara Timur (NTT)'], 'maluku utara': ['Maluku Utara'], 'nanggroe aceh darussalam': ['Aceh (NAD)'], 'jawa timur': ['Jawa Timur'], 'lampung': ['Lampung'], 'jambi': ['Jambi'], 'banten': ['Banten'], 'bali': ['Bali'], 'kalimantan timur': ['Kalimantan Timur'], 'sulawesi utara': ['Sulawesi Utara'], 'sulawesi selatan': ['Sulawesi Selatan'], 'jawa barat': ['Jawa Barat'], 'sulawesi tenggara': ['Sulawesi Tenggara'], 'sulawesi barat': ['Sulawesi Barat'], 'maluku': ['Maluku'], 'sumatera utara': ['Sumatera Utara'], 'bangka belitung': ['Kepulauan Bangka Belitung'], 'kepulauan riau': ['Kepulauan Riau'], 'papua barat': ['Papua Barat'], 'jawa tengah': ['Jawa Tengah'], 'sulawe

In [31]:
synonyms = {
    "nusa tenggara barat": ["ntb"],
    "nusa tenggara timur": ["ntt"],
    "nanggroe aceh darussalam": ["aceh", "nad"],
    "dki jakarta": ["dki", "jakarta", "jakarta utara"],
    "di yogyakarta": ["daerah istimewa yogyakarta", "yogyakarta"],
    "jawa timur": ["east java"],
    "jawa barat": ["west java"],
    "sulawesi utara": ["sulut", "north sulawesi"],
    "sulawesi selatan": ["sulsel", "south sulawesi"],
    "sumatera utara": ["sumut", "north sumatera", "sumatra utara"],
    "sumatera barat": ["sumbar", "sumatra barat"],
    "sumatera selatan": ["sumsel", "south sumatera", "sumatra selatan"],
    "jawa tengah": ["jateng"],
    "jawa timur": ["jatim"],
    "kalimantan timur": ["kaltim"],
    "kalimantan tengah": ["kalteng"],
    "kepulauan riau": ["kepri"]
}

In [32]:
synonyms in mp2p.reverse()

True

In [33]:
mp2p.reverse() + synonyms

{'nusa tenggara barat': ['ntb', 'Nusa Tenggara Barat (NTB)'], 'nusa tenggara timur': ['ntt', 'Nusa Tenggara Timur (NTT)'], 'nanggroe aceh darussalam': ['Aceh (NAD)', 'nad', 'aceh'], 'dki jakarta': ['jakarta utara', 'jakarta', 'dki', 'DKI Jakarta'], 'di yogyakarta': ['daerah istimewa yogyakarta', 'DI Yogyakarta', 'yogyakarta'], 'jawa timur': ['Jawa Timur', 'jatim'], 'jawa barat': ['west java', 'Jawa Barat'], 'sulawesi utara': ['sulut', 'north sulawesi', 'Sulawesi Utara'], 'sulawesi selatan': ['sulsel', 'south sulawesi', 'Sulawesi Selatan'], 'sumatera utara': ['sumut', 'Sumatera Utara', 'sumatra utara', 'north sumatera'], 'sumatera barat': ['sumatra barat', 'Sumatera Barat', 'sumbar'], 'sumatera selatan': ['sumsel', 'Sumatera Selatan', 'south sumatera', 'sumatra selatan'], 'jawa tengah': ['jateng', 'Jawa Tengah'], 'kalimantan timur': ['kaltim', 'Kalimantan Timur'], 'kalimantan tengah': ['Kalimantan Tengah', 'kalteng'], 'kepulauan riau': ['Kepulauan Riau', 'kepri'], 'bengkulu': ['Bengkulu

In [34]:
mp2p = (mp2p.reverse() + synonyms).mapv(gtransform).reverse()

In [35]:
mp2p

{'nusatenggarabarat': ['nusa tenggara barat'], 'ntb': ['nusa tenggara barat'], 'ntt': ['nusa tenggara timur'], 'nusatenggaratimur': ['nusa tenggara timur'], 'nad': ['nanggroe aceh darussalam'], 'aceh': ['nanggroe aceh darussalam'], 'dki': ['dki jakarta'], 'dkijakarta': ['dki jakarta'], 'jakarta': ['dki jakarta'], 'jakartautara': ['dki jakarta'], 'daerahistimewayogyakarta': ['di yogyakarta'], 'diyogyakarta': ['di yogyakarta'], 'yogyakarta': ['di yogyakarta'], 'jatim': ['jawa timur'], 'jawatimur': ['jawa timur'], 'westjava': ['jawa barat'], 'jawabarat': ['jawa barat'], 'sulawesiutara': ['sulawesi utara'], 'sulut': ['sulawesi utara'], 'northsulawesi': ['sulawesi utara'], 'sulsel': ['sulawesi selatan'], 'southsulawesi': ['sulawesi selatan'], 'sulawesiselatan': ['sulawesi selatan'], 'sumut': ['sumatera utara'], 'northsumatera': ['sumatera utara'], 'sumatrautara': ['sumatera utara'], 'sumaterautara': ['sumatera utara'], 'sumatrabarat': ['sumatera barat'], 'sumaterabarat': ['sumatera barat'],

In [36]:
mc2p

{'yogyakarta': ['di yogyakarta'], 'yalimo': ['papua'], 'yahukimo': ['papua'], 'wonosobo': ['jawa tengah'], 'wonogiri': ['jawa tengah'], 'waykanan': ['lampung'], 'waropen': ['papua'], 'wakatobi': ['sulawesi tenggara'], 'wajo': ['sulawesi selatan'], 'tulungagung': ['jawa timur'], 'tulangbawangbarat': ['lampung'], 'tulangbawang': ['lampung'], 'tuban': ['jawa timur'], 'tual': ['maluku'], 'trenggalek': ['jawa timur'], 'torajautara': ['sulawesi selatan'], 'tomohon': ['sulawesi utara'], 'tolikara': ['papua'], 'tolitoli': ['sulawesi tengah'], 'tojounauna': ['sulawesi tengah'], 'tobasamosir': ['sumatera utara'], 'timortengahutara': ['nusa tenggara timur'], 'timortengahselatan': ['nusa tenggara timur'], 'tidorekepulauan': ['maluku utara'], 'ternate': ['maluku utara'], 'temanggung': ['jawa tengah'], 'telukwondama': ['papua barat'], 'telukbintuni': ['papua barat'], 'tegal': ['jawa tengah'], 'tebo': ['jambi'], 'tebingtinggi': ['sumatera utara'], 'tasikmalaya': ['jawa barat'], 'tarakan': ['kalimanta

In [37]:
md2p

{'2x11enamlingkuang': ['sumatera barat'], '2x11kayutanam': ['sumatera barat'], 'abab': ['sumatera selatan'], 'abang': ['bali'], 'abeli': ['sulawesi tenggara'], 'abenaho': ['papua'], 'abepura': ['papua'], 'abiansemal': ['bali'], 'aboy': ['papua'], 'abuki': ['sulawesi tenggara'], 'abun': ['papua barat'], 'abungbarat': ['lampung'], 'abungkunang': ['lampung'], 'abungpekurun': ['lampung'], 'abungselatan': ['lampung'], 'abungsemuli': ['lampung'], 'abungsurakarta': ['lampung'], 'abungtengah': ['lampung'], 'abungtimur': ['lampung'], 'abungtinggi': ['lampung'], 'adiankoting': ['sumatera utara'], 'adiluwih': ['lampung'], 'adimulyo': ['jawa tengah'], 'adipala': ['jawa tengah'], 'adiwerna': ['jawa tengah'], 'adonara': ['nusa tenggara timur'], 'adonarabarat': ['nusa tenggara timur'], 'adonaratengah': ['nusa tenggara timur'], 'adonaratimur': ['nusa tenggara timur'], 'aekbilah': ['sumatera utara'], 'aekkuasan': ['sumatera utara'], 'aekkuo': ['sumatera utara'], 'aekledong': ['sumatera utara'], 'aeknab

In [38]:
op2p = Map(json.loads(open('province-to-province.json', 'r').read()).items())

In [39]:
op2p.bj()

True

In [40]:
mi2p = Map({idx: v for idx, v in enumerate(op2p.values())})
mi2p

{0: ['nusa tenggara barat'], 1: ['sumatera barat'], 2: ['di yogyakarta'], 3: ['dki jakarta'], 4: ['sumatera selatan'], 5: ['kalimantan tengah'], 6: ['bengkulu'], 7: ['gorontalo'], 8: ['nusa tenggara timur'], 9: ['maluku utara'], 10: ['nanggroe aceh darussalam'], 11: ['jawa timur'], 12: ['lampung'], 13: ['jambi'], 14: ['banten'], 15: ['bali'], 16: ['kalimantan timur'], 17: ['sulawesi utara'], 18: ['sulawesi selatan'], 19: ['jawa barat'], 20: ['sulawesi tenggara'], 21: ['sulawesi barat'], 22: ['maluku'], 23: ['sumatera utara'], 24: ['bangka belitung'], 25: ['kepulauan riau'], 26: ['papua barat'], 27: ['jawa tengah'], 28: ['sulawesi tengah'], 29: ['riau'], 30: ['kalimantan barat'], 31: ['kalimantan utara'], 32: ['kalimantan selatan'], 33: ['papua']}

In [41]:
mp2i = mp2p.mapv(mi2p.reverse())

In [42]:
mc2i = mc2p.mapv(mi2p.reverse())

In [43]:
md2i = md2p.mapv(mi2p.reverse())

In [49]:
open('out/mp2i.json', 'w').write(mp2i.json())

1186

In [50]:
open('out/mc2i.json', 'w').write(mc2i.json())

10184

In [51]:
open('out/md2i.json', 'w').write(md2i.json())

136342

In [52]:
open('out/mi2p.json', 'w').write(mi2p.json())

824

In [48]:
md2p.bj()

{'airhitam': ['lampung', 'jambi']}
{'ambalau': ['maluku', 'kalimantan barat']}
{'ambarawa': ['jawa tengah', 'lampung']}
{'ampelgading': ['jawa timur', 'jawa tengah']}
{'angsana': ['kalimantan selatan', 'banten']}
{'badau': ['kalimantan barat', 'bangka belitung']}
{'bandar': ['jawa timur', 'sumatera utara', 'jawa tengah', 'nanggroe aceh darussalam']}
{'bandung': ['jawa timur', 'banten']}
{'bangko': ['riau', 'jambi']}
{'bangunpurba': ['riau', 'sumatera utara']}
{'banjar': ['jawa barat', 'bali', 'banten']}
{'banjarsari': ['jawa barat', 'jawa tengah', 'banten']}
{'banyumas': ['jawa tengah', 'lampung']}
{'banyuputih': ['jawa timur', 'jawa tengah']}
{'baros': ['jawa barat', 'banten']}
{'batang': ['jawa tengah', 'sulawesi selatan']}
{'batuampar': ['kalimantan selatan', 'kalimantan tengah', 'kalimantan timur', 'kepulauan riau', 'kalimantan barat']}
{'batuputih': ['sulawesi tenggara', 'lampung', 'jawa timur', 'kalimantan timur', 'nusa tenggara timur']}
{'bayan': ['nusa tenggara barat', 'jawa te

False