In [141]:
import pandas as pd
import json
from copy import deepcopy
from jellyfish import jaro_winkler
from difflib import SequenceMatcher

In [142]:
df = pd.read_csv('psgc.csv', encoding='latin1')
df = df.loc[:, 'Province':'Municipality Code']
df.head()

Unnamed: 0,Province,Province Code,Municipality,Municipality Code
0,ILOCOS NORTE,12800000,ADAMS,12801000
1,ILOCOS NORTE,12800000,BACARRA,12802000
2,ILOCOS NORTE,12800000,BACARRA,12802000
3,ILOCOS NORTE,12800000,BACARRA,12802000
4,ILOCOS NORTE,12800000,BACARRA,12802000


In [143]:
# retain only unique values of provinces/municipalities and their respective area codes
provinces = df.loc[:, 'Province': 'Province Code'].drop_duplicates()
municipalities = df.loc[:, 'Municipality': 'Municipality Code'].drop_duplicates()

# test location input
loc = ["manila", "philippines"]

In [144]:
def search(data, locations):
    # remove instances of "Philippines"
    if 'Philippines' in locations:
        locations.remove('Philippines')
    
    # fuzzy search for each line; keep the options with the highest similarity
    possible_locs = []
    
    for loc in locations:
        max_similarity = 0
        locs = []
        final_locs = []
        
        # go through all provinces before municipalities
        for _, province in provinces.iterrows():
            jaro_distance = jaro_winkler(province[0].lower(), loc.lower())
#             jaro_distance = SequenceMatcher(None, province[0].lower(), loc.lower()).ratio()

            # if current similarity is higher than stored max similarity,
            # replace all area codes in locs[] with current and update max_similarity
            if jaro_distance > max_similarity:
                locs = [province[1]]
                max_similarity = jaro_distance

            # if current similarity is equal to the stored max similarity,
            # append current area code to locs[]
            elif int(jaro_distance) == int(max_similarity):
                locs.append(province[1])

        # append locs[] to final_locs[] only if similarity is acceptable
        if max_similarity >= 0.5:
            final_locs += locs
        
        max_similarity = 0
            
        # go through all municipalities
        for _, municipal in municipalities.iterrows():
            jaro_distance = SequenceMatcher(None, municipal[0].lower(), loc.lower()).ratio()

            # if current similarity is higher than stored max similarity,
            # replace all area codes in locs[] with current and update max_similarity
            if jaro_distance > max_similarity:
                locs = [municipal[1]]
                max_similarity = jaro_distance

            # if current similarity is equal to the stored max similarity,
            # append current area code to locs[]
            elif jaro_distance == max_similarity:
                locs.append(municipal[1])

        # append locs[] to final_locs[] only if similarity is acceptable
        if max_similarity >= 0.5:
            final_locs += locs

        possible_locs.append(final_locs)
        
    # find intersecting rows for all locations
    possible_locations = []
    
    try:
        _ = possible_locs[1]

        if len(locations) > 1:
            for loc in possible_locs[0]:
                for loc2 in possible_locs[1]:
                    if str(loc2)[:-5] == str(loc)[:-5]:
                        possible_locations += [loc, loc2]
    except IndexError:
        return '0' + str(possible_locs[0][0])[:-5] + '00000'

    # choose highest similarity
    possible_locations = list(set(possible_locations))
    return (str(possible_locations[0])[:-5] + '00000').zfill(9)

search(df, loc)

'143200000'

In [147]:
def get_boundary_coordinates(area_code):
    with open('Boundary_Provinces.geojson', 'r') as file:
        geojson = json.load(file)
        
        for x in range(len(geojson['features'])):
            if area_code == geojson['features'][x]['properties']['PHCode_Pro']:
                return geojson['features'][x]['geometry']['coordinates']
        
        # if no coordinates found, return None
        return None
    
get_boundary_coordinates('143200000')

[[[[121.438401, 17.690627],
   [121.439328, 17.690233],
   [121.438622, 17.689623],
   [121.439029, 17.688923],
   [121.439176, 17.68562],
   [121.436756, 17.671348],
   [121.439367, 17.668597],
   [121.4401, 17.666039],
   [121.44187, 17.664542],
   [121.443647, 17.663926],
   [121.446333, 17.663998],
   [121.447752, 17.66313],
   [121.449613, 17.661005],
   [121.452162, 17.661194],
   [121.455648, 17.662955],
   [121.457624, 17.663079],
   [121.457292, 17.666637],
   [121.456817, 17.666992],
   [121.458121, 17.668618],
   [121.457579, 17.669447],
   [121.460017, 17.668719],
   [121.460525, 17.66921],
   [121.461981, 17.668736],
   [121.463708, 17.669227],
   [121.464999, 17.670908],
   [121.465893, 17.670853],
   [121.466477, 17.672358],
   [121.467818, 17.67213],
   [121.468192, 17.672477],
   [121.469058, 17.671765],
   [121.470746, 17.672276],
   [121.472698, 17.672021],
   [121.474786, 17.670826],
   [121.475808, 17.668892],
   [121.476401, 17.669284],
   [121.477064, 17.668797],

In [148]:
def get_centroid_coordinates(area_code):
    with open('Centroid_Provinces.geojson', 'r') as file:
        geojson = json.load(file)
        
        for x in range(len(geojson['features'])):
            if area_code == geojson['features'][x]['properties']['PHCode_Pro']:
                return geojson['features'][x]['geometry']['coordinates']
        
        # if no coordinates found, return None
        return None
    
get_centroid_coordinates('112400000')

[125.345373, 7.016216]