In [284]:
import numpy as np
import pandas as pd

In [285]:
# Load the Philippine Standard Geographic Code (PSGC) table from the
# relative path 'psgc.csv', decoding the file as Latin-1.
df = pd.read_csv(
    'psgc.csv',
    encoding='latin1',
)

In [286]:
# Clean data: lower-case the two text columns used for matching, then
# replace every missing value in the frame with the string 'none'.
df = df.assign(
    level=df['level'].str.lower(),
    name=df['name'].str.lower(),
).fillna('none')
df.head(3)

Unnamed: 0,code,name,level
0,10000000,region i (ilocos region),reg
1,12800000,ilocos norte,prov
2,12801000,adams,mun


In [287]:
'''
Test locations
- Each test case is a list of tuples.
- Each tuple holds two strings: the name of the location and its corresponding location type.
- There are 8 location types: 'reg', 'dist', 'city', 'prov', 'mun', 'submun', 'bgy', and 'none'.
'''

location1 = [
    ('antipolo', 'none'),
    ('rizal', 'none'),
]
location2 = [
    ('cagayan valley', 'reg'),
]
location3 = [
    ('calamba', 'city'),
    ('rizal', 'prov'),
]

In [288]:
# Preview the first two rows whose name contains the first queried name
# of location1.
(
    df.loc[df['name'].str.contains(location1[0][0])]
    .head(2)
)

Unnamed: 0,code,name,level
6679,34902001,antipolo,bgy
8780,37109002,antipolo (pob.),bgy


In [289]:
# Preview the first two rows whose name contains the second queried name
# of location1.
(
    df.loc[df['name'].str.contains(location1[1][0])]
    .head(2)
)

Unnamed: 0,code,name,level
696,12905026,rizal (pob.),bgy
1035,12922025,rizal,bgy


In [290]:
# Narrow a name match down to a single level.
# The level is compared with ==, not str.contains: a substring match would
# let a level such as 'mun' also select 'submun' rows.
location4 = [('quezon', 'city')]
df[
    df['name'].str.contains(location4[0][0])
    & (df['level'] == location4[0][1])
]

Unnamed: 0,code,name,level
37773,137404000,quezon city,city


In [291]:
def search(location, data=None):
    '''
    Collect, for each queried location, the PSGC rows whose name contains it.

    Part 1: Collection (implemented)
        a. Take a list of tuples that refer to locations.
        b. Find the rows of the Philippine Standard Geographic Code table
           whose 'name' column contains each queried name.
        c. Return the matching rows, grouped per query.
    Part 2: Partition (TODO, not implemented yet)
        a. Separate the matches according to level (region, province,
           municipality, barangay) into new lists.
    Part 3: Comparison (TODO, not specified yet)

    Parameters
    ----------
    location : list of tuple
        Queries; element 0 of each tuple is the name to look for. Any
        further elements (such as the level) are currently ignored.
    data : pandas.DataFrame, optional
        Table to search; it must have a 'name' column. When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    list of list of tuple
        One inner list per query, in query order. Each tuple is one full
        matching row of the table, in column order.
    '''
    table = df if data is None else data

    matches_per_query = []
    for query in location:
        # regex=False: names are matched literally, so entries such as
        # 'antipolo (pob.)' are not misread as regular expressions.
        # na=False: rows with a missing name never match instead of
        # putting NaN into the boolean mask.
        mask = table['name'].str.contains(query[0], regex=False, na=False)
        matches_per_query.append(
            list(table[mask].itertuples(index=False, name=None))
        )

    # TODO: partition the matches by level and compare them across queries;
    # only the collection step exists so far.
    return matches_per_query

In [281]:
# Run search() on location1; the result holds one list of matching rows per
# queried name.
# NOTE(review): this cell's execution count (281) is lower than that of the
# cell defining search() (291), so the saved output may predate the current
# definition. Confirm by re-running after Restart & Run All.
search(location1)

[[(34902001, 'antipolo', 'bgy'),
  (37109002, 'antipolo (pob.)', 'bgy'),
  (41014005, 'antipolo del norte', 'bgy'),
  (41014006, 'antipolo del sur', 'bgy'),
  (41021002, 'antipolo', 'bgy'),
  (41026002, 'antipolo', 'bgy'),
  (41027001, 'antipolo', 'bgy'),
  (43423001, 'antipolo', 'bgy'),
  (45630003, 'antipolo', 'bgy'),
  (45645001, 'antipolo', 'bgy'),
  (45802000, 'city of antipolo (capital)', 'city'),
  (174003001, 'antipolo', 'bgy'),
  (175208002, 'antipolo', 'bgy'),
  (51701002, 'antipolo', 'bgy'),
  (51705001, 'antipolo', 'bgy'),
  (51716001, 'antipolo', 'bgy'),
  (51722001, 'antipolo', 'bgy'),
  (51723002, 'antipolo old', 'bgy'),
  (51723003, 'antipolo young', 'bgy'),
  (51727001, 'antipolo', 'bgy'),
  (51737002, 'antipolo', 'bgy'),
  (52001001, 'antipolo', 'bgy'),
  (52011001, 'antipolo del norte', 'bgy'),
  (52011002, 'antipolo del sur', 'bgy'),
  (54115001, 'antipolo', 'bgy'),
  (56203004, 'antipolo', 'bgy'),
  (60406003, 'antipolo', 'bgy'),
  (64521001, 'antipolo', 'bgy'),
  