In [1]:
import pandas as pd
import numpy as np
import string
import requests
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### Importing & Cleaning Data

Description: Reads in the data and stores it in the dataframe "data", then removes all punctuations and turns stores the cleaned data into the dataframe "fix"

To Do: Turn the list of addresses you receive into a csv file and update the name in the read_csv function

In [2]:
data = pd.read_csv("Sample Input.csv") #Here!

In [3]:
fix = {'stripaddress': data['stripaddress'], 'edit': data['stripaddress']}  
fix = pd.DataFrame(fix, columns = ['stripaddress', 'edit']) 

In [4]:
fix['edit'] = [s.translate(string.punctuation) for s in fix['edit']]
fix['edit'] = [s.replace(',', ' ').replace('.',' ').replace('#',' ') for s in fix['edit']]
fix['lst'] = [s.split() for s in fix['edit']]

### Columns for Lat & Long (Run Only Once!)

Description: Creates latitute, longitude, and address columns that will be filled in later from the Google Maps API

In [5]:
fix['lat']=np.repeat(None, len(fix['edit']))
fix['lng']=np.repeat(None, len(fix['edit']))
fix['add']=np.repeat(None, len(fix['edit']))

### Importing Shapefiles

Description: Uploads shapefiles to get coordinates data for each baranggay, and creates a dataframe called "df"

To Do: Change the address of the baranggay shapefile from '/Users/lorenzoflores/Desktop/LOOKUP-NCR-2ndDistrict-QuezonCity/Barangays.shp' to wherever it is stored on your computer

In [9]:
sns.set(style="whitegrid", palette="pastel", color_codes=True)
sns.mpl.rc("figure", figsize=(10,6))
%matplotlib inline

shp_path = '/Users/lorenzoflores/Desktop/LOOKUP-NCR-2ndDistrict-QuezonCity/Barangays.shp' #Here!
sf = shp.Reader(shp_path, encoding='latin-1')

In [10]:
def read_shapefile(sf):
    """
    Read a shapefile into a Pandas dataframe with a 'coords' 
    column holding the geometry information. This uses the pyshp
    package
    """
    fields = [x[0] for x in sf.fields][1:]
    records = sf.records()
    shps = [s.points for s in sf.shapes()]
    df = pd.DataFrame(columns=fields, data=records)
    df = df.assign(coords=shps)
    return df

### Subsetting Shapefiles for Quezon City 

Description: Removes all other baranggay shapefiles from the df dataframe except for the area you choose, in order to reduce run time

To Do: Change Metropolitan Manila to whichever province you're working on, run the code below to check the available provinces

In [73]:
#Run me to check available provinces!
set(df['NAME_1'])

{'Metropolitan Manila'}

In [11]:
df = read_shapefile(sf)
df = df[df['NAME_1']=='Metropolitan Manila'] #Here!
df = df.reset_index(drop=True)

### Geotagging from Google Maps (Run Only Once!)

Description: Uses Google Maps to identify coordinates for each address

To Do: Input your own Google API key for the geotagging API as it says below

In [12]:
for i in range(len(fix['edit'])):
    if i%100==0:
        print("Now on: "+str(i))
    try:
        x = fix['edit'][i]
        response = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address='+
                                x+',Philippines'+'&key='+"INSERT YOUR KEY HERE IN THE QUOTATION MARKS")
        resp_json_payload = response.json()
        fix['add'][i] = resp_json_payload['results'][0]['formatted_address']
        fix['lat'][i] = resp_json_payload['results'][0]['geometry']['location']['lat']
        fix['lng'][i] = resp_json_payload['results'][0]['geometry']['location']['lng']
    except:
        print("Error on: "+str(i))
        print(fix['edit'][i])

Now on: 0
Now on: 100
Now on: 200
Error on: 296
1141 SANTA RC
Now on: 300
Now on: 400
Now on: 500
Now on: 600
Error on: 615
142 TARLAC ST COMMONWEALTH QUEZON CITY
Error on: 659
15 BLK 52 PANTOMINA MAYOR ST LAGRO SUBD NOVALICHES QUEZON CITY
Now on: 700
Error on: 777
16-C BLK A STO  NIÑO ST   QUEZON CITY CPO  QUEZON CITY 1100
Now on: 800
Error on: 867
189 EVERLASTING ST PINGKIAN 3 ZN 1 PASONG TAMO QUEZON CITY
Now on: 900
Error on: 995
207 SECOND NORTH 2906 KING SOLOMON ST AREA C
Error on: 996
207 UNIT 5 ADARNA STREET PUROK 6 COMMONWEALTH QUEZON CITY
Now on: 1000
Now on: 1100
Error on: 1187
27 NUPAX CMPD BAGBAG NOVALICHES QUEZON CITY
Now on: 1200
Now on: 1300
Error on: 1358
35-D PROJECT 4 ESCOPA I QUEZON CITY NCR
Now on: 1400
Now on: 1500
Now on: 1600
Now on: 1700
Now on: 1800
Now on: 1900
Now on: 2000
Now on: 2100
Now on: 2200
Now on: 2300
Now on: 2400
Error on: 2454
B7-L14/16 P1 SILVERCREEK SUBD 
Error on: 2455
B7-L22 PORTICO-I SUBD 
Now on: 2500
Now on: 2600
Now on: 2700
Error on: 2700

### Baranggay + Address Coordinates Matching

Description: Identifies the baranggay to which the coordinates of the address belongs

In [13]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

Turn each baranggay in df into a polygon

In [14]:
df['poly']=np.repeat(None, len(df['NAME_1']))
for i in range(len(df['poly'])):   
    df['poly'][i] = Polygon(df.iloc[i,]['coords'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Turn each address in fix into a point

In [45]:
fix['pt']=np.repeat(None, len(fix['lat']))
for i in range(len(fix['lat'])):
    if fix['add'][i] is not None:
        fix['pt'][i] = Point(fix['lng'][i],fix['lat'][i])
    else:
        pass

Create a baranggay name column

In [19]:
fix['bgy']=np.repeat(None, len(fix['lat']))

For each point, check if it belongs to a baranggay

In [63]:
for i in range(len(fix['edit'])):
    if i%100==0:
        print("Now on: "+str(i))
    if fix['add'][i] is not None:
        for j in range(len(df['poly'])):
            if fix['pt'][i].within(df['poly'][j]):
                fix['bgy'][i] = df['NAME_3'][j]
                break
    if fix['bgy'][i] is None:
        print("Check: "+str(i))
        print(fix['edit'][i])

Now on: 0
Check: 4
001 LEYTE GROUP 7 PAYATAS QUEZON CITY
Check: 11
004 ZAMBOANGA ST   BAI COMPOUND  BRGY  VASRA  QUEZON CITY  N/A
Check: 93
079 GINUNTUANG LANDAS FOREST HILL SANTA MONICA QUEZON CITY
Now on: 100
Now on: 200
Check: 215
1064 A CULLEJON LIWANAG BGY 215 TONDO MANILA
Check: 218
107 SAMPAGUITA EXT PAYATAS QUEZON CITY
Check: 235
109 LIVERPOOL ST CLASSICA II VISTA REAL QUEZON CITY
Check: 287
114 CHICO ST QUIRINO 2B PROJEC 2 ALICIA QUEZON CITY
Check: 296
1141 SANTA RC
Now on: 300
Check: 307
116 BAGUIO CMPD ROAD 20 PROJECT 8 QUEZON CITY
Check: 391
120 NEW YORK STREET  BRGY  CUBAO  QUEZON CITY  N/A  N/A  N/A
Now on: 400
Now on: 500
Check: 547
137 SAINT HERMINE ST  BROOKSIDE BAGONG SILANGAN QUEZON CITY NCR
Check: 584
13K BLK A STO NIÑO ST SAN ANTONIO SFDM SAN ANTONIO QUEZON CITY
Now on: 600
Check: 615
142 TARLAC ST COMMONWEALTH QUEZON CITY
Check: 634
146 CAMIA ST SAN ROQUE 2 PAGASA QUEZON CITY
Check: 659
15 BLK 52 PANTOMINA MAYOR ST LAGRO SUBD NOVALICHES QUEZON CITY
Now on: 700
Che

Check: 4186
L PASCO ST CORNER DONA YAYANG LIBIS QUEZON CITY
Now on: 4200
Check: 4213
L16 BRIMLEY ST NORTH FAIRVIEW QUEZON CITY
Check: 4227
L22 KAKAMPI ST  BATASAN HILLS QUEZON CITY NCR
Check: 4258
L46 B1 SAN MIGUEL REALTY PHASE 1 BRGY FORTUNE MARIKINA
Now on: 4300
Check: 4367
LLANO ST  TS AVE SANGANDAAN   QUEZON CITY
Check: 4376
LOT 1 BLK 3 UNION VILL BO BAGUMBONG NOVA  QUEZON CITY
Check: 4397
LOT 25 BLK 2 SANTAN ST MALIGAYA NOVALICHES PROPER QUEZON CITY
Now on: 4400
Check: 4429
LOT 99 LIBIS QUEZON CITY NCR
Check: 4430
LOT VICENTE CPD BROOKSIDE BGY BAGONG SILANG QUEZON CITY
Check: 4461
LUCENA ST PROJECT 6 QUEZON CITY
Check: 4482
MA FATIMA ST DOÑA CARMEN SUBDIVISION CONGRESS PARK FAIRVIEW QUEZON CITY
Now on: 4500
Now on: 4600
Check: 4692
MATIN-AO PRK  PAG-ASA  SILWAY 8 POL SO COT   N/A  N/A  N/A
Now on: 4700
Check: 4731
MENDOZA CMPD SAMONTE RD NAGKAISANG NAYON NOVALICHES PROPER QUEZON CITY
Check: 4761
MOLAVE ST NORTHVIEW II FILINVEST II QUEZON CITY
Check: 4764
MOLAVE ST  NORTHVIEW 2  BA

### Check the Results – AKA Manual Encoding Part :(

#### Keyword Identification (Old Code, Do Not Run)

Description: Some addresses already have the baranggay written in them – so we don't need to search those up. Then, we identify addresses by villages/projects/roads which will hopefully make it easier to classify things.

In [109]:
def nextword(target, source, count):
    for i, w in enumerate(source):
        if w == target:
            try:
                return source[i+count]
            except:
                return ' '

key = []
entry = []
for item in fix['lst']:
    keys = set(item).intersection(['BGY','BRGY','VILLAGE','SUBD','HILLS','HOMES','HEIGHTS','HTS','PROJ','PROJECT',
                                   'BLDG','CONDO','ST','STR','RD','AVE','STREET','ROAD','AVENUE','DRIVE','SCT',
                                   'SCOUT','EXT','EXTN','EXTENSION','APT'])
    if len(keys) == 0:
        key.append('None')
        entry.append(' '.join(item)) 
    #Baranggay
    if 'BGY' in keys:
        key.append('Barangay')
        entry.append(nextword('BGY',item,1))
    elif 'BRGY' in keys:
        key.append('Barangay')
        entry.append(nextword('BRGY',item,1))
    #Village/Subdivision
    elif 'VILLAGE' in keys:
        key.append('Village')
        entry.append(nextword('VILLAGE',item,-1))
    elif 'SUBD' in keys:
        key.append('Village')
        entry.append(nextword('SUBD',item,-1))
    elif 'HILLS' in keys:
        key.append('Village')
        entry.append(nextword('HILLS',item,-1))
    elif 'HOMES' in keys:
        key.append('Village')
        entry.append(nextword('HOMES',item,-1))
    elif 'HEIGHTS' in keys:
        key.append('Village')
        entry.append(nextword('HEIGHTS',item,-1))
    elif 'HTS' in keys:
        key.append('Village')
        entry.append(nextword('HTS',item,-1))
    #Project
    elif 'PROJ' in keys:
        key.append('Project')
        entry.append(nextword('PROJ',item,1))
    elif 'PROJECT' in keys:
        key.append('Project')
        entry.append(nextword('PROJECT',item,1))
    #Road
    elif 'ST' in keys:
        key.append('Road')
        entry.append(nextword('ST',item,-1))
    elif 'RD' in keys:
        key.append('Road')
        entry.append(nextword('RD',item,-1))
    elif 'STR' in keys:
        key.append('Road')
        entry.append(nextword('STR',item,-1))
    elif 'STREET' in keys:
        key.append('Road')
        entry.append(nextword('STREET',item,-1))
    elif 'DRIVE' in keys:
        key.append('Road')
        entry.append(nextword('DRIVE',item,-1))
    elif 'EXT' in keys:
        key.append('Road')
        entry.append(nextword('EXT',item,-1))
    elif 'EXTENSION' in keys:
        key.append('Road')
        entry.append(nextword('EXTENSION',item,-1))
    elif 'EXTN' in keys:
        key.append('Road')
        entry.append(nextword('EXTN',item,-1))
    elif 'SCT' in keys:
        key.append('Road')
        entry.append(nextword('SCT',item,1))
    elif 'SCOUT' in keys:
        key.append('Road')
        entry.append(nextword('SCOUT',item,1))
    
    #Building
    elif 'BLDG' in keys:
        key.append('Building')
        entry.append(nextword('BLDG',item,-1))
    elif 'CONDO' in keys:
        key.append('Building')
        entry.append(nextword('CONDO',item,-1))
    elif 'APT' in keys:
        key.append('Building')
        entry.append(nextword('APT',item,-1))
    elif 'RESIDENCE' in keys:
        key.append('Building')
        entry.append(nextword('RESIDENCE',item,-1))
    elif 'RESIDENCES' in keys:
        key.append('Building')
        entry.append(nextword('RESIDENCES',item,-1))

    #Avenue
    elif 'AVE' in keys:
        key.append('Avenue')
        entry.append(nextword('AVE',item,-1))
    elif 'AVENUE' in keys:
        key.append('Avenue')
        entry.append(nextword('AVENUE',item,-1))
    elif 'ROAD' in keys:
        key.append('Avenue')
        entry.append(nextword('ROAD',item,-1))
    else:
        pass

fix['key'] = key
fix['entry'] = entry

The list "l" contains indices of addresses with no baranggays yet.

In [89]:
l=[i for i,v in enumerate(fix['bgy']) if v == None]

#### Checking for Baranggays

Description: Follow the code below to manually encode!

In [135]:
#Shows the rows for missing barangays
fix.iloc[l,:][fix['key']=='Barangay'].head(2)

  


Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
11,"004 ZAMBOANGA ST., BAI COMPOUND, BRGY. VASRA, ...",004 ZAMBOANGA ST BAI COMPOUND BRGY VASRA ...,"[004, ZAMBOANGA, ST, BAI, COMPOUND, BRGY, VASR...",6.92144,122.079,"Zamboanga, Philippines",Barangay,VASRA,POINT (122.0790267 6.9214424),Vasra
215,1064 A CULLEJON LIWANAG BGY 215 TONDO MANILA,1064 A CULLEJON LIWANAG BGY 215 TONDO MANILA,"[1064, A, CULLEJON, LIWANAG, BGY, 215, TONDO, ...",45.6696,9.70363,Il Caravaggio Orio al Serio International Airp...,Barangay,215,POINT (9.7036313 45.66957070000001),215


In [116]:
#Gets indices of missing barangays
missBrgy = fix.iloc[l,:][fix['key']=='Barangay']
missBrgy = missBrgy.index.tolist()

  """Entry point for launching an IPython kernel.


In [92]:
#Input the list of barangays in "brgys"
counter = 0
brgys = ['Vasra','215','E. Rodriguez','93','Gulod','Santo Nino','171','Pury','San Antonio',
         'Fortune','Bagong Silang','Bahay Toro','Dona Imelda','Batasan Hills']
for i in missBrgy:
    fix['bgy'][i] = brgys[counter]
    counter += 1

#### Checking for Villages

In [119]:
missVill = fix.iloc[l,:][fix['key']=='Village']
missVill = missVill.index.tolist()

  """Entry point for launching an IPython kernel.


In [118]:
fix.iloc[l,:][fix['key']=='Village']

  """Entry point for launching an IPython kernel.


Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
659,15 BLK 52 PANTOMINA MAYOR ST LAGRO SUBD NOVALI...,15 BLK 52 PANTOMINA MAYOR ST LAGRO SUBD NOVALI...,"[15, BLK, 52, PANTOMINA, MAYOR, ST, LAGRO, SUB...",,,,Village,LAGRO,,
878,18C SNT JOSEPH ST MILTON HILLS SUBD NEW ERA (C...,18C SNT JOSEPH ST MILTON HILLS SUBD NEW ERA (C...,"[18C, SNT, JOSEPH, ST, MILTON, HILLS, SUBD, NE...",37.9907,-78.4056,"Milton Hills, Scottsville, VA 22902, USA",Village,HILLS,POINT (-78.40555139999999 37.9906854),
879,18C SNT JOSEPH ST MILTON HILLS SUBD NEW ERA (C...,18C SNT JOSEPH ST MILTON HILLS SUBD NEW ERA (C...,"[18C, SNT, JOSEPH, ST, MILTON, HILLS, SUBD, NE...",37.9907,-78.4056,"Milton Hills, Scottsville, VA 22902, USA",Village,HILLS,POINT (-78.40555139999999 37.9906854),
1225,293 105 ST SOLDIERS VILLAGE SANTA LUCIA QUEZON...,293 105 ST SOLDIERS VILLAGE SANTA LUCIA QUEZON...,"[293, 105, ST, SOLDIERS, VILLAGE, SANTA, LUCIA...",14.5767,121.103,"205 90 Railroad St, Pasig, 1608 Metro Manila, ...",Village,SOLDIERS,POINT (121.1029383 14.5766815),
1410,3RD FLOOR 61 SAN ILDEFONSO DRIVE TORNES VILLAG...,3RD FLOOR 61 SAN ILDEFONSO DRIVE TORNES VILLAG...,"[3RD, FLOOR, 61, SAN, ILDEFONSO, DRIVE, TORNES...",35.892,-106.118,"San Ildefonso Pueblo, NM 87506, USA",Village,TORNES,POINT (-106.1183576 35.8919694),
1997,"A SAN ANTONIO SUBD. DAMONG MALIIT NOVA QC, DAM...",A SAN ANTONIO SUBD DAMONG MALIIT NOVA QC DAM...,"[A, SAN, ANTONIO, SUBD, DAMONG, MALIIT, NOVA, ...",29.4241,-98.4936,"San Antonio, TX, USA",Village,ANTONIO,POINT (-98.49362819999999 29.4241219),
2100,ALLADO ST SAN ANTONIO SUBD DAMONG MALIIT NAGKA...,ALLADO ST SAN ANTONIO SUBD DAMONG MALIIT NAGKA...,"[ALLADO, ST, SAN, ANTONIO, SUBD, DAMONG, MALII...",29.4241,-98.4936,"San Antonio, TX, USA",Village,ANTONIO,POINT (-98.49362819999999 29.4241219),
2185,"AREA 4 BAYANIHAN ST., TALANAY BATASAN HILLS QU...",AREA 4 BAYANIHAN ST TALANAY BATASAN HILLS QU...,"[AREA, 4, BAYANIHAN, ST, TALANAY, BATASAN, HIL...",14.6798,121.107,"Batasan-San Mateo Rd, Quezon City, 1126 Metro ...",Village,BATASAN,POINT (121.1074655 14.6797618),
2442,B59 BRIMLEY ST FAIRVILLE HOMES NORTH FAIRVIEW ...,B59 BRIMLEY ST FAIRVILLE HOMES NORTH FAIRVIEW ...,"[B59, BRIMLEY, ST, FAIRVILLE, HOMES, NORTH, FA...",46.4041,-84.5723,"Brimley, MI 49715, USA",Village,FAIRVILLE,POINT (-84.5723077 46.4041232),
2454,B7-L14/16 P1 SILVERCREEK SUBD.,B7-L14/16 P1 SILVERCREEK SUBD,"[B7-L14/16, P1, SILVERCREEK, SUBD]",,,,Village,SILVERCREEK,,


In [129]:
fix.loc[6153,]['edit']

'YANEZA DRIVE SERRA MONTE VILLAS FILINVEST 2 BATASAN HILLS QUEZON CITY'

In [130]:
counter = 0
vills = ['Greater Lagro','Culiat','Culiat','Santa Lucia','Nagkaisang Nayon',
         'Nagkaisang Nayon','Nagkaisang Nayon','Batasan Hills','Greater Lagro',
         'Bocohan','Lumbia','Holy Spirit','Batasan Hills','180','Nagkaisang Nayon',
          'Batasan Hills','Santa Monica','Greater Lagro','Nagkaisang Nayon',
          'Batasan Hills','Bagong Silangan','Batasan Hills','Batasan Hills',
          'Holy Spirit','Balintawak','Batasan Hills','Batasan Hills','Batasan Hills',
          'Batasan Hills','Batasan Hills','Batasan Hills','Batasan Hills',
          'Batasan Hills','Batasan Hills','Batasan Hills','Luinab','Balintawak',
          'Batasan Hills','Lahug','San Bartolome','171','164','Batasan Hills']
for i in missVill:
    fix['bgy'][i] = vills[counter]
    counter += 1

#### Checking for Projects

In [131]:
missProj = fix.iloc[l,:][fix['key']=='Project']
missProj = missProj.index.tolist()

  """Entry point for launching an IPython kernel.


In [132]:
fix.iloc[l,:][fix['key']=='Project']

  """Entry point for launching an IPython kernel.


Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
307,116 BAGUIO CMPD ROAD 20 PROJECT 8 QUEZON CITY,116 BAGUIO CMPD ROAD 20 PROJECT 8 QUEZON CITY,"[116, BAGUIO, CMPD, ROAD, 20, PROJECT, 8, QUEZ...",16.4023,120.596,"Baguio, Benguet, Philippines",Project,8,POINT (120.5960071 16.4023332),
1358,35-D PROJECT 4 ESCOPA I QUEZON CITY NCR,35-D PROJECT 4 ESCOPA I QUEZON CITY NCR,"[35-D, PROJECT, 4, ESCOPA, I, QUEZON, CITY, NCR]",,,,Project,4,,
1626,"5-A AREVALO COMPD. GRANTS ST., PROJ. 8, SANGAN...",5-A AREVALO COMPD GRANTS ST PROJ 8 SANGAN...,"[5-A, AREVALO, COMPD, GRANTS, ST, PROJ, 8, SAN...",41.059,-4.71874,"05200 Arévalo, Ávila, Spain",Project,8,POINT (-4.7187424 41.058981),
2835,BLK 48 LOT 35 PROJ 4 ESCOPA III QUEZON CITY NCR,BLK 48 LOT 35 PROJ 4 ESCOPA III QUEZON CITY NCR,"[BLK, 48, LOT, 35, PROJ, 4, ESCOPA, III, QUEZO...",46.3826,-63.0314,"Lot 35, PE, Canada",Project,4,POINT (-63.0314308 46.3826063),
3076,C T MASCARDO ST VILLA MARIA CLARA PROJ 4 QUEZO...,C T MASCARDO ST VILLA MARIA CLARA PROJ 4 QUEZO...,"[C, T, MASCARDO, ST, VILLA, MARIA, CLARA, PROJ...",41.6032,-73.0877,"Connecticut, USA",Project,4,POINT (-73.087749 41.6032207),
3097,CAIRO ST STA RITA VILL PROJ 8 QUEZON CITY,CAIRO ST STA RITA VILL PROJ 8 QUEZON CITY,"[CAIRO, ST, STA, RITA, VILL, PROJ, 8, QUEZON, ...",30.0444,31.2357,"Cairo, Cairo Governorate, Egypt",Project,8,POINT (31.2357116 30.0444196),
3536,ERVIE BEN NOBLEZA 1009 AURORA BLVD PROJECT 3 D...,ERVIE BEN NOBLEZA 1009 AURORA BLVD PROJECT 3 D...,"[ERVIE, BEN, NOBLEZA, 1009, AURORA, BLVD, PROJ...",54.9655,-5.12506,"Ervie, Stranraer DG9 0RB, UK",Project,3,POINT (-5.125062 54.965514),
4461,LUCENA ST PROJECT 6 QUEZON CITY,LUCENA ST PROJECT 6 QUEZON CITY,"[LUCENA, ST, PROJECT, 6, QUEZON, CITY]",13.9414,121.623,"Lucena, 4301 Quezon, Philippines",Project,6,POINT (121.6234471 13.9413957),


In [133]:
counter = 0
projs = ['Bahay Toro','Escopa I','Sangandaan','Escopa III','Milagrosa','Bahay Toro',
         'Duyan Duyan','Project 6']
for i in missProj:
    fix['bgy'][i] = projs[counter]
    counter += 1

#### Checking for Roads

In [136]:
missRoad = fix.iloc[l,:][fix['key']=='Road']
missRoad = missRoad.index.tolist()

  """Entry point for launching an IPython kernel.


In [160]:
fix.iloc[l,:][fix['key']=='Road'][70:]

  """Entry point for launching an IPython kernel.


Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
6103,VISAYAS EXTN PINGKIAN 3 ZONE 3 PASONG TAMO QUE...,VISAYAS EXTN PINGKIAN 3 ZONE 3 PASONG TAMO QUE...,"[VISAYAS, EXTN, PINGKIAN, 3, ZONE, 3, PASONG, ...",11,123.5,"Visayas, Philippines",Road,VISAYAS,POINT (123.5 11),


In [159]:
fix.loc[5963,]['edit']

'UNIT 3D APT VERLAN SANTIAGO ST   PUTOL VISTA VERDE'

In [161]:
counter = 0
roads = ['Payatas','Batasan Hills','Quirino 2B','Bagong Silangan','San Antonio',
         'Batasan Hills','Sauyo','Duyan-Duyan','177','175',
         '177','Batasan Hills','Santa Cruz','Payatas','Bago Bantay',
         'Don Bosco','Kaunlaran','Santolan','Santolan','Camp Aguinaldo',
         'Malanday','Gulod','Bagong Silangan','Bagong Silangan','Balintawak',
         'Santo Nino','Santo Nino','Camp Aguinaldo','North Fairview','Libis',
         'Central','North Fairview','Santa Cruz','North Fairview','Amihan',
         'Holy Spirit','Westt Fairview','Kaligayahan','Greater Lagro','North Fairview',
         'North Fairview','Tatalon','Central','Santa Cruz','Manresa',
         'Quirino 3A','Llano','Bagbag','Payatas','Santa Lucia',
         'Libis','North Fairview','Sangandaan','San Bartolome','Commonwealth',
         'Nagkaisang Nayon','Batasan Hills','West Fairview','Batasan Hills','Batasan Hills',
         'Roxas','Batasan Hills','Dona Imelda','Tandang Sora','Baesa',
         '177','Santa Mesa Heights','Llano','Commonwealth','Lahug',
         'Pasong Tamo'
        ]
for i in missRoad:
    fix['bgy'][i] = roads[counter]
    counter += 1

#### Checking for Buildings

In [162]:
missBuil = fix.iloc[l,:][fix['key']=='Building']
missBuil = missBuil.index.tolist()

  """Entry point for launching an IPython kernel.


In [163]:
fix.iloc[l,:][fix['key']=='Building']

  """Entry point for launching an IPython kernel.


Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
1042,216A WOODRIDGE BLDG SOFIA BELLEVUE MATANDANG B...,216A WOODRIDGE BLDG SOFIA BELLEVUE MATANDANG B...,"[216A, WOODRIDGE, BLDG, SOFIA, BELLEVUE, MATAN...",41.747,-88.0503,"Woodridge, IL, USA",Building,WOODRIDGE,POINT (-88.0503406 41.7469749),
5185,"PINK CONDO TIERRA BELLA TANDANG SORA Q.C., N/A...",PINK CONDO TIERRA BELLA TANDANG SORA Q C N/A...,"[PINK, CONDO, TIERRA, BELLA, TANDANG, SORA, Q,...",,,,Building,PINK,,
5911,UNIT 10A BLDG 3 BLK 3 GOLDMINE INT NOVALICHES ...,UNIT 10A BLDG 3 BLK 3 GOLDMINE INT NOVALICHES ...,"[UNIT, 10A, BLDG, 3, BLK, 3, GOLDMINE, INT, NO...",14.7625,121.031,"Block 3, Caloocan, Metro Manila, Philippines",Building,10A,POINT (121.0307651 14.7625237),
5915,UNIT 114 APARTELLE BLDG QUIRINO HI WAY PASONG ...,UNIT 114 APARTELLE BLDG QUIRINO HI WAY PASONG ...,"[UNIT, 114, APARTELLE, BLDG, QUIRINO, HI, WAY,...",14.8138,121.072,"Quirino Hwy, San Jose del Monte City, Bulacan,...",Building,APARTELLE,POINT (121.0723104 14.8138076),


In [165]:
fix.loc[5911,]['edit']

'UNIT 10A BLDG 3 BLK 3 GOLDMINE INT NOVALICHES NAGKAISANG NAYON QUEZON CITY'

In [166]:
counter = 0
builds = ['Matandang Balara','Tandang Sora','Nagkaisang Nayon','Pasong Tamo'
        ]
for i in missBuil:
    fix['bgy'][i] = builds[counter]
    counter += 1

#### Checking for Avenues

In [167]:
missAves = fix.iloc[l,:][fix['key']=='Avenue']
missAves = missAves.index.tolist()

  """Entry point for launching an IPython kernel.


In [168]:
fix.iloc[l,:][fix['key']=='Avenue']

  """Entry point for launching an IPython kernel.


Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
939,19C NORTH DIVERSION ROAD UNANG SIGAW BALINTAWA...,19C NORTH DIVERSION ROAD UNANG SIGAW BALINTAWA...,"[19C, NORTH, DIVERSION, ROAD, UNANG, SIGAW, BA...",14.725,120.984,"19 E1, Valenzuela, 1447 Bulacan, Philippines",Avenue,DIVERSION,POINT (120.984126 14.725003),
4890,Ñ84-C ROAD 2 PAG-ASA QUEZON CITY,Ñ84-C ROAD 2 PAG-ASA QUEZON CITY,"[Ñ84-C, ROAD, 2, PAG-ASA, QUEZON, CITY]",,,,Avenue,Ñ84-C,,


In [170]:
counter = 0
aves = ['Balintawak','Bagong Pagasa'
        ]
for i in missAves:
    fix['bgy'][i] = aves[counter]
    counter += 1

#### Everything Else

Now we check for all those that still have not been classified, and do it super manually. We first update the list of missing entries 'l'.

In [174]:
l=[i for i,v in enumerate(fix['bgy']) if v == None]
fix.iloc[l,:]

Unnamed: 0,stripaddress,edit,lst,lat,lng,add,key,entry,pt,bgy
4,001 LEYTE GROUP 7 PAYATAS QUEZON CITY,001 LEYTE GROUP 7 PAYATAS QUEZON CITY,"[001, LEYTE, GROUP, 7, PAYATAS, QUEZON, CITY]",11.3497,124.464,"Leyte, Philippines",,001 LEYTE GROUP 7 PAYATAS QUEZON CITY,POINT (124.4641848 11.3496551),
93,079 GINUNTUANG LANDAS FOREST HILL SANTA MONICA...,079 GINUNTUANG LANDAS FOREST HILL SANTA MONICA...,"[079, GINUNTUANG, LANDAS, FOREST, HILL, SANTA,...",50.4738,3.3009,"59310 Landas, France",,079 GINUNTUANG LANDAS FOREST HILL SANTA MONICA...,POINT (3.3008989 50.47381600000001),
296,1141 SANTA RC,1141 SANTA RC,"[1141, SANTA, RC]",,,,,1141 SANTA RC,,
977,203 BLK 6 DAMAYANG LAGI QUEZON CITY NCR,203 BLK 6 DAMAYANG LAGI QUEZON CITY NCR,"[203, BLK, 6, DAMAYANG, LAGI, QUEZON, CITY, NCR]",14.7628,121.031,"Block 6, Caloocan, Metro Manila, Philippines",,203 BLK 6 DAMAYANG LAGI QUEZON CITY NCR,POINT (121.0314158 14.7627947),
1187,27 NUPAX CMPD BAGBAG NOVALICHES QUEZON CITY,27 NUPAX CMPD BAGBAG NOVALICHES QUEZON CITY,"[27, NUPAX, CMPD, BAGBAG, NOVALICHES, QUEZON, ...",,,,,27 NUPAX CMPD BAGBAG NOVALICHES QUEZON CITY,,
1375,373 QUIRINO HI WAY QUEZON CITY,373 QUIRINO HI WAY QUEZON CITY,"[373, QUIRINO, HI, WAY, QUEZON, CITY]",14.8138,121.072,"Quirino Hwy, San Jose del Monte City, Bulacan,...",,373 QUIRINO HI WAY QUEZON CITY,POINT (121.0723104 14.8138076),
1650,607 GEN MALVAR BAGONG SILANGAN QUEZON CITY,607 GEN MALVAR BAGONG SILANGAN QUEZON CITY,"[607, GEN, MALVAR, BAGONG, SILANGAN, QUEZON, C...",14.6962,121.11,"607 Gen. Malvar, Quezon City, Metro Manila, Ph...",,607 GEN MALVAR BAGONG SILANGAN QUEZON CITY,POINT (121.1104401 14.6961677),
1669,"63 L-1 STO. NINO 1 CSJDM, BULACAN",63 L-1 STO NINO 1 CSJDM BULACAN,"[63, L-1, STO, NINO, 1, CSJDM, BULACAN]",14.8575,121.066,"Alley V, San Jose del Monte City, Bulacan, Phi...",,63 L-1 STO NINO 1 CSJDM BULACAN,POINT (121.06586 14.85751),
2009,A SOUTH GATE BALARA PANSOL QUEZON CITY,A SOUTH GATE BALARA PANSOL QUEZON CITY,"[A, SOUTH, GATE, BALARA, PANSOL, QUEZON, CITY]",33.9547,-118.212,"South Gate, CA, USA",,A SOUTH GATE BALARA PANSOL QUEZON CITY,POINT (-118.2120161 33.954737),
2176,AREA 06 SITIO CABUYAO SAUYO QC,AREA 06 SITIO CABUYAO SAUYO QC,"[AREA, 06, SITIO, CABUYAO, SAUYO, QC]",14.2471,121.137,"Cabuyao, Laguna, Philippines",,AREA 06 SITIO CABUYAO SAUYO QC,POINT (121.1366715 14.2471423),


In [180]:
fix.loc[6021,]['edit']

'UNIT A202A EC PUEBLO 1 44 KING CHRISTIAN KINGSPOINT NOVALICHES BAGBAG QUEZON CITY'

In [181]:
counter = 0
missings = ['Payatas','Santa Monica',None,'Damayang Lagi','Bagbag',
            'Talipapa','Bagong Silangan',None,'Pansol','Sauyo',
            'Bagong Pagasa','178','Pasong Camachile 1','Pasong Tamo','171',
            'Greater Lagro','Tatalon',None,'Bagong Lipunan ng Crame',
            None,None,'Bagong Lipunan ng Crame','171','Libis',
            None,'North Fairview','None','None','Baligang',
            'Sauyo','Vasra','Vasra','Pansol','Nagkaisang Nayon',
            'Santo Nino','Libis','Bagbag','Bancom Subdivision'
        ]
for i in l:
    fix['bgy'][i] = missings[counter]
    counter += 1

Remove all the words 'barangay' from the barangay names, update the initial dataframe called 'data', then output the final csv

In [187]:
import csv
fix['bgy'] = [s.replace('Barangay','') if s is not None else None for s in fix['bgy'] ]
data['Barangay'] = fix['bgy']
data.to_csv('/Users/lorenzoflores/Desktop/Dengue_Quezon_City_Baranggays.csv')