# Merge geocode results back to dataset

In [9]:
import pandas as pd
# I showed geopandas during the 11/28 study hall
import geopandas as gpd

## Import Oakland 311 data with cleaned up street address

In [10]:
# Load the cleaned Oakland 311 requests. The ADDRESS_CLEANED column in this
# file is the key used for the merge later in the notebook.
oakland311 = pd.read_csv(filepath_or_buffer='exports/oakland311_cleaned.csv')
# Preview the last five rows.
oakland311.tail(n=5)

Unnamed: 0,REQUESTID,DATETIMEINIT,SOURCE,DESCRIPTION,REQCATEGORY,REQADDRESS,STATUS,REFERREDTO,DATETIMECLOSED,SRX,SRY,COUNCILDISTRICT,BEAT,PROBADDRESS,City,State,ELAPSED_TIME,ADDRESS_CLEANED
17069,1120609,2021-06-11 08:53:57,Phone,Sewers - Blockage,SEWERS,,CLOSED,,2021-06-29 07:09:33,,,,,7044 NORFOLK ROAD,Oakland,CA,17 days 22:15:36,7044 NORFOLK ROAD
17070,1113573,2021-05-15 17:29:28,Phone,City Bldg - Other/Complex,BLDGMAINT,,CLOSED,"GETWOOD, ROY",2021-06-02 13:43:29,,,,,Cross streets are skyline and juaqiem miller m...,Oakland,CA,17 days 20:14:01,CROSS STREETS ARE SKYLINE AND JUAQIEM MILLER M...
17071,1120590,2021-06-11 08:14:03,Phone,"Illegal Dumping - debris, appliances, etc.",ILLDUMP,,CLOSED,,2021-06-21 16:03:04,,,,,1310 76TH,Oakland,CA,10 days 07:49:01,1310 76TH
17072,1120594,2021-06-11 08:22:46,Phone,Parking - Abandoned Vehicle,POLICE,,CANCEL,,,,,,21X,2343 EAST 24TH ST,Oakland,CA,,2343 EAST 24TH ST
17073,1111146,2021-05-06 12:23:23,Phone,Graffiti in a Park,BLDGMAINT,,CLOSED,,2021-05-17 14:58:57,,,,,FOOTHILL MEADOWS (CESAR E. CHAVEZ) PARK,Oakland,CA,11 days 02:35:34,FOOTHILL MEADOWS (CESAR E. CHAVEZ) PARK


## Import geocode results data

In [11]:
# Load the geocoder output. The identifier columns are forced to strings so
# that values such as "06" and "001" keep their leading zeros instead of
# being parsed as numbers.
geo_data_df = pd.read_csv(
    'exports/census_results.csv',
    dtype=dict.fromkeys(
        ['tigerlineid', 'statefp', 'countyfp', 'tract', 'block'],
        str,
    ),
)
geo_data_df

Unnamed: 0,id,address,match,matchtype,parsed,tigerlineid,side,statefp,countyfp,tract,block,lat,lon
0,0,"500 E. 22ND ST, Oakland, CA,",True,Exact,"500 22ND ST, OAKLAND, CA, 94612",124996500.0,R,6,1,402801,1011,37.811109,-122.269588
1,1,"900 36TH AV, Oakland, CA,",True,Exact,"900 36TH AVE, OAKLAND, CA, 94601",125006474.0,R,6,1,406100,2002,37.77294,-122.223884
2,3,"850 PINE ST, Oakland, CA,",True,Exact,"850 PINE ST, OAKLAND, CA, 94607",124995322.0,R,6,1,401700,1006,37.809568,-122.302636
3,4,"5300 BLOCK OF JAMES AVE, Oakland, CA,",True,Exact,"5300 JAMES AVE, OAKLAND, CA, 94618",124999407.0,R,6,1,400300,4011,37.838561,-122.253707
4,6,"2000 CAMPBELL ST, Oakland, CA,",True,Exact,"2000 CAMPBELL ST, OAKLAND, CA, 94607",606185175.0,R,6,1,401700,2007,37.816176,-122.29194
5,8,"2045 EAST 15TH ST, Oakland, CA,",True,Exact,"2045 E 15TH ST, OAKLAND, CA, 94606",606190585.0,R,6,1,405901,1005,37.786372,-122.238749
6,9,"MUNSON & E 15TH ST, Oakland, CA,",True,Non_Exact,"MUNSON WAY & E 15TH ST, OAKLAND, CA, 94606",,,6,1,405901,1010,37.784883,-122.236251


In [12]:
# Turn the geocode table into a GeoDataFrame with one point per row, built
# from the longitude (x) and latitude (y) columns.
# NOTE(review): no CRS is assigned, so the points carry no coordinate
# reference system. Confirm the geocoder's datum and set `crs=` before doing
# any spatial operation (reprojection, spatial join, mapping).
geo_data_gdf = gpd.GeoDataFrame(
    data=geo_data_df,
    geometry=gpd.points_from_xy(x=geo_data_df['lon'], y=geo_data_df['lat']),
)
geo_data_gdf

Unnamed: 0,id,address,match,matchtype,parsed,tigerlineid,side,statefp,countyfp,tract,block,lat,lon,geometry
0,0,"500 E. 22ND ST, Oakland, CA,",True,Exact,"500 22ND ST, OAKLAND, CA, 94612",124996500.0,R,6,1,402801,1011,37.811109,-122.269588,POINT (-122.26959 37.81111)
1,1,"900 36TH AV, Oakland, CA,",True,Exact,"900 36TH AVE, OAKLAND, CA, 94601",125006474.0,R,6,1,406100,2002,37.77294,-122.223884,POINT (-122.22388 37.77294)
2,3,"850 PINE ST, Oakland, CA,",True,Exact,"850 PINE ST, OAKLAND, CA, 94607",124995322.0,R,6,1,401700,1006,37.809568,-122.302636,POINT (-122.30264 37.80957)
3,4,"5300 BLOCK OF JAMES AVE, Oakland, CA,",True,Exact,"5300 JAMES AVE, OAKLAND, CA, 94618",124999407.0,R,6,1,400300,4011,37.838561,-122.253707,POINT (-122.25371 37.83856)
4,6,"2000 CAMPBELL ST, Oakland, CA,",True,Exact,"2000 CAMPBELL ST, OAKLAND, CA, 94607",606185175.0,R,6,1,401700,2007,37.816176,-122.29194,POINT (-122.29194 37.81618)
5,8,"2045 EAST 15TH ST, Oakland, CA,",True,Exact,"2045 E 15TH ST, OAKLAND, CA, 94606",606190585.0,R,6,1,405901,1005,37.786372,-122.238749,POINT (-122.23875 37.78637)
6,9,"MUNSON & E 15TH ST, Oakland, CA,",True,Non_Exact,"MUNSON WAY & E 15TH ST, OAKLAND, CA, 94606",,,6,1,405901,1010,37.784883,-122.236251,POINT (-122.23625 37.78488)


## How will we merge these two datasets?

In [13]:
# Show every field of the first geocoded record, untruncated, so the exact
# text of `address` (including its ", Oakland, CA, " suffix) is visible.
geo_data_gdf.iloc[0]

id                                                         0
address                        500 E. 22ND ST, Oakland, CA, 
match                                                   True
matchtype                                              Exact
parsed                       500 22ND ST, OAKLAND, CA, 94612
tigerlineid                                        124996500
side                                                       R
statefp                                                   06
countyfp                                                 001
tract                                                 402801
block                                                   1011
lat                                                37.811109
lon                                              -122.269588
geometry       POINT (-122.26958841199996 37.81110892100003)
Name: 0, dtype: object

In [14]:
# Rebuild the join key by removing the ", Oakland, CA, " suffix from the
# geocoder's `address` field.
#
# The pattern is anchored to the END of the string and tolerates missing or
# extra whitespace. A plain, unanchored replace would also delete the same
# text from the middle of an address, and would silently stop matching if
# the trailing space were trimmed somewhere upstream. Either failure leaves
# keys that do not match and rows that vanish from the inner join.
# Only the suffix (starting at its comma) is removed; whatever precedes it
# is kept byte-for-byte.
geo_data_gdf['ADDRESS_CLEANED'] = geo_data_gdf['address'].str.replace(
    r',\s*Oakland,\s*CA,?\s*$',
    '',
    regex=True
)
geo_data_gdf

Unnamed: 0,id,address,match,matchtype,parsed,tigerlineid,side,statefp,countyfp,tract,block,lat,lon,geometry,ADDRESS_CLEANED
0,0,"500 E. 22ND ST, Oakland, CA,",True,Exact,"500 22ND ST, OAKLAND, CA, 94612",124996500.0,R,6,1,402801,1011,37.811109,-122.269588,POINT (-122.26959 37.81111),500 E. 22ND ST
1,1,"900 36TH AV, Oakland, CA,",True,Exact,"900 36TH AVE, OAKLAND, CA, 94601",125006474.0,R,6,1,406100,2002,37.77294,-122.223884,POINT (-122.22388 37.77294),900 36TH AV
2,3,"850 PINE ST, Oakland, CA,",True,Exact,"850 PINE ST, OAKLAND, CA, 94607",124995322.0,R,6,1,401700,1006,37.809568,-122.302636,POINT (-122.30264 37.80957),850 PINE ST
3,4,"5300 BLOCK OF JAMES AVE, Oakland, CA,",True,Exact,"5300 JAMES AVE, OAKLAND, CA, 94618",124999407.0,R,6,1,400300,4011,37.838561,-122.253707,POINT (-122.25371 37.83856),5300 BLOCK OF JAMES AVE
4,6,"2000 CAMPBELL ST, Oakland, CA,",True,Exact,"2000 CAMPBELL ST, OAKLAND, CA, 94607",606185175.0,R,6,1,401700,2007,37.816176,-122.29194,POINT (-122.29194 37.81618),2000 CAMPBELL ST
5,8,"2045 EAST 15TH ST, Oakland, CA,",True,Exact,"2045 E 15TH ST, OAKLAND, CA, 94606",606190585.0,R,6,1,405901,1005,37.786372,-122.238749,POINT (-122.23875 37.78637),2045 EAST 15TH ST
6,9,"MUNSON & E 15TH ST, Oakland, CA,",True,Non_Exact,"MUNSON WAY & E 15TH ST, OAKLAND, CA, 94606",,,6,1,405901,1010,37.784883,-122.236251,POINT (-122.23625 37.78488),MUNSON & E 15TH ST


## Merge on key

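I'm going to do an INNER join because we only geocoded a handful of addresses and I don't want to show the entire dataframe in lecture. Keep in mind that an inner join silently drops every request whose address was not geocoded. In a real-life newsroom assignment, you would have to deal with those missing addresses explicitly — for example, by doing a LEFT join and checking which rows come back without a `lat`/`lon`.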

In [15]:
# Attach the geocode results to the 311 requests by matching on the cleaned
# street address. The inner join keeps only requests whose address appears
# in the geocode table.
#
# validate='many_to_one' makes pandas raise a MergeError if the same
# ADDRESS_CLEANED appears more than once in the geocode table. Without this
# check, a repeated geocoded address would silently duplicate every matching
# 311 request in the output.
#
# NOTE: the plain DataFrame is the left operand, so per the geopandas docs
# the result is a regular DataFrame (with a geometry column), not a
# GeoDataFrame.
merged = pd.merge(
    oakland311,
    geo_data_gdf,
    on='ADDRESS_CLEANED',
    how='inner',
    validate='many_to_one',
)
merged

Unnamed: 0,REQUESTID,DATETIMEINIT,SOURCE,DESCRIPTION,REQCATEGORY,REQADDRESS,STATUS,REFERREDTO,DATETIMECLOSED,SRX,...,parsed,tigerlineid,side,statefp,countyfp,tract,block,lat,lon,geometry
0,1212074,2022-06-02 13:39:35,Phone or Email,Parking - Abandoned Vehicle,POLICE,,WAITING ON CUSTOMER,,,,...,"900 36TH AVE, OAKLAND, CA, 94601",125006474.0,R,6,1,406100,2002,37.77294,-122.223884,POINT (-122.22388 37.77294)
1,424988,2012-09-11 11:12:54,Phone or Email,Illegal Dumping � mattress/boxspring,ILLDUMP,,CLOSED,,2012-09-13 16:41:52,,...,"MUNSON WAY & E 15TH ST, OAKLAND, CA, 94606",,,6,1,405901,1010,37.784883,-122.236251,POINT (-122.23625 37.78488)
2,445956,2013-02-07 12:53:04,Phone or Email,"Illegal Dumping - debris, appliances, etc.",ILLDUMP,,CLOSED,,2013-04-10 17:00:54,,...,"850 PINE ST, OAKLAND, CA, 94607",124995322.0,R,6,1,401700,1006,37.809568,-122.302636,POINT (-122.30264 37.80957)
3,573447,2015-03-26 09:42:57,Email,Recycling - Missed Pickup - Whole Block,RECYCLING,,CLOSED,,2015-03-26 17:06:21,,...,"5300 JAMES AVE, OAKLAND, CA, 94618",124999407.0,R,6,1,400300,4011,37.838561,-122.253707,POINT (-122.25371 37.83856)
4,1047980,2020-09-17 15:04:11,Phone or Email,Illegal Dumping � mattress/boxspring,ILLDUMP,,CLOSED,,2020-09-21 15:58:58,,...,"2000 CAMPBELL ST, OAKLAND, CA, 94607",606185175.0,R,6,1,401700,2007,37.816176,-122.29194,POINT (-122.29194 37.81618)
5,1065839,2020-11-16 13:13:56,Phone,Streets - Potholes/Depression,STREETSW,,OPEN,,,,...,"2045 E 15TH ST, OAKLAND, CA, 94606",606190585.0,R,6,1,405901,1005,37.786372,-122.238749,POINT (-122.23875 37.78637)
6,1092129,2021-02-25 11:19:02,Voicemail,Recycling Service Issues,RECYCLING,,CLOSED,,2021-02-26 13:41:56,,...,"500 22ND ST, OAKLAND, CA, 94612",124996500.0,R,6,1,402801,1011,37.811109,-122.269588,POINT (-122.26959 37.81111)


## Export your clean data!

In [16]:
# Write the merged table to disk; index=False leaves the row index out of
# the CSV.
merged.to_csv(
    path_or_buf='exports/oakland311_geo.csv',
    index=False,
)