# Example Geocoding Notebook

 https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
 
 parameters necessary in the API call for geocoding
 1. returntype: – locations(to get just geocoding response) or
                  geographies(to get geocoding response as well as geoLookup)
 2. searchtype: onelineaddress (uses 5.a.) OR address (uses 5.b.) OR coordinates
 3. benchmark: DatasetType_SpatialBenchmark (ex: "Public_AR_Current")
 4. vintage: a numerical ID or name that references what vintage of geography is desired for
             the geoLookup (only needed when returntype = geographies).
             General format: GeographyVintage_SpatialBenchmark. (ex: "Current_Census2010")
 5.a. address: A single line containing the full address to be searched
 or
 5.b. street, city, state, zip: – The address split into the parts indicated. Not all parts need to be specified.
 5.c coordinates: x,y values representing lat/long

In [12]:
import os
import pandas as pd
import requests
import sqlalchemy as sqla
import sys

pd.options.display.max_columns = 999

import censusgeocode as cg
from sqlalchemy import create_engine


DB_URI = os.getenv('OP_DWH')
engine = create_engine(DB_URI)

In [5]:
QUERY = """
select *
from data_ingest.open_payments_data_all_years
"""

In [6]:
with engine.begin() as conn:
    df = pd.read_sql(QUERY, conn)

In [7]:
df.head()

Unnamed: 0,change_type,covered_recipient_type,teaching_hospital_ccn,teaching_hospital_id,teaching_hospital_name,physician_profile_id,physician_first_name,physician_middle_name,physician_last_name,physician_name_suffix,recipient_primary_business_street_address_line1,recipient_primary_business_street_address_line2,recipient_city,recipient_state,recipient_zip_code,recipient_country,recipient_province,recipient_postal_code,physician_primary_type,physician_specialty,physician_license_state_code1,physician_license_state_code2,physician_license_state_code3,physician_license_state_code4,physician_license_state_code5,submitting_applicable_manufacturer_or_applicable_gpo_name,applicable_manufacturer_or_applicable_gpo_making_payment_id,applicable_manufacturer_or_applicable_gpo_making_payment_name,applicable_manufacturer_or_applicable_gpo_making_payment_state,applicable_manufacturer_or_applicable_gpo_making_payment_countr,total_amount_of_payment_usdollars,date_of_payment,number_of_payments_included_in_total_amount,form_of_payment_or_transfer_of_value,nature_of_payment_or_transfer_of_value,city_of_travel,state_of_travel,country_of_travel,physician_ownership_indicator,third_party_payment_recipient_indicator,name_of_third_party_entity_receiving_payment_or_transfer_of_val,charity_indicator,third_party_equals_covered_recipient_indicator,contextual_information,delay_in_publication_indicator,record_id,dispute_status_for_publication,product_indicator,name_of_associated_covered_drug_or_biological1,name_of_associated_covered_drug_or_biological2,name_of_associated_covered_drug_or_biological3,name_of_associated_covered_drug_or_biological4,name_of_associated_covered_drug_or_biological5,ndc_of_associated_covered_drug_or_biological1,ndc_of_associated_covered_drug_or_biological2,ndc_of_associated_covered_drug_or_biological3,ndc_of_associated_covered_drug_or_biological4,ndc_of_associated_covered_drug_or_biological5,name_of_associated_covered_device_or_medical_supply1,name_of_associated_covered_device_or_medical_supply2,name_of_associated_covered_device_or_medical_supply3,name_of_associated_covered_device_or_medical_supply4,name_of_associated_covered_device_or_medical_supply5,program_year,payment_publication_date,related_product_indicator,covered_or_noncovered_indicator_1,indicate_drug_or_biological_or_device_or_medical_supply_1,product_category_or_therapeutic_area_1,name_of_drug_or_biological_or_device_or_medical_supply_1,associated_drug_or_biological_ndc_1,covered_or_noncovered_indicator_2,indicate_drug_or_biological_or_device_or_medical_supply_2,product_category_or_therapeutic_area_2,name_of_drug_or_biological_or_device_or_medical_supply_2,associated_drug_or_biological_ndc_2,covered_or_noncovered_indicator_3,indicate_drug_or_biological_or_device_or_medical_supply_3,product_category_or_therapeutic_area_3,name_of_drug_or_biological_or_device_or_medical_supply_3,associated_drug_or_biological_ndc_3,covered_or_noncovered_indicator_4,indicate_drug_or_biological_or_device_or_medical_supply_4,product_category_or_therapeutic_area_4,name_of_drug_or_biological_or_device_or_medical_supply_4,associated_drug_or_biological_ndc_4,covered_or_noncovered_indicator_5,indicate_drug_or_biological_or_device_or_medical_supply_5,product_category_or_therapeutic_area_5,name_of_drug_or_biological_or_device_or_medical_supply_5,associated_drug_or_biological_ndc_5,full_address,physician_full_name
0,UNCHANGED,Covered Recipient Physician,,,,174740,LISA,,CAPALDINI,,45 CASTRO ST,STE 227,SAN FRANCISCO,CA,94114,United States,,,Medical Doctor,Allopathic & Osteopathic Physicians|Internal M...,CA,,,,,"AbbVie, Inc.",100000000204,"AbbVie, Inc.",IL,United States,4.29,2013-12-06,1,In-kind items and services,Food and Beverage,,,,No,No Third Party Payment,,,,,No,31094501,No,Covered,Androgel,,,,,0051-8462-33,,,,,,,,,,2013,2019-01-18,,,,,,,,,,,,,,,,,,,,,,,,,,,"45 CASTRO ST STE 227, SAN FRANCISCO, CA 94114",
1,UNCHANGED,Covered Recipient Physician,,,,154459,UTKU,,KANDEMIR,,1001 POTRERO AVENUE,RM 3A36,SAN FRANCISCO,CA,94110,United States,,,Medical Doctor,Allopathic & Osteopathic Physicians|Orthopaedi...,CA,,,,,Stryker Corporation,100000010503,Stryker Corporation,MI,United States,15.49,2013-09-27,1,Cash or cash equivalent,Food and Beverage,,,,No,No Third Party Payment,,,,,No,29893470,No,Covered,,,,,,,,,,,TRAUMA and EXTREMITIES,,,,,2013,2019-01-18,,,,,,,,,,,,,,,,,,,,,,,,,,,"1001 POTRERO AVENUE RM 3A36, SAN FRANCISCO, CA...",
2,UNCHANGED,Covered Recipient Physician,,,,820445,CAROL,K,LEE,,1518 NORIEGA ST,SUITE 200,SAN FRANCISCO,CA,94122-4434,United States,,,Medical Doctor,Allopathic & Osteopathic Physicians|Pediatrics,CA,,,,,"Galderma Laboratories, L.P.",100000010375,"Galderma Laboratories, L.P.",TX,United States,12.49,2013-08-01,1,In-kind items and services,Food and Beverage,,,,No,No Third Party Payment,,No,,,No,24534805,No,Covered,EPIDUO,,,,,,,,,,,,,,,2013,2019-01-18,,,,,,,,,,,,,,,,,,,,,,,,,,,"1518 NORIEGA ST SUITE 200, SAN FRANCISCO, CA 9...",
3,UNCHANGED,Covered Recipient Physician,,,,801,PAUL,,HEIM,,909 HYDE ST,SUITE 428,SAN FRANCISCO,CA,94109-4822,United States,,,Medical Doctor,Allopathic & Osteopathic Physicians|Psychiatry...,CA,,,,,Sunovion Pharmaceuticals Inc.,100000000254,Sunovion Pharmaceuticals Inc.,MA,United States,13.35,2013-10-31,1,In-kind items and services,Food and Beverage,,,,No,No Third Party Payment,,,,,No,101274004,No,Covered,Latuda,,,,,63402-304-30,,,,,,,,,,2013,2019-01-18,,,,,,,,,,,,,,,,,,,,,,,,,,,"909 HYDE ST SUITE 428, SAN FRANCISCO, CA 94109...",
4,UNCHANGED,Covered Recipient Physician,,,,338981,BRUCE,A.,CREE,,675 NELSON RISING LANE,UCSF MULTIPLE SCLEROSIS CENTER,SAN FRANCISCO,CA,94158,United States,,,Medical Doctor,Allopathic & Osteopathic Physicians|Psychiatry...,CA,,,,,Biogen Idec Inc.,100000000193,Biogen Idec Inc.,MA,United States,32.26,2013-09-21,1,Cash or cash equivalent,Food and Beverage,,,,No,No Third Party Payment,,No,,,No,23682087,No,Covered,TYSABRI,TECFIDERA,AVONEX,,,,,,,,,,,,,2013,2019-01-18,,,,,,,,,,,,,,,,,,,,,,,,,,,675 NELSON RISING LANE UCSF MULTIPLE SCLEROSIS...,


In [8]:
df['full_address'] = df['recipient_primary_business_street_address_line1'].fillna('') \
  + ' ' + df['recipient_primary_business_street_address_line2'].fillna('') \
  + ', ' + df['recipient_city'].fillna('') \
  + ', ' + df['recipient_state'].fillna('') \
  + ' ' + df['recipient_zip_code'].fillna('')

df['full_address'].replace({' ,': ','}, inplace=True, regex=True)

In [9]:
df.full_address.head()

0        45 CASTRO ST STE 227, SAN FRANCISCO, CA 94114
1    1001 POTRERO AVENUE RM 3A36, SAN FRANCISCO, CA...
2    1518 NORIEGA ST SUITE 200, SAN FRANCISCO, CA 9...
3    909 HYDE ST SUITE 428, SAN FRANCISCO, CA 94109...
4    675 NELSON RISING LANE UCSF MULTIPLE SCLEROSIS...
Name: full_address, dtype: object

In [10]:
example_address = df.full_address[0]

In [11]:
example_address

'45 CASTRO ST STE 227, SAN FRANCISCO, CA 94114'

In [16]:
result = cg.onelineaddress(example_address)

In [18]:
type(result)

censusgeocode.censusgeocode.AddressResult

In [28]:
latitude = result[0]['coordinates']['x']
longitude = result[0]['coordinates']['y']
census_block = result[0]['geographies']['2010 Census Blocks'][0]['BLOCK']
census_block_group = result[0]['geographies']['2010 Census Blocks'][0]['BLKGRP']
census_tract = result[0]['geographies']['2010 Census Blocks'][0]['TRACT']
census_geoid = result[0]['geographies']['2010 Census Blocks'][0]['GEOID']

In [29]:
print(latitude, longitude, census_block, census_block_group, census_tract, census_geoid)

-122.43574 37.76856 2000 2 016900 060750169002000
