In [1]:
import pandas as pd
import geopandas as gpd

# 1. Take in a `.csv` of addresses and geocode them.

* Geocode address column by apply-ing geolocator.geocode 
* Create a geometry column
* Create a geodataframe called gdf from df
* Define the .crs =  {'init': 'epsg:4326'} 
* Drop the geocode column from the gdf 

In [3]:
from geopy.geocoders import Nominatim
from shapely.geometry import Point
from geopandas import GeoDataFrame

# Module-level geocoder instance used by geocode_dataframe below; it is built
# once with a fixed user_agent string and reused for every address lookup.
geolocator = Nominatim(user_agent='pratt_geospatial')

# Example usage (the CSV is expected to contain an 'address' column):
# address_df = pd.read_csv('data/input/address.csv')
# gdf = geocode_dataframe(address_df, address_column='address')

def geocode_dataframe(address_dataframe, address_column):
    """Geocode one column of addresses and return the rows as a GeoDataFrame.

    Parameters
    ----------
    address_dataframe : pandas.DataFrame
        Table holding the addresses; it is copied, never modified.
    address_column : str
        Name of the column in ``address_dataframe`` whose values are passed
        to ``geolocator.geocode``.

    Returns
    -------
    geopandas.GeoDataFrame
        Copy of the input with a ``geometry`` column of lon/lat points in
        EPSG:4326. Rows whose address could not be geocoded get a missing
        (``None``) geometry instead of raising.
    """
    df = address_dataframe.copy()

    # geocode the requested column (previously hard-coded to 'address',
    # which silently ignored the address_column argument)
    df['geocode'] = df[address_column].apply(geolocator.geocode)

    # create a geometry column; the geocoder yields None for an address it
    # cannot resolve, so guard before reading longitude/latitude
    df['geometry'] = df['geocode'].apply(
        lambda location: (
            Point(location.longitude, location.latitude)  # shapely point is (x=lon, y=lat)
            if location is not None
            else None
        )
    )

    # create a geodataframe called gdf from df and define the crs as epsg:4326
    gdf = GeoDataFrame(df, geometry='geometry', crs={'init': 'epsg:4326', 'no_defs': True})

    # drop the helper geocode column from the gdf
    gdf = gdf.drop(columns=['geocode'])

    return gdf

# 2. Find out what states these addresses are in
* Read states_geojson file
* Spatial join to associate state IDs with the address points
* Be extra careful with gdf's crs

In [5]:
# import geopandas as gpd

def get_state_ids(
        geodataframe_points,
        states_geojson='data/input/states.json',
):
    """Attach state attributes to each point via a spatial join.

    Parameters
    ----------
    geodataframe_points : geopandas.GeoDataFrame
        Point geometries to look up. The caller's object is left untouched;
        a reprojected copy is used for the join.
    states_geojson : str
        Path (or URL) of the states GeoJSON, e.g.
        https://raw.githubusercontent.com/loganpowell/census-geojson/master/GeoJSON/5m/2018/state.json

    Returns
    -------
    geopandas.GeoDataFrame
        Left join of the points with the ``GEOID``, ``STUSPS`` and ``NAME``
        columns of the state polygons they intersect.
    """
    # read states_geojson, keeping only the identifying columns and geometry
    states_gdf = gpd.read_file(states_geojson)[['GEOID', 'STUSPS', 'NAME', 'geometry']]

    # match both CRSs before joining; to_crs without inplace returns a new
    # frame, so the caller's GeoDataFrame is not reprojected as a side effect
    points_in_states_crs = geodataframe_points.to_crs(states_gdf.crs)

    # spatial join to associate state ids with address points
    # NOTE(review): newer geopandas releases rename `op` to `predicate` --
    # confirm against the installed version before upgrading.
    points_sj_states = gpd.sjoin(
        points_in_states_crs,
        states_gdf,
        how='left',
        op='intersects',    # within <-opposite-> contains
                            # points are within the states
    )

    return points_sj_states

# 3. Download and unzip those state Census Tract shapefiles
* Download the file from the download url = https://www2.census.gov/geo/tiger/TIGER2019/TRACT
* Unzip the file to saved_file

In [None]:
import urllib.request
import zipfile
import os.path

url_dir = 'https://www2.census.gov/geo/tiger/TIGER2019/TRACT'


def get_census_tracts(geoid, download_directory):
    """Download and unzip the 2019 TIGER census-tract archive for one state.

    Parameters
    ----------
    geoid : str
        State identifier inserted into the archive name
        ``tl_2019_{geoid}_tract.zip``.
    download_directory : str
        Directory that receives both the zip archive and its extracted
        members; it is created when missing.

    Returns
    -------
    str
        Path of the zip archive inside ``download_directory``.
    """
    download_url = f'{url_dir}/tl_2019_{geoid}_tract.zip'
    saved_file = f'{download_directory}/tl_2019_{geoid}_tract.zip'

    os.makedirs(download_directory, exist_ok=True)

    # download the file from the download url, unless a previous run already
    # fetched it; write to a temporary name first so an interrupted transfer
    # is never mistaken for a complete archive
    if not os.path.isfile(saved_file):
        partial_file = f'{saved_file}.part'
        urllib.request.urlretrieve(download_url, partial_file)
        os.replace(partial_file, saved_file)

    # unzip the archive next to saved_file
    with zipfile.ZipFile(saved_file) as archive:
        archive.extractall(download_directory)

    return saved_file
    


# 4. Fetch ACS 5-year census data (e.g. population) for every census tract in a state

In [7]:
from census import Census
import pandas as pd


def get_census_tract_acs_data_for_state(
        api_key,
        acs_field_name,
        state_fips,
):
    """Query ACS5 tract-level records for one state and add a GEOID column.

    Parameters
    ----------
    api_key : str
        Key handed to the ``Census`` client.
    acs_field_name
        Field (or fields) forwarded to ``acs5.state_county_tract``.
    state_fips : str
        State identifier forwarded to the same call.

    Returns
    -------
    pandas.DataFrame
        The returned records with a ``GEOID`` column formed by concatenating
        the string forms of ``state``, ``county`` and ``tract``.
    """
    client = Census(api_key)

    # '*' is passed for both county and tract -- presumably a wildcard
    # selecting all of them; confirm against the census package docs.
    records = client.acs5.state_county_tract(acs_field_name, state_fips, '*', '*')
    tract_df = pd.DataFrame(records)

    state_part, county_part, tract_part = (
        tract_df[column].astype(str) for column in ('state', 'county', 'tract')
    )
    tract_df['GEOID'] = state_part + county_part + tract_part

    return tract_df

# 5. Buffer the addresses by a given buffer distance of 2km
* Project to epsg=3857

In [None]:
def buffer_addresses(point_gdf, buffer_distance):
    """Return a copy of ``point_gdf`` whose geometries are buffered circles.

    Parameters
    ----------
    point_gdf : geopandas.GeoDataFrame
        Address points in any defined CRS. The caller's frame is not
        modified.
    buffer_distance : float
        Buffer radius expressed in the units of EPSG:3857
        (presumably metres, e.g. 2000 for 2 km -- confirm for your use).

    Returns
    -------
    geopandas.GeoDataFrame
        The input rows, reprojected to EPSG:3857, with each geometry replaced
        by its buffer; all non-geometry columns are kept.
    """
    # project to 3857; to_crs without inplace returns a new frame, so the
    # input keeps its original CRS
    buffered = point_gdf.to_crs(epsg=3857)

    # buffer by buffer distance and store the result (it was previously
    # computed and thrown away, and an undefined name was returned)
    buffered[buffered.geometry.name] = buffered.geometry.buffer(buffer_distance)

    return buffered

# 6. Intersect and perform area-weighted interpolation to estimate population within Buffer distance.  Then sum the population in Buffer.

# 7. Print maps of the geocoded address, tract boundaries and buffers.

* Plot buffer_pol
* Plot tracts
* Plot buffer_int
* Plot point - be sure that it's using epsg=3857
* Center the map
* Add the base map

In [None]:
import matplotlib.pyplot as plt
import contextily as ctx


def create_map(point, buffer_pol, buffer_int, tracts, index):
    """Draw one address with its buffer and tracts, then save it as a PNG.

    Parameters
    ----------
    point : geopandas.GeoDataFrame
        The geocoded address point(s) to mark.
    buffer_pol : geopandas.GeoDataFrame
        Buffer polygons; the row labelled ``index`` sets the map extent and
        its ``address`` value is used in the title (assumes the buffer frame
        carries an ``address`` column -- confirm against the caller).
    buffer_int : geopandas.GeoDataFrame
        Buffer/tract intersection pieces with a ``pop_est`` column, which is
        summed for the title.
    tracts : geopandas.GeoDataFrame
        Census tract polygons.
    index
        Row label in ``buffer_pol``; also used in the output file name
        ``maps/map_{index}_int_tract_pop.png``.
    """
    fig, ax = plt.subplots(figsize=(12, 12))

    # reproject the buffer once so the drawn layer and the axis limits share
    # the same (EPSG:3857) coordinates
    buffer_3857 = buffer_pol.to_crs(epsg=3857)

    # plot buffer_pol
    buffer_3857.plot(ax=ax, color='lightyellow')

    # plot tracts
    tracts.to_crs(epsg=3857).plot(ax=ax, color='yellow')

    # plot buffer_int (this step was previously left empty)
    buffer_int.to_crs(epsg=3857).plot(ax=ax, color='orange', alpha=0.5)

    # plot point - be sure that it's using epsg=3857
    # geopandas sizes point markers through `markersize`, not `size`
    point.to_crs(epsg=3857).plot(ax=ax, color='red', marker='X', markersize=5000)

    # center the map on the selected buffer
    extent = buffer_3857.bounds.loc[index]
    ax.set_xlim(extent['minx'], extent['maxx'])
    ax.set_ylim(extent['miny'], extent['maxy'])

    population = buffer_int['pop_est'].sum()  # calc sum pop for buffer_int
    # the title previously read from an undefined name `buffer`
    ax.set_title(f'{buffer_pol["address"][index]} - Population: {population:,.0f}')

    ctx.add_basemap(
        ax,
        url='https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png'
    )  # add basemap

    ax.axis('off')  # turn off x, y axis lines

    plt.savefig(f'maps/map_{index}_int_tract_pop.png')
