Index: 02

Date: 2021/11/08

In [1]:
import pandas as pd
import numpy as np
import geopandas
import shapely
from geopandas.tools import sjoin
from os import listdir

In [2]:
# Read the geocoded farm addresses from the Excel workbook; the first column becomes the index.
farm_coordinates = pd.read_excel(
    'group_a_addresses_geocoded_manual_updates.xlsx',
    index_col=0,
)

# Build point geometries from the longitude (x) and latitude (y) columns, then tag the
# resulting GeoDataFrame with EPSG:4326 (WGS84 latitude-longitude) as its CRS.
point_all = geopandas.GeoDataFrame(
    farm_coordinates,
    geometry=geopandas.points_from_xy(
        x=farm_coordinates['longitude'],
        y=farm_coordinates['latitude'],
    ),
).set_crs(epsg=4326)

In [4]:
def merge_point_poly(poly_file_path, point_all):
    """
    Spatially join the polygons of a shapefile with a set of points, keeping matched polygons.

    Input: poly_file_path: str, file path of the shapefile of polygons.
           point_all: GeoDataFrame, GeoDataFrame of points; it is reprojected to the
                      CRS of the polygon shapefile before joining (the input itself
                      is not reassigned).
    Output: GeoDataFrame, inner spatial join with the polygons on the left and the
            points on the right, using sjoin's default spatial predicate. Polygons
            that match no point are dropped.
            NOTE(review): per the geopandas sjoin documentation, a polygon matched
            by several points appears once per matching point -- confirm that
            callers expect one row per polygon-point pair.
    """
    polygons = geopandas.read_file(poly_file_path)
    # Reproject the points into the polygon layer's CRS so both sides share coordinates.
    points_in_poly_crs = point_all.to_crs(polygons.crs)
    # Polygons go on the left because sjoin keeps only the left frame's geometry.
    return sjoin(left_df=polygons, right_df=points_in_poly_crs, how='inner')

In [7]:
# Spatially join the farm points to the county polygons of the cb_2018_us_county_500k shapefile.
point_county = merge_point_poly(
    poly_file_path='cb_2018_us_county_500k/cb_2018_us_county_500k.shp',
    point_all=point_all,
)

In [9]:
# Import county-level yields
