### Create a CSV with the county and town of each solar farm in the lottery
 
Sara Johns

Date created: 03/13/2023

Date last modified: 03/13/2023

In [None]:
# Set up

# Import libraries
import pandas as pd
import numpy as np
import geopandas

# Root folder that every input and output path below is built from
data_path = '/Users/sarajohns/Google Drive/My Drive/Farmers_Solar/data/'

# Load the geocoded lottery locations into a DataFrame
lottery_csv = data_path + 'ipa_lottery/processed/final_lottery_locations.csv'
lottery = pd.read_csv(lottery_csv)

### Get clean town and zip code using regular expressions

In [None]:
# Take the last five characters of Address as the zip code
# NOTE(review): relies on every Address ending in a 5-digit zip with no trailing
# whitespace -- confirm against the data
lottery['zip'] = lottery['Address'].str[-5:]

# Capture the comma-separated field that sits directly before the
# two-letter state and the 5-digit zip at the end of the address
regex = r"^(?:.*,)?\s*(.*?),\s*[A-Z]{2},?\s*\d{5}$"
lottery['town1'] = lottery['Address'].str.extract(regex)

# Addresses the pattern does not match (the lat/lon style ones) fall back to
# the existing 'town' column with its last five characters removed
town_without_tail = lottery['town'].str[:-5]
lottery['town1'] = lottery['town1'].fillna(town_without_tail)

### Map to county

In [None]:
# Build point geometries from the lon/lat columns (x = lon, y = lat) and
# wrap the lottery table in a GeoDataFrame
lottery_points = geopandas.points_from_xy(lottery['lon'], lottery['lat'])
lot_loc = geopandas.GeoDataFrame(lottery, geometry=lottery_points)

# Load the county boundary shapefile
county_shp = data_path + 'shapefile/IL_BNDY_County/IL_BNDY_County_Py.shp'
county = geopandas.GeoDataFrame.from_file(county_shp)

# Label the lottery points with the county layer's CRS
# NOTE(review): set_crs assigns a CRS without transforming coordinates, so this
# is only correct if the shapefile itself is in lon/lat degrees -- confirm
lot_loc = lot_loc.set_crs(county.crs)

# Spatial join: pair each county with the lottery points that intersect it
lot_county = geopandas.sjoin(county, lot_loc, how='inner')

# The inner join drops points that match no county (missing lat/lon, or a
# location outside every county boundary).
# Keep only the application ID and the county name for merging back.
id_and_county = ['Application ID', 'COUNTY_NAM']
lot_county_small = lot_county[id_and_county]

# Attach the county name to the full lottery table; applications with no
# spatial match keep a missing COUNTY_NAM
lottery_merge = pd.merge(lottery, lot_county_small, on='Application ID', how='outer')

### Get counties from zip code for projects the spatial join could not place (missing lat/lon or outside the county boundaries)

In [None]:
# Load the zip/county workbook from the HUD folder
zip_county_xlsx = data_path + 'HUD/ZIP_COUNTY_062019.xlsx'
zip_county_cw = pd.read_excel(zip_county_xlsx)

In [None]:
# format variables
# Cast zip to text so it can be merged with the string zip taken from the
# lottery Address. If the workbook column was parsed as integers, codes with
# leading zeros come out short (e.g. 1001 instead of 01001), so pad back to
# five characters; values already five or more characters long are unchanged.
zip_county_cw['zip'] = zip_county_cw['zip'].astype(str).str.zfill(5)

In [None]:
# merge on zip to lottery
# NOTE(review): a left merge repeats a lottery row once for every crosswalk
# row that shares its zip -- confirm the crosswalk has one county per zip, or
# de-duplicate before saving
lottery_merge = pd.merge(lottery_merge, zip_county_cw, on = ['zip'], how = 'left')

# format county fips to match county dataset
# Missing codes are filled with 0 so the integer cast succeeds; after the
# first two characters are dropped they become '' and match no county below.
# assumes 'county' is a 5-digit state+county FIPS with a two-digit state
# prefix -- TODO confirm
lottery_merge['county'] = lottery_merge['county'].fillna(0).astype(int).astype(str)
lottery_merge['county'] = lottery_merge['county'].str[2:]

# get county name and fips, format
# Take an explicit copy: assigning a new column to a bare column selection of
# `county` is chained assignment and raises SettingWithCopyWarning
county_name_fips = county[['COUNTY_NAM', 'CO_FIPS']].copy()
county_name_fips['county'] = county_name_fips['CO_FIPS'].fillna(0).astype(int).astype(str)
# Left-pad to three digits so the key lines up with the 3-character code above
county_name_fips['county'] = county_name_fips['county'].str.zfill(3)

# merge to get county names from FIPS
# Both tables carry COUNTY_NAM, so pandas suffixes them: COUNTY_NAM_x is the
# name already on lottery_merge (from the spatial join), COUNTY_NAM_y is the
# name looked up through the zip-derived county code
lottery_merge = pd.merge(lottery_merge, county_name_fips, on = ['county'], how = 'left')

In [None]:
# clean up dataset

# Use the county from the spatial join (COUNTY_NAM_x); where that is missing,
# fall back to the county found through the zip code (COUNTY_NAM_y)
zip_based_county = lottery_merge['COUNTY_NAM_y']
lottery_merge['county_name'] = lottery_merge['COUNTY_NAM_x'].fillna(zip_based_county)

# Overwrite 'town' with the cleaned town values held in 'town1'
lottery_merge['town'] = lottery_merge['town1']

In [None]:
# Columns kept in the final dataset, in output order
final_columns = [
    'Application ID', 'Project Name', 'Name of Approved Vendor',
    'group', 'lat', 'lon', 'Size (kW AC)',
    'town', 'zip', 'county_name', 'Address', 'Ordinal',
    'Lottery Status', 'Block', 'Queue Number', 'Small Subscribers',
    'formatted_address', 'accuracy', 'type', 'clean_add', 'man_update',
    'lat_deg_dec_min', 'lon_deg_dec_min',
]

# Drop every helper column created along the way
lottery_final = lottery_merge[final_columns]

In [None]:
# Write the final table next to the other processed lottery files
# NOTE(review): to_csv also writes the row index as an unnamed first column by
# default -- pass index=False if downstream readers should not see it
output_csv = data_path + 'ipa_lottery/processed/final_lottery_locations_with_co_town_zip.csv'
lottery_final.to_csv(output_csv)