# preprocessCoreLogic #
**Author:** Andrew Larkin <br>
Developed for the HEI Transit Study <br>
**Summary:** Given a large CoreLogic CSV file, reduce the variables to those needed for the wind analysis and restrict the records to those within 5 km of maternal residences.

## part 1: import libraries and define global constants ##

In [None]:
# pandas is imported under the alias `ps` (not the more common `pd`)
import pandas as ps
import os
import arcpy
# project constants module; it is bound to the alias `gConst` in this notebook
import const as gConst
# allow arcpy geoprocessing tools to overwrite outputs that already exist,
# so the cells below can be re-run without deleting earlier results by hand
arcpy.env.overwriteOutput=True

In [None]:
# Root folder holding the CoreLogic building-year inputs and outputs.
# The constants module is imported as `gConst`, so it has to be referenced
# through that alias: the bare name `const` is never bound and raises NameError.
PARENT_FOLDER = gConst.WIND_FOLDER + "Corelogic_Building_Years/"
# raw CoreLogic tax csv that the rest of the notebook reduces
CORE_LOGIC_FILE = PARENT_FOLDER + "CoreLogicData/TaxCompiled.csv"

## part 2: load corelogic into python and remove unneeded variables and records without GIS coordinates ##

In [None]:
# read the complete CoreLogic csv into a DataFrame
coreLogicData = ps.read_csv(filepath_or_buffer=CORE_LOGIC_FILE)
# preview the first five records (shown as the cell output)
coreLogicData.head(n=5)

In [None]:
# list the column names available in the raw file
print(coreLogicData.keys())
# number of non-null values in the first column.
# count() returns a Series labelled by column name, so the positional lookup is
# spelled out with .iloc; a bare integer key on a labelled Series is deprecated
# positional access in recent pandas releases.
print(coreLogicData.count().iloc[0])

In [None]:
# flag missing construction years with -1, then store the column as integers
coreLogicData['yearbuilt'] = coreLogicData['yearbuilt'].fillna(-1).astype(int)

In [None]:
# variables kept for the wind analysis
keepColumns = ['yearbuilt','storiesnumber','parcellevellatitude','parcellevellongitude']

# keep only records with usable GIS coordinates: latitude above 0 and
# longitude above -200. Comparisons against NaN evaluate to False, so records
# with missing coordinates are dropped by the same filters.
hasLatitude = coreLogicData['parcellevellatitude'] > 0
hasLongitude = coreLogicData['parcellevellongitude'] > -200

# take an explicit copy so the column assignment below operates on an
# independent DataFrame instead of a slice of coreLogicData
# (avoids pandas' SettingWithCopyWarning)
reduced = coreLogicData.loc[hasLatitude & hasLongitude, keepColumns].copy()

# flag missing story counts with -1 and store the column as integers
# NOTE(review): astype(int) truncates any fractional story counts - confirm this is acceptable
reduced['storiesnumber'] = reduced['storiesnumber'].fillna(-1).astype(int)

# write the reduced records to csv (without the index) for loading into GIS
reduced.to_csv(PARENT_FOLDER + "CoreLogicData/reduced.csv",index=False)

## part 3: load reduced core logic records into GIS  ##

In [None]:
# names of the csv columns that hold the point coordinates
XFieldName = 'parcellevellongitude'
YFieldName = 'parcellevellatitude'
outFolder = PARENT_FOLDER + "/"
# EPSG:4326 (WGS 84 geographic coordinates)
# NOTE(review): assumes the CoreLogic parcel coordinates are WGS 84 - confirm
spatialRef = arcpy.SpatialReference(4326)
# reduced csv of CoreLogic records to be turned into points
csvFilePath = PARENT_FOLDER + "CoreLogicData/reduced.csv"
# name of the in-memory XY event layer
coreLogicLayer = "coreLogicLayer"
# The constants module is imported as `gConst`; the bare name `const` is never
# bound and raises NameError.
# NOTE(review): despite the variable name, this path is a file geodatabase that
# is handed to a shapefile export tool - confirm this is the intended target.
coreLogicShapefile = gConst.WIND_FOLDER + "temp/intermediateProducts.gdb"
# build a point layer from the csv's longitude/latitude columns
arcpy.MakeXYEventLayer_management(csvFilePath, XFieldName, YFieldName, coreLogicLayer, spatial_reference=spatialRef)
# export the event layer to the location above
arcpy.FeatureClassToShapefile_conversion(coreLogicLayer, coreLogicShapefile)

## part 4: restrict core logic data points to within 5km of maternal residence locations ##

In [None]:
# The constants module is imported as `gConst`; the bare name `const` is never
# bound and raises NameError.
# shapefile of residence locations used as the near features
residenceLocations = gConst.WIND_FOLDER + "Birth_Addresses_Wind/births_shapefile/Births0716_Wind.shp"
# output table written into the intermediate-products geodatabase
outTable = gConst.WIND_FOLDER + "temp/intermediateProducts.gdb/coreNearResidence"
# tabulate CoreLogic points that lie within the 5000 meter search radius of a residence
# NOTE(review): the input layer is in geographic coordinates (EPSG:4326);
# confirm the tool measures the radius in meters as intended for such input
arcpy.GenerateNearTable_analysis(coreLogicLayer, residenceLocations, outTable, "5000 Meters")