# preprocessCoreLogic #
**Author:** Andrew Larkin <br>
Developed for the HEI Transit Study <br>
**Summary:** Given a large CoreLogic CSV file, reduce the variables to those needed for the wind analysis and restrict the records to those within 5 km of maternal residences.

## part 1: import libraries and define global constants ##

In [1]:
import pandas as ps
import os
import arcpy
import const as gConst
# Let geoprocessing tools overwrite existing outputs so the cells can be re-run.
arcpy.env.overwriteOutput = True

In [2]:
# Folder holding the CoreLogic building-year data. The constants module is
# imported under the alias gConst, so it has to be referenced by that name
# (a bare `const` is undefined and raises NameError).
PARENT_FOLDER = gConst.WIND_FOLDER + "Corelogic_Building_Years/"
# Input csv that is loaded and reduced in part 2.
CORE_LOGIC_FILE = PARENT_FOLDER + "CoreLogicData/TaxCompiled.csv"

## part 2: load corelogic into python and remove unneeded variables and records without GIS coordinates ##

In [20]:
# Load the complete CoreLogic csv into memory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids columns ending up with mixed types
# (and the DtypeWarning pandas emits for them).
# NOTE(review): the recorded output of this cell looks like the tail of such a
# warning -- confirm against a fresh run.
coreLogicData = ps.read_csv(CORE_LOGIC_FILE, low_memory=False)
# Preview the first rows to sanity-check the load.
coreLogicData.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,fipscode,apnparcelnumberunformatted,apnsequencenumber,compositepropertylinkagekey,previousparcelnumber,situshousenumber,situshousenumbersuffix,situshousenumber2,situsdirection,situsstreetname,...,apnparcelnumberunformatted_03,previousparcelnumber_03,apnparcelnumberunformatted_02,previousparcelnumber_02,apnparcelnumberunformatted_01,previousparcelnumber_01,lastobserved,firstobserved,missingyearbuilt,disappears
0,48001,M100045,1,48001M100045 ...,,1838,,,E,FM 323,...,,,,,,,12,12,0,1
1,48001,M100170,1,48001M100170 ...,,620,,,,AN COUNTY ROAD 2419,...,,,,,,,12,12,0,1
2,48001,M100343,1,48001M100343 ...,,1769,,,,AN COUNTY ROAD 2419,...,,,,,,,12,12,0,1
3,48001,M100353,1,48001M100353 ...,,4671,,,,FM 322,...,,,,,,,12,12,0,1
4,48001,M100408,1,48001M100408 ...,,2463,,,,AN COUNTY ROAD 318,...,,,,,,,12,12,0,1


In [21]:
# List the columns available in the raw CoreLogic table.
print(coreLogicData.columns)
# Print the number of non-null values in the first column. The column is
# selected by position with iloc: integer [] access on the label-indexed
# Series returned by DataFrame.count() is deprecated, and counting every
# column just to print one is wasted work on a large table.
print(coreLogicData.iloc[:, 0].count())

Index(['fipscode', 'apnparcelnumberunformatted', 'apnsequencenumber',
       'compositepropertylinkagekey', 'previousparcelnumber',
       'situshousenumber', 'situshousenumbersuffix', 'situshousenumber2',
       'situsdirection', 'situsstreetname', 'situsmode', 'situsquadrant',
       'situsunitnumber', 'situscity', 'situsstate', 'situszipcode',
       'yearbuilt', 'storiesnumber', 'blocklevellatitude',
       'blocklevellongitude', 'parcellevellatitude', 'parcellevellongitude',
       'apnparcelnumberunformatted_12', 'previousparcelnumber_12',
       'apnparcelnumberunformatted_11', 'previousparcelnumber_11',
       'apnparcelnumberunformatted_10', 'previousparcelnumber_10',
       'apnparcelnumberunformatted_09', 'previousparcelnumber_09',
       'apnparcelnumberunformatted_08', 'previousparcelnumber_08',
       'apnparcelnumberunformatted_07', 'previousparcelnumber_07',
       'apnparcelnumberunformatted_06', 'previousparcelnumber_06',
       'apnparcelnumberunformatted_05', 'previ

In [22]:
# Replace missing construction years with the sentinel -1, then store the
# column as integers (NaN cannot be represented in an int column).
coreLogicData['yearbuilt'] = coreLogicData['yearbuilt'].fillna(-1).astype(int)

In [23]:
# Variables kept for the wind analysis.
keepColumns = ['yearbuilt', 'storiesnumber', 'parcellevellatitude', 'parcellevellongitude']

# Row filters on the parcel-level coordinates. Comparisons against NaN are
# False, so records with missing coordinates are dropped as well.
hasLatitude = coreLogicData['parcellevellatitude'] > 0
hasLongitude = coreLogicData['parcellevellongitude'] > -200

# Select rows and columns in a single .loc call and take an explicit copy, so
# the assignment below modifies an independent frame instead of a slice of
# coreLogicData (which would trigger SettingWithCopyWarning).
reduced = coreLogicData.loc[hasLatitude & hasLongitude, keepColumns].copy()

# Replace missing story counts with the sentinel -1 and store as integers.
reduced['storiesnumber'] = reduced['storiesnumber'].fillna(-1).astype(int)

# Write the reduced table without the pandas index; part 3 reads this file.
reduced.to_csv(PARENT_FOLDER + "CoreLogicData/reduced.csv", index=False)

## part 3: load reduced core logic records into GIS  ##

In [5]:
# Coordinate columns of the reduced csv: longitude is x, latitude is y.
XFieldName = 'parcellevellongitude'
YFieldName = 'parcellevellatitude'
outFolder = PARENT_FOLDER + "/"
# EPSG:4326 (WGS 84 geographic coordinates).
spatialRef = arcpy.SpatialReference(4326)
csvFilePath = PARENT_FOLDER + "CoreLogicData/reduced.csv"
coreLogicLayer = "coreLogicLayer"
# The constants module is imported under the alias gConst, so it has to be
# referenced by that name (a bare `const` is undefined and raises NameError).
coreLogicShapefile = gConst.WIND_FOLDER + "temp/intermediateProducts.gdb"
# Build a point event layer from the csv coordinate columns.
arcpy.MakeXYEventLayer_management(csvFilePath, XFieldName, YFieldName, coreLogicLayer, spatial_reference=spatialRef)
# Persist the event layer to disk.
# NOTE(review): the output location handed to the shapefile conversion tool is
# a file geodatabase path -- confirm this is intended, or whether a
# geodatabase conversion tool should be used instead.
arcpy.FeatureClassToShapefile_conversion(coreLogicLayer, coreLogicShapefile)

## part 4: restrict core logic data points to within 5km of maternal residence locations ##

In [None]:
# The constants module is imported under the alias gConst, so it has to be
# referenced by that name (a bare `const` is undefined and raises NameError).
residenceLocations = gConst.WIND_FOLDER + "Birth_Addresses_Wind/births_shapefile/Births0716_Wind.shp"
outTable = gConst.WIND_FOLDER + "temp/intermediateProducts.gdb/coreNearResidence"
# Build a near table of CoreLogic points against the residence locations,
# using a 5000 m (5 km) search radius.
# NOTE(review): coreLogicLayer is defined in EPSG:4326 (geographic
# coordinates); confirm whether the tool's distance method should be set to
# geodesic for the 5 km radius to be measured correctly.
arcpy.GenerateNearTable_analysis(coreLogicLayer, residenceLocations, outTable, "5000 Meters")