# Exploring and Preparing Land Prices Data by Census Tract 

[Data Source](https://www.fhfa.gov/PolicyProgramsResearch/Research/Pages/wp1901.aspx)


[FAQs](https://www.fhfa.gov/PolicyProgramsResearch/Research/PaperDocuments/FAQs-Land-10-28-20.pdf)

In this notebook, I will prepare our land values data for use in our decision tree for our random forest evaluation. I will filter down to the census tracts within the areas we are interested in, I will clean the data and look for NAs and duplicates, I will join the data to census tract boundaries, and I will prepare the data to be spatially joined to the parcel level SCAG data. 

In [11]:
import pandas as pd

alldf = pd.read_csv('land_vals.csv', encoding_errors = 'ignore')

In [12]:
alldf.sample(5)

Unnamed: 0,State,County,Census Tract,"Land Value\n(1/4 Acre Lot, Standardized)","Land Value\n(Per Acre, As-Is)",Land Share of Property Value,Lot Size,Interior Square Feet,Property Value (Standardized),Property Value (As-is)
6726,California,Sonoma County,6097153003,166100,999200,0.33,5710,1430,544900,397500
47440,Texas,Tarrant County,48439110203,31200,145400,0.154,8250,2150,172000,178400
22230,Maryland,Baltimore city,24510130100,59600,513600,0.161,2810,2370,211700,205200
25100,Michigan,Saginaw County,26145012900,8400,12600,0.173,50220,1150,127800,84300
24893,Michigan,Oakland County,26125170400,89500,556200,0.376,5900,1230,333400,200200


In [13]:
#filter down to counties of interest
socal = alldf[(alldf.County == 'Riverside County')|(alldf.County == 'San Bernardino County')
              |(alldf.County == 'Los Angeles County')]
socal.sample(10)

Unnamed: 0,State,County,Census Tract,"Land Value\n(1/4 Acre Lot, Standardized)","Land Value\n(Per Acre, As-Is)",Land Share of Property Value,Lot Size,Interior Square Feet,Property Value (Standardized),Property Value (As-is)
5364,California,San Bernardino County,6071011401,69200,379800,0.29,7110,1120,317500,213600
2874,California,Los Angeles County,6037265420,1800500,8295600,0.802,8830,2820,1830300,2098000
4438,California,Riverside County,6065040903,134900,619500,0.443,8770,1340,376600,281800
2890,California,Los Angeles County,6037270300,854300,4711900,0.796,6700,1880,1050000,910800
2990,California,Los Angeles County,6037311500,587900,3266400,0.682,6780,1640,922500,745300
5294,California,San Bernardino County,6071009119,36000,38400,0.201,58620,2000,220000,257400
2726,California,Los Angeles County,6037139502,621000,3482100,0.68,6700,1830,915500,788700
5240,California,San Bernardino County,6071005600,59100,329200,0.307,7040,1130,264800,173500
2883,California,Los Angeles County,6037269000,1651100,7592400,0.738,8840,2980,1758200,2089700
4763,California,Riverside County,6065940400,46600,253300,0.241,6970,1160,241800,168300


In [14]:
import geopandas as gpd

tracts = gpd.read_file('tl_2019_06_tract/tl_2019_06_tract.shp')
tracts.dtypes

STATEFP       object
COUNTYFP      object
TRACTCE       object
GEOID         object
NAME          object
NAMELSAD      object
MTFCC         object
FUNCSTAT      object
ALAND          int64
AWATER         int64
INTPTLAT      object
INTPTLON      object
geometry    geometry
dtype: object

In [15]:
tracts['GEOID'] = tracts['GEOID'].astype('float')
tracts.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,6,37,139301,6037139000.0,1393.01,Census Tract 1393.01,G5020,S,2865657,0,34.1781538,-118.5581265,"POLYGON ((-118.57150 34.17758, -118.57148 34.1..."
1,6,37,139302,6037139000.0,1393.02,Census Tract 1393.02,G5020,S,338289,0,34.176723,-118.5383655,"POLYGON ((-118.54073 34.18019, -118.54070 34.1..."
2,6,37,139502,6037140000.0,1395.02,Census Tract 1395.02,G5020,S,1047548,0,34.1628402,-118.526311,"POLYGON ((-118.53225 34.16201, -118.53177 34.1..."
3,6,37,139600,6037140000.0,1396.0,Census Tract 1396,G5020,S,2477482,0,34.1640599,-118.5101001,"POLYGON ((-118.51858 34.15858, -118.51858 34.1..."
4,6,37,139701,6037140000.0,1397.01,Census Tract 1397.01,G5020,S,3396396,2411,34.157429,-118.4954117,"POLYGON ((-118.50980 34.15691, -118.50848 34.1..."


In [37]:
socal = socal.rename(columns = {'Census Tract':'GEOID'})
socal['GEOID'] = socal['GEOID'].astype('float')
socal = socal.set_index('GEOID')

In [38]:
tracts = tracts.set_index('GEOID')

In [39]:
socal.shape

(1889, 9)

In [40]:
tracts.head()

Unnamed: 0_level_0,STATEFP,COUNTYFP,TRACTCE,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
6037139000.0,6,37,139301,1393.01,Census Tract 1393.01,G5020,S,2865657,0,34.1781538,-118.5581265,"POLYGON ((-118.57150 34.17758, -118.57148 34.1..."
6037139000.0,6,37,139302,1393.02,Census Tract 1393.02,G5020,S,338289,0,34.176723,-118.5383655,"POLYGON ((-118.54073 34.18019, -118.54070 34.1..."
6037140000.0,6,37,139502,1395.02,Census Tract 1395.02,G5020,S,1047548,0,34.1628402,-118.526311,"POLYGON ((-118.53225 34.16201, -118.53177 34.1..."
6037140000.0,6,37,139600,1396.0,Census Tract 1396,G5020,S,2477482,0,34.1640599,-118.5101001,"POLYGON ((-118.51858 34.15858, -118.51858 34.1..."
6037140000.0,6,37,139701,1397.01,Census Tract 1397.01,G5020,S,3396396,2411,34.157429,-118.4954117,"POLYGON ((-118.50980 34.15691, -118.50848 34.1..."


In [41]:
socalGdf = socal.join(tracts[['geometry']], on= 'GEOID')
socalGdf.sample(3)
socalGdf.head()

Unnamed: 0_level_0,State,County,"Land Value\n(1/4 Acre Lot, Standardized)","Land Value\n(Per Acre, As-Is)",Land Share of Property Value,Lot Size,Interior Square Feet,Property Value (Standardized),Property Value (As-is),geometry
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
6037101000.0,California,Los Angeles County,325200,1722700,0.568,7100,1400,668000,494500,"POLYGON ((-118.30229 34.25870, -118.30091 34.2..."
6037101000.0,California,Los Angeles County,327100,1233000,0.538,11340,1850,652600,596600,"POLYGON ((-118.30334 34.27371, -118.30330 34.2..."
6037101000.0,California,Los Angeles County,302800,1467400,0.503,7840,1600,660200,525100,"POLYGON ((-118.29945 34.25598, -118.29792 34.2..."
6037101000.0,California,Los Angeles County,325600,1684900,0.554,7170,1400,683800,500800,"POLYGON ((-118.28592 34.25227, -118.28592 34.2..."
6037101000.0,California,Los Angeles County,406500,1826100,0.621,8890,1580,733100,599700,"POLYGON ((-118.27822 34.25068, -118.27822 34.2..."


In [1]:
import pandas as pd

riverside = pd.read_csv('riv_parcels_values.csv')

  riverside = pd.read_csv('riv_parcels_values.csv')


In [3]:
riverside.sample(10)

Unnamed: 0,OID_,APN,FLAG,MAIL_STREET,MAIL_CITY,SITUS_STREET,SITUS_CITY,STREET_NUMBER,STREET_PREDIRECTION,STREET_NAME,...,LOT,BLOCK,CAME_FROM,TAX_RATE_AREA,LAND,STRUCTURES,PRIMARY_OWNER,ALL_OWNER_LIST,SHAPE_Length,SHAPE_Area
789614,789615,164103003,,1056 CASPEN AVE,CITY INDUSTRY CA 91789,6698 PASO FINO ST,CORONA CA 92880,6698.0,,PASO FINO,...,95,,"134050012,134050013,134050014,134050015,134050...",27002.0,136144.0,346564.0,,,346.662247,7281.625575
85189,85190,602290033,EX,68910 ADELINA RD,CATHEDRAL CY CA 92234,,,,,,...,I,,"602010001,602010002,602010003,602010004,602010...",,,,,,1176.780015,15269.173568
374040,374041,172381002,,13077 23RD ST,ETIWANDA CA 91739,3860 BUCHANAN ST,RIVERSIDE CA 92503,3860.0,,BUCHANAN,...,6,,"139051002,139051003,139240027,139451001,139562...",9037.0,589906.0,0.0,,,721.265364,28192.423498
173820,173821,616141028,,47800 MADISON UNIT 132,INDIO CA 92201,47800 MADISON #132,INDIO CA 92201,47800.0,,MADISON,...,132,,615070010,7041.0,23543.0,70645.0,,,271.054655,4410.155001
794503,794504,107273012,,959 WINSTON CIR,CORONA CA 92881,959 WINSTON CIR,CORONA CA 92881,959.0,,WINSTON,...,109,,107140030,4040.0,99741.0,256110.0,,,310.345453,5874.637236
237849,237850,428062014,,1341 NEWTON ST,BEAUMONT CA 92223,1341 NEWTON ST,BEAUMONT CA 92223,1341.0,,NEWTON,...,14,,"421020004,421020007,421060010,421060011,421060...",2012.0,53609.0,280916.0,,,331.75601,6485.065289
803035,803036,102443040,,3356 AMY DR,CORONA CA 92882,3356 AMY DR,CORONA CA 92882,3356.0,,AMY,...,73,,102360022,4047.0,126237.0,302967.0,,,300.61818,5472.475234
789004,789005,152672001,,7614 CABRILLO WAY,EASTVALE CA 92880,7614 CABRILLO WAY,CORONA CA 92880,7614.0,,CABRILLO,...,176,,152040050,27002.0,127066.0,754930.0,,,350.204386,7357.161917
764292,764293,130123009,,4320 CEDAR AVE,NORCO CA 92860,4320 CEDAR AVE,NORCO CA 92860,4320.0,,CEDAR,...,96,,090300366,15002.0,73851.0,217148.0,,,423.538141,11001.015072
93806,93807,638112017,,122 WATERFORD CIR,RANCHO MIRAGE CA 92270,,,,,,...,99,,,14005.0,47816.0,0.0,,,502.010797,15110.713435


In [5]:
riverside.shape

(827053, 35)

In [6]:
la = pd.read_csv('LA_County_Parcels.csv')
la.sample(5)

  la = pd.read_csv('LA_County_Parcels.csv')


Unnamed: 0,OBJECTID,AIN,APN,SitusHouseNo,SitusFraction,SitusDirection,SitusUnit,SitusStreet,SitusAddress,SitusCity,...,LegalDescLine5,LegalDescLineLast,LegalDescription,CENTER_LAT,CENTER_LON,CENTER_X,CENTER_Y,LAT_LON,ShapeSTArea,ShapeSTLength
2391657,2391658,8712017014,8712-017-014,1430.0,,,,HEIDELBERG AVE,1430 HEIDELBERG AVE,WALNUT CA,...,,,TRACT NO 19218 LOT 13,34.043769,-117.856666,6605089.0,1838342.0,"34.043769, -117.856666",8418.081055,368.259804
1535817,1535818,6058029004,6058-029-004,2044.0,,W,,105TH ST,2044 W 105TH ST,LOS ANGELES CA,...,,,TRACT NO 13235 LOT 134,33.940774,-118.314986,6466127.0,1800978.0,"33.940774, -118.314986",9422.197266,464.162633
1928764,1928765,7381009001,7381-009-001,20234.0,,,,GALWAY AVE,20234 GALWAY AVE,CARSON CA,...,,,TRACT NO 29001 LOT 1,33.847778,-118.26027,6482635.0,1767087.0,"33.847778, -118.260270",6068.112305,317.630743
2221016,2221017,8390003019,8390-003-019,561.0,,E,,BELLGROVE ST,561 E BELLGROVE ST,SAN DIMAS CA,...,,,TRACT NO 27308 LOT 53,34.111291,-117.796177,6623363.0,1862946.0,"34.111291, -117.796177",8137.182617,384.489437
864059,864060,4258013008,4258-013-008,2738.0,,,,CEILHUNT AVE,2738 CEILHUNT AVE,LOS ANGELES CA,...,,,TRACT # 16842 LOT 94,34.026364,-118.436944,6429270.0,1832264.0,"34.026364, -118.436944",5304.293945,310.007064


In [7]:
la.shape

(2422293, 91)