# Predicted Warehouses - Random Forest

<div class="alert alert-block alert-info">
    <strong>Import relevant libraries.</strong>
</div>

In [67]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import folium

#### Bring in current warehouses

In [2]:
# Load only the parcel columns needed to locate existing warehouses.
all_parcels = pd.read_csv('data/join_scag_to_parcels_left_2019.csv',
                          usecols=['APN','lon', 'lat', 'num_warehouses'])

# Keep parcels that already contain at least one warehouse. The explicit copy
# gives the GeoDataFrame below an independent frame instead of a slice of
# all_parcels.
current_warehouses = all_parcels[all_parcels['num_warehouses'] >= 1].copy()

# Build point geometry from the lon/lat columns (EPSG:4326).
current_warehouses = gpd.GeoDataFrame(
    current_warehouses,
    geometry=gpd.points_from_xy(current_warehouses['lon'], current_warehouses['lat'], crs='EPSG:4326'))

# 70-mile buffer around the Ontario Airport, as a GeoDataFrame.
# The airport coordinates are given in EPSG:2229, whose unit is feet:
# 70 miles * 5,280 ft/mile = 369,600.
airport = gpd.points_from_xy(x=[6683335.118285051], y=[1843271.4373799062], crs=2229)
buffer = airport.buffer(369600)
buffer = gpd.GeoDataFrame(geometry=buffer, crs=2229)

# Match the projection of the buffer. to_crs returns a new GeoDataFrame rather
# than modifying in place, so the result has to be assigned back.
current_warehouses = current_warehouses.to_crs(epsg=2229)

# Interactive map of the current warehouses. No `column` is passed, so the
# points are drawn in a single default colour rather than as a choropleth.
warehouse_map = current_warehouses.explore(
        legend=True,
        tiles='CartoDB positron')

In [3]:
#warehouse_map

#### Predicted Warehouses

In [4]:
# Load the prediction table (includes the pred_noWH / pred_WH columns) from the working directory.
predictionstop20 = pd.read_csv('predictionstop20.csv')

In [5]:
# Summarize columns, non-null counts and dtypes of the raw prediction table.
predictionstop20.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1367210 entries, 0 to 1367209
Data columns (total 23 columns):
 #   Column                          Non-Null Count    Dtype  
---  ------                          --------------    -----  
 0   APN                             1367210 non-null  float64
 1   pred_noWH                       1367210 non-null  float64
 2   pred_WH                         1367210 non-null  float64
 3   LAND_VALUE                      1367210 non-null  float64
 4   ACRES                           1367210 non-null  float64
 5   dollars_per_acre                1367210 non-null  float64
 6   distances                       1367210 non-null  float64
 7   lon                             1367210 non-null  float64
 8   lat                             1367210 non-null  float64
 9   IL_RATIO                        1367210 non-null  float64
 10  warehouse_start                 1367210 non-null  int64  
 11  Industrial                      1367210 non-null  float64
 12  

In [6]:
# Build point geometry from the lon/lat columns (EPSG:4326).
predictionstop20_gdf = gpd.GeoDataFrame(
    predictionstop20,
    geometry=gpd.points_from_xy(predictionstop20['lon'], predictionstop20['lat'], crs='EPSG:4326'))

# Match the projection of the buffer. to_crs returns a new GeoDataFrame rather
# than modifying in place, so the result has to be assigned back; otherwise
# predictionstop20_gdf stays in EPSG:4326 and only the displayed copy is projected.
predictionstop20_gdf = predictionstop20_gdf.to_crs(epsg=2229)

# Display the reprojected frame (pandas truncates the output).
predictionstop20_gdf

Unnamed: 0,APN,pred_noWH,pred_WH,LAND_VALUE,ACRES,dollars_per_acre,distances,lon,lat,IL_RATIO,...,"Percent $100,000 - $124,999",Percent White alone,Percent Some other race alone,Percent some_college,Percent less_highschool,Percent Renter Occupied,Percent No schooling completed,Percent Two or more races:,Percent Owner Occupied,geometry
0,210080008.0,1.00,0.00,415583.0,1.018478,4.080434e+05,1276.949816,-117.349561,33.997017,0.77632,...,6.318681,28.393006,43.630308,32.540862,22.436850,47.802198,6.53789,0.000000,52.197802,POINT (6758821.116 1821934.574)
1,210060035.0,1.00,0.00,0.0,0.259053,0.000000e+00,1448.959298,-117.348550,33.997814,0.00000,...,6.318681,28.393006,43.630308,32.540862,22.436850,47.802198,6.53789,0.000000,52.197802,POINT (6759125.694 1822226.544)
2,210210051.0,1.00,0.00,154517.0,0.055572,2.780501e+06,960.746466,-117.352501,33.994702,2.14815,...,6.318681,28.393006,43.630308,32.540862,22.436850,47.802198,6.53789,0.000000,52.197802,POINT (6757935.345 1821086.388)
3,210210076.0,1.00,0.00,0.0,0.480909,0.000000e+00,885.698095,-117.352219,33.994711,0.00000,...,6.318681,28.393006,43.630308,32.540862,22.436850,47.802198,6.53789,0.000000,52.197802,POINT (6758020.772 1821089.921)
4,210210046.0,1.00,0.00,73581.0,0.053479,1.375885e+06,852.249225,-117.352879,33.994740,3.66671,...,6.318681,28.393006,43.630308,32.540862,22.436850,47.802198,6.53789,0.000000,52.197802,POINT (6757820.816 1821099.317)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1367205,23804224.0,0.98,0.02,2641640.0,5.829377,4.531599e+05,772.223693,-117.549903,34.064667,1.29043,...,13.772894,62.984694,13.801020,28.571429,9.054326,19.706960,0.56338,11.045918,80.293040,POINT (6697985.753 1846220.948)
1367206,23804218.0,0.94,0.06,1204760.0,3.100517,3.885675e+05,562.452750,-117.551645,34.065156,5.57409,...,13.772894,62.984694,13.801020,28.571429,9.054326,19.706960,0.56338,11.045918,80.293040,POINT (6697457.189 1846396.659)
1367207,23804228.0,0.98,0.02,980205.0,3.364663,2.913234e+05,272.673620,-117.548893,34.065947,1.54167,...,13.772894,62.984694,13.801020,28.571429,9.054326,19.706960,0.56338,11.045918,80.293040,POINT (6698289.542 1846688.144)
1367208,23804227.0,0.98,0.02,1322500.0,5.723182,2.310778e+05,157.860184,-117.550346,34.066046,3.91986,...,13.772894,62.984694,13.801020,28.571429,9.054326,19.706960,0.56338,11.045918,80.293040,POINT (6697849.297 1846722.196)


In [7]:
# Summarize columns, non-null counts and dtypes of the GeoDataFrame (now with a geometry column).
predictionstop20_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1367210 entries, 0 to 1367209
Data columns (total 24 columns):
 #   Column                          Non-Null Count    Dtype   
---  ------                          --------------    -----   
 0   APN                             1367210 non-null  float64 
 1   pred_noWH                       1367210 non-null  float64 
 2   pred_WH                         1367210 non-null  float64 
 3   LAND_VALUE                      1367210 non-null  float64 
 4   ACRES                           1367210 non-null  float64 
 5   dollars_per_acre                1367210 non-null  float64 
 6   distances                       1367210 non-null  float64 
 7   lon                             1367210 non-null  float64 
 8   lat                             1367210 non-null  float64 
 9   IL_RATIO                        1367210 non-null  float64 
 10  warehouse_start                 1367210 non-null  int64   
 11  Industrial                      1367210 no

In [100]:
# Number of parcels with a non-zero predicted warehouse probability,
# counted from the boolean mask instead of materializing the filtered frame.
int((predictionstop20_gdf['pred_WH'] > 0).sum())

23682

In [15]:
# Parcels whose predicted warehouse probability is at least 25%.
predictionstop20_25 = predictionstop20_gdf.loc[
    predictionstop20_gdf['pred_WH'] >= 0.25
]

In [83]:
# Row count of the >= 25% subset.
predictionstop20_25.shape[0]

176

In [21]:
# Distinct warehouse_start values present in the >= 25% subset.
pd.unique(predictionstop20_25['warehouse_start'])

array([0, 1], dtype=int64)

In [84]:
# Number of parcels with a predicted warehouse probability of at least 50%.
# Counted directly from predictionstop20_gdf: predictionstop20_50 is only
# created in a later cell, so referencing it here raises NameError when the
# notebook is run top to bottom on a fresh kernel.
int((predictionstop20_gdf['pred_WH'] >= 0.5).sum())

5

In [86]:
# Parcels whose predicted warehouse probability is at least 10%.
predictionstop20_10 = predictionstop20_gdf.loc[
    predictionstop20_gdf['pred_WH'] >= 0.10
]

In [98]:
# Row count of the >= 10% subset.
predictionstop20_10.shape[0]

1531

In [88]:
# Interactive map of the >= 10% parcels in a single fixed colour
# (no `column` is passed, so this is not a choropleth).
predictionstop20_10_map = predictionstop20_10.explore(
    tiles='CartoDB positron',
    color='purple',
    legend=True,
)

In [89]:
# Render the >= 10% map built in the previous cell.
predictionstop20_10_map

In [64]:
# Interactive map of the >= 25% parcels in a single fixed colour
# (no `column` is passed, so this is not a choropleth).
predictionstop20_25_map = predictionstop20_25.explore(
    tiles='CartoDB positron',
    color='pink',
    legend=True,
)

In [65]:
# Render the >= 25% map built in the previous cell.
predictionstop20_25_map

In [22]:
# Parcels whose predicted warehouse probability is at least 50%.
predictionstop20_50 = predictionstop20_gdf.loc[
    predictionstop20_gdf['pred_WH'] >= 0.5
]

In [59]:
# Interactive map of the >= 50% parcels in a single fixed colour
# (no `column` is passed, so this is not a choropleth).
predictionstop20_50_map = predictionstop20_50.explore(
    tiles='CartoDB positron',
    color='red',
    legend=True,
)

In [60]:
# Render the >= 50% map built in the previous cell.
predictionstop20_50_map

In [96]:
# Base layer: current warehouses in sky blue, drawn in one fixed colour
# (no `column` is passed).
m = current_warehouses.explore(
    tiles='CartoDB positron',
    color='skyblue',
    legend=True,
)

# Each prediction subset is added to the same map by passing m=m,
# going from the loosest threshold (>= 10%) to the strictest (>= 50%).
predictionstop20_10_map = predictionstop20_10.explore(
    m=m,
    tiles='CartoDB positron',
    color='pink',
    legend=True,
)

predictionstop20_25_map = predictionstop20_25.explore(
    m=m,
    tiles='CartoDB positron',
    color='orange',
    legend=True,
    style_kwds={'opacity': 1},
)

predictionstop20_map = predictionstop20_50.explore(
    m=m,
    tiles='CartoDB positron',
    color='red',
    legend=True,
    style_kwds={'opacity': 1},
)


In [97]:
# Render the combined map; every layer in the previous cell was added to the shared map `m`.
predictionstop20_map

# Final Notebook Codeblock

In [None]:
# NOTE: this cell relies on `current_warehouses` created in the
# "Bring in current warehouses" section.

# Import the model predictions.
predictionstop20 = pd.read_csv('predictionstop20.csv')

# Convert to a GeoDataFrame with point geometry from lon/lat (EPSG:4326).
predictionstop20_gdf = gpd.GeoDataFrame(
    predictionstop20,
    geometry=gpd.points_from_xy(predictionstop20['lon'], predictionstop20['lat'], crs='EPSG:4326'))

# Match the projection of the buffer. to_crs returns a new GeoDataFrame rather
# than modifying in place, so the result has to be assigned back.
predictionstop20_gdf = predictionstop20_gdf.to_crs(epsg=2229)

# Parcels with at least a 10% chance of warehouse. Defined here so this cell
# does not depend on an exploratory cell for the pink layer below.
predictionstop20_10 = predictionstop20_gdf[predictionstop20_gdf['pred_WH'] >= 0.10]

# Parcels with at least a 25% chance of warehouse
predictionstop20_25 = predictionstop20_gdf[predictionstop20_gdf['pred_WH'] >= 0.25]

# Parcels with at least a 50% chance of warehouse
predictionstop20_50 = predictionstop20_gdf[predictionstop20_gdf['pred_WH'] >= 0.5]

# Base layer: current warehouses in sky blue, drawn in one fixed colour
# (no `column` is passed, so this is not a choropleth).
m = current_warehouses.explore(
        color='skyblue',
        legend=True,
        tiles='CartoDB positron')

# Each prediction subset is added to the same map by passing m=m,
# going from the loosest threshold to the strictest.
predictionstop20_10_map = predictionstop20_10.explore(
        m=m,
        legend=True,
        color='pink',
        tiles='CartoDB positron')

predictionstop20_25_map = predictionstop20_25.explore(
        m=m,
        legend=True,
        color='orange',
        tiles='CartoDB positron',
        style_kwds={
            'opacity':1})

predictionstop20_map = predictionstop20_50.explore(
        m = m,
        legend=True,
        color='red',
        tiles='CartoDB positron',
        style_kwds={
            'opacity':1})


In [None]:
# Render the combined map; every layer in the previous cell was added to the shared map `m`.
predictionstop20_map

The interactive map below shows the parcels predicted to become warehouses by 2030, colored by the probability ("chance") that the random forest (RF) model assigns to each parcel.

#### **Legend:**
- <font color='skyblue'>**Light Blue:**</font> Current warehouses
- <font color='pink'>**Pink:**</font> Parcels with a 10% or higher chance
- <font color='orange'>**Orange:**</font> Parcels with a 25% or higher chance
- <font color='red'>**Red:**</font> Parcels with a 50% or higher chance