**MVUM Cleaning**

In [None]:
%run ../bootstrap.py
setup_project_path()

from scripts.io_helpers import read_raw_layer, export_interim, read_interim_layer
from scripts.geometry_helpers import to_multilinestring, validate_geometry, drop_missing_geometry
from scripts.data_helpers import to_buffer_crs
from scripts import data_config as dc

Let's take a look at the data.

In [2]:
gdf = read_raw_layer("mvum_raw")

print(gdf.columns.tolist())
gdf.head()

['OBJECTID', 'RTE_CN', 'ID', 'NAME', 'BMP', 'EMP', 'SEG_LENGTH', 'GIS_MILES', 'SYMBOL', 'MVUM_SYMBO', 'JURISDICTI', 'OPERATIONA', 'SURFACETYP', 'SYSTEM', 'SEASONAL', 'PASSENGERV', 'PASSENGE_1', 'HIGHCLEARA', 'HIGHCLEA_1', 'TRUCK', 'TRUCK_DATE', 'BUS', 'BUS_DATESO', 'MOTORHOME', 'MOTORHOME_', 'FOURWD_GT5', 'FOURWD_G_1', 'TWOWD_GT50', 'TWOWD_GT_1', 'TRACKED_OH', 'TRACKED__1', 'OTHER_OHV_', 'OTHER_OH_1', 'ATV', 'ATV_DATESO', 'MOTORCYCLE', 'MOTORCYC_1', 'OTHERWHEEL', 'OTHERWHE_1', 'TRACKED__2', 'TRACKED__3', 'OTHER_OH_2', 'OTHER_OH_3', 'ADMINORG', 'SECURITYID', 'DISTRICTNA', 'FORESTNAME', 'FIELD_ID', 'SBS_SYMBOL', 'ROUTESTATU', 'GLOBALID', 'TA_SYMBOL', 'E_BIKE_CLA', 'E_BIKE_C_1', 'E_BIKE_C_2', 'E_BIKE_C_3', 'E_BIKE_C_4', 'E_BIKE_C_5', 'SHAPELEN', 'geometry']


Unnamed: 0,OBJECTID,RTE_CN,ID,NAME,BMP,EMP,SEG_LENGTH,GIS_MILES,SYMBOL,MVUM_SYMBO,...,GLOBALID,TA_SYMBOL,E_BIKE_CLA,E_BIKE_C_1,E_BIKE_C_2,E_BIKE_C_3,E_BIKE_C_4,E_BIKE_C_5,SHAPELEN,geometry
0,14,2468010421,733,TIMBER GULCH,0.0,5.05,5.05,4.973,3,"Roads open to highway legal vehicles only, Yea...",...,{563AF95C-EC1B-44BD-BA66-87B670C11EA3},4,,,,,,,0.093233,"LINESTRING (-104.00521 44.2677, -104.00519 44...."
1,15,561010421,336.1P,HINES BR 1P,0.15,0.97,0.82,0.834,4,"Roads open to highway legal vehicles only, Sea...",...,{64992FAC-6C6E-4A45-8F46-7792D7D5BFAB},6,,,,,,,0.013598,"LINESTRING (-103.57186 43.69657, -103.57187 43..."
2,39,135010421,653.1O,STAGG BR 1O,0.0,0.7,0.7,0.674,3,"Roads open to highway legal vehicles only, Yea...",...,{1F53B92D-A05B-42D5-93B8-C4EB889EAF0B},6,,,,,,,0.012187,"LINESTRING (-103.64118 43.71517, -103.6412 43...."
3,45,2296010421,858.1,PETERSON RD,0.0,1.5,1.5,1.517,1,"Roads open to all Vehicles, Yearlong",...,{A18C47E0-1E0B-448C-95EB-E5F05030316B},6,,,,,,,0.026087,"LINESTRING (-104.41607 44.48953, -104.41586 44..."
4,46,1342010421,278.1D,LITHOGRAPH BR 1D,0.0,0.52,0.52,0.509,3,"Roads open to highway legal vehicles only, Yea...",...,{C99CBCC1-08CC-40D5-AF32-E977B5D70722},6,,,,,,,0.009512,"LINESTRING (-103.80682 43.72761, -103.80702 43..."


In [3]:
columns_keep = ['ID', 'NAME', 'SEG_LENGTH','SYMBOL', 'MVUM_SYMBO', 
                'JURISDICTI', 'OPERATIONA', 'SURFACETYP', 'SYSTEM', 'SEASONAL', 
                'FORESTNAME', 'ROUTESTATU', 
                'SHAPELEN', 'geometry']

gdf_filtered = gdf[columns_keep]
gdf_filtered.head()

Unnamed: 0,ID,NAME,SEG_LENGTH,SYMBOL,MVUM_SYMBO,JURISDICTI,OPERATIONA,SURFACETYP,SYSTEM,SEASONAL,FORESTNAME,ROUTESTATU,SHAPELEN,geometry
0,733,TIMBER GULCH,5.05,3,"Roads open to highway legal vehicles only, Yea...",FS - FOREST SERVICE,3 - SUITABLE FOR PASSENGER CARS,AGG - CRUSHED AGGREGATE OR GRAVEL,NFSR - NATIONAL FOREST SYSTEM ROAD,yearlong,Black Hills National Forest,EX - EXISTING,0.093233,"LINESTRING (-104.00521 44.2677, -104.00519 44...."
1,336.1P,HINES BR 1P,0.82,4,"Roads open to highway legal vehicles only, Sea...",FS - FOREST SERVICE,2 - HIGH CLEARANCE VEHICLES,NAT - NATIVE MATERIAL,NFSR - NATIONAL FOREST SYSTEM ROAD,seasonal,Black Hills National Forest,EX - EXISTING,0.013598,"LINESTRING (-103.57186 43.69657, -103.57187 43..."
2,653.1O,STAGG BR 1O,0.7,3,"Roads open to highway legal vehicles only, Yea...",FS - FOREST SERVICE,2 - HIGH CLEARANCE VEHICLES,NAT - NATIVE MATERIAL,NFSR - NATIONAL FOREST SYSTEM ROAD,yearlong,Black Hills National Forest,EX - EXISTING,0.012187,"LINESTRING (-103.64118 43.71517, -103.6412 43...."
3,858.1,PETERSON RD,1.5,1,"Roads open to all Vehicles, Yearlong",FS - FOREST SERVICE,2 - HIGH CLEARANCE VEHICLES,NAT - NATIVE MATERIAL,NFSR - NATIONAL FOREST SYSTEM ROAD,yearlong,Black Hills National Forest,EX - EXISTING,0.026087,"LINESTRING (-104.41607 44.48953, -104.41586 44..."
4,278.1D,LITHOGRAPH BR 1D,0.52,3,"Roads open to highway legal vehicles only, Yea...",FS - FOREST SERVICE,2 - HIGH CLEARANCE VEHICLES,NAT - NATIVE MATERIAL,NFSR - NATIONAL FOREST SYSTEM ROAD,yearlong,Black Hills National Forest,EX - EXISTING,0.009512,"LINESTRING (-103.80682 43.72761, -103.80702 43..."


Let's inspect Route Status, Seasonal, and Jurisdiction values.

In [4]:
print("Jurisdiction values: ", gdf_filtered['JURISDICTI'].unique())
print("Seasonal values: ", gdf_filtered['SEASONAL'].unique())
print("Route status values: ", gdf_filtered['ROUTESTATU'].unique())

Jurisdiction values:  ['FS - FOREST SERVICE' 'C - COUNTY, PARISH, BOROUGH'
 'OFS - OTHER FOREST SERVICE' None 'OFS - GALLATIN NF' 'P - PRIVATE' 'FS'
 'FS - Forest Service' 'S - STATE' 'OF - OTHER FEDERAL AGENCY'
 'SH - STATE HIGHWAY' 'L - LOCAL' 'FOREST SERVICE'
 'BLM - BUREAU OF LAND MANAGEMENT']
Seasonal values:  ['yearlong' 'seasonal' None '4/1 - 12/25' 'Seasonal']
Route status values:  ['EX - EXISTING' None 'DE - DECOMMISSIONED' 'Existing' 'Ex - Existing'
 'PL - PLANNED' 'EXISTING' 'CV - CONVERTED']


Now, let's filter out MVUM roads that are unlikely to allow camping. First of all, let's keep rows where road status signifies that it exists. Then, let's standardize a Status column and drop the original for consistency.

In [12]:
status_keep = [
    'EX - EXISTING',
    'EXISTING'
]

gdf_status = gdf_filtered[gdf['ROUTESTATU'].fillna("").str.upper().isin(status_keep)]
print("Statuses: ", gdf_status['ROUTESTATU'].unique())

gdf_status['STATUS'] = 'EXISTING'
gdf_status.drop(columns=['ROUTESTATU'], inplace=True)
gdf_status.head(1)

Statuses:  ['EX - EXISTING' 'Existing' 'Ex - Existing' 'EXISTING']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf_status.drop(columns=['ROUTESTATU'], inplace=True)


Unnamed: 0,ID,NAME,SEG_LENGTH,SYMBOL,MVUM_SYMBO,JURISDICTI,OPERATIONA,SURFACETYP,SYSTEM,SEASONAL,FORESTNAME,SHAPELEN,geometry,STATUS
0,733,TIMBER GULCH,5.05,3,"Roads open to highway legal vehicles only, Yea...",FS - FOREST SERVICE,3 - SUITABLE FOR PASSENGER CARS,AGG - CRUSHED AGGREGATE OR GRAVEL,NFSR - NATIONAL FOREST SYSTEM ROAD,yearlong,Black Hills National Forest,0.093233,"LINESTRING (-104.00521 44.2677, -104.00519 44....",EXISTING


Next, let's keep rows where Forest Service has jurisdiction. MVUM rules affect camping legality primarily in National Forest. Here are the current values: 

['FS - FOREST SERVICE' 'C - COUNTY, PARISH, BOROUGH'
 'OFS - OTHER FOREST SERVICE' None 'OFS - GALLATIN NF' 'P - PRIVATE' 'FS'
 'FS - Forest Service' 'S - STATE' 'OF - OTHER FEDERAL AGENCY'
 'SH - STATE HIGHWAY' 'L - LOCAL' 'FOREST SERVICE'
 'BLM - BUREAU OF LAND MANAGEMENT']

Then, we can simplify a new Jurisdiction feature to "FS" for some metadata, and drop the original column. 

In [13]:
jurisdiction_keywords = [
    'FS',
    'FOREST SERVICE',
    'NF',
    'OFS'
]

gdf_juris = gdf_status[gdf_status['JURISDICTI'].fillna("").str.upper().apply(
        lambda x: any(keyword in x for keyword in jurisdiction_keywords)
    )]

print("Remaining jurisdiction values: ", gdf_juris['JURISDICTI'].unique())

# Simplify jurisdiction column and drop original

gdf_juris['JURISDICTION'] = 'FS'
gdf_juris.drop(columns=['JURISDICTI'], inplace=True)
gdf_juris.head(1)


Remaining jurisdiction values:  ['FS - FOREST SERVICE' 'OFS - OTHER FOREST SERVICE' 'OFS - GALLATIN NF'
 'FS' 'FS - Forest Service' 'FOREST SERVICE']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf_juris.drop(columns=['JURISDICTI'], inplace=True)


Unnamed: 0,ID,NAME,SEG_LENGTH,SYMBOL,MVUM_SYMBO,OPERATIONA,SURFACETYP,SYSTEM,SEASONAL,FORESTNAME,SHAPELEN,geometry,STATUS,JURISDICTION
0,733,TIMBER GULCH,5.05,3,"Roads open to highway legal vehicles only, Yea...",3 - SUITABLE FOR PASSENGER CARS,AGG - CRUSHED AGGREGATE OR GRAVEL,NFSR - NATIONAL FOREST SYSTEM ROAD,yearlong,Black Hills National Forest,0.093233,"LINESTRING (-104.00521 44.2677, -104.00519 44....",EXISTING,FS


**Geometry check**

Let's check what geometries are present.

In [15]:
print(gdf_juris.geometry.type.unique())

['LineString' None 'MultiLineString']


We want geometries to be consistent, so let's remove None instandces and cast all LineString instances to MultiLineString.

In [28]:
# Remove None instances
gdf_geom = drop_missing_geometry(gdf_juris)

# Cast to MultiLineString
gdf_geom = to_multilinestring(gdf_geom)
print("Unique geometry values: ", gdf_geom.geometry.type.unique())

# Validate data
print("Row count before validation: ", gdf_geom.shape[0])
gdf_geom = validate_geometry(gdf_geom)
print("Row count after validation: ", gdf_geom.shape[0])


Unique geometry values:  ['MultiLineString']
Row count before validation:  148401
Row count after validation:  148401


**Filtering to Colorado**

Last step! We need to project the data to the common buffer CRS, then intersect with the state_boundary_buffered.gpkg data in data/interim.

In [31]:
# Project to buffer CRS
mvum_projected = to_buffer_crs(gdf_geom)

# Read buffered state boundary layer
co_boundary = read_interim_layer("state_boundary_buffered")

# Intersect MVUM roads with Colorado state boundary to filter by state
mvum_intersected = mvum_projected[mvum_projected.geometry.intersects(co_boundary.geometry.union_all())]

print("Row count after filtering: ", mvum_intersected.shape[0])

Row count after filtering:  8765


Ready to export!

In [32]:
export_interim(mvum_intersected, "mvum_clean", driver="GPKG", verbose=True)

Saved to interim: /Users/loganproffitt/Desktop/CampGIS.nosync/Repo/CampGIS/data/interim/mvum_clean.gpkg
Also saved to processed: /Users/loganproffitt/Desktop/CampGIS.nosync/Repo/CampGIS/data/processed/mvum_clean.gpkg
