# Minnesota BMSB Analysis: `Preprocessing`
##### Contributors: *Luke Zaruba*, *Mattie Gisselbeck*
##### Last Updated: 2023-04-19

This notebook implements an automated ETL pipeline that processes and cleans the data used in the analysis. Before running it, run the `extract_data.ps1` script to download several of the required datasets.

In [7]:
# Import Packages
import os
import sys

import arcgis
import arcpy
import pandas as pd

# Append Path & Import ETL Module
sys.path.append("..")

from bmsb.etl import WeatherLoader, ObservationLoader

In [26]:
# Set up Workspace
# NOTE(review): this is an absolute, machine-specific path; anyone running the
# notebook from a different clone location has to edit it here.
gdb_directory = r"C:\gitFiles\minnesota-bmsb-analysis\data\gdb"
gdb_name = "bmsb_analysis"
full_gdb_path = os.path.join(gdb_directory, f"{gdb_name}.gdb")

# Let geoprocessing tools replace outputs left over from a previous run, so the
# notebook can be re-executed from the top without "output already exists" errors.
# The geodatabase itself is still protected by the explicit existence check below.
arcpy.env.overwriteOutput = True

# Remember whether the folder was already present (only used to choose the
# message), then create it together with any missing parent folders.
# os.mkdir would raise FileNotFoundError if the parent folder did not exist yet.
directory_existed = os.path.isdir(gdb_directory)
os.makedirs(gdb_directory, exist_ok=True)

# Create the geodatabase only when it is not on disk yet.
if os.path.exists(full_gdb_path):
    print("Directory & GDB already exist at: ", full_gdb_path)
else:
    arcpy.management.CreateFileGDB(gdb_directory, gdb_name)

    if directory_existed:
        print("Directory exists, GDB created")
    else:
        print("Directory & GDB created")

Directory & GDB already exist at:  C:\gitFiles\minnesota-bmsb-analysis\data\gdb\bmsb_analysis.gdb


In [27]:
# Locations of the downloaded source datasets.
# Raw strings are used so the Windows backslashes are taken literally.
landcover_tiff = (
    r"C:\gitFiles\minnesota-bmsb-analysis\data\landcover\NLCD_2019_Land_Cover.tif"
)
elevation_gdb_rast = (
    r"C:\gitFiles\minnesota-bmsb-analysis\data\elevation\elev_30m_digital_elevation_model.gdb\digital_elevation_model_30m"
)
cities_shp = (
    r"C:\gitFiles\minnesota-bmsb-analysis\data\cities\city_township_unorg.shp"
)

### Rasters

In [28]:
# Copy the elevation raster into the project geodatabase under the name "elevation"
elevation_out = os.path.join(full_gdb_path, "elevation")
arcpy.management.CopyRaster(
    in_raster=elevation_gdb_rast,
    out_rasterdataset=elevation_out,
)

In [29]:
# Copy the land cover GeoTIFF into the project geodatabase under the name "landcover"
landcover_out = os.path.join(full_gdb_path, "landcover")
arcpy.management.CopyRaster(
    in_raster=landcover_tiff,
    out_rasterdataset=landcover_out,
)

In [30]:
# Collapse the land cover classes into three groups:
#   1 = urban (the "Developed" classes), 2 = agriculture, 3 = everything else (natural)
landcover_in_gdb = os.path.join(full_gdb_path, "landcover")
reclass_out = os.path.join(full_gdb_path, "reclass_lc")

# One "label new_value" entry per line; Python joins adjacent literals into the
# single semicolon-separated remap string that is handed to the tool.
nlcd_remap = (
    "Unclassified NODATA;"
    "'Open Water' 3;"
    "'Developed, Open Space' 1;"
    "'Developed, Low Intensity' 1;"
    "'Developed, Medium Intensity' 1;"
    "'Developed, High Intensity' 1;"
    "'Barren Land' 3;"
    "'Deciduous Forest' 3;"
    "'Evergreen Forest' 3;"
    "'Mixed Forest' 3;"
    "Shrub/Scrub 3;"
    "Herbaceous 3;"
    "Hay/Pasture 2;"
    "'Cultivated Crops' 2;"
    "'Woody Wetlands' 3;"
    "'Emergent Herbaceous Wetlands' 3"
)

reclassified_lc = arcpy.sa.Reclassify(
    in_raster=landcover_in_gdb,
    reclass_field="NLCD_Land",
    remap=nlcd_remap,
    missing_values="DATA",
)

# Persist the in-memory result to the geodatabase
reclassified_lc.save(reclass_out)

### Cities

In [31]:
# Export only the features classed as cities from the shapefile into the geodatabase
cities_out = os.path.join(full_gdb_path, "cities")
city_only_filter = "CTU_CLASS = 'CITY'"

arcpy.conversion.ExportFeatures(
    in_features=cities_shp,
    out_features=cities_out,
    where_clause=city_only_filter,
)