# Future Land Use RO-Crate

## Introduction

***

## Process Outline

***

## Set Up

### Import packages

In [1]:
import os
import pandas as pd
import geopandas as gpd
import zipfile
import json

### Parameters

#### Static Parameters

In [2]:
# Root folders for the notebook's inputs and outputs
OUTPUT_DIR = os.path.normpath("./output_data")
INPUT_DIR = os.path.normpath("./input_data")

# The input zip, the input CSV and the extracted .shp all share this base name
_SOURCE_STEM = "Future_Land_use__MTP2024_parcels_Symbology"

# Input zip and CSV
ZIP_NAME = _SOURCE_STEM + ".zip"
ZIP_PATH = os.path.join(INPUT_DIR, ZIP_NAME)
FUTURE_LAND_USE_INPUT_NAME = _SOURCE_STEM + ".csv"
FUTURE_LAND_USE_INPUT_PATH = os.path.join(INPUT_DIR, FUTURE_LAND_USE_INPUT_NAME)

# Folder that receives every file extracted from the zip
EXTRACT_NAME = "extracted_files"
EXTRACT_PATH = os.path.join(INPUT_DIR, EXTRACT_NAME)

# Shapefile expected inside the extraction folder
SHAPEFILE_NAME = _SOURCE_STEM + ".shp"
SHAPEFILE_PATH = os.path.join(EXTRACT_PATH, SHAPEFILE_NAME)

# The two CSVs produced by splitting the input CSV (geography / other attributes)
FUTURE_LAND_USE_GEO_OUTPUT_NAME = "Future_Land_use__geography.csv"
FUTURE_LAND_USE_GEO_OUTPUT_PATH = os.path.join(OUTPUT_DIR, FUTURE_LAND_USE_GEO_OUTPUT_NAME)
FUTURE_LAND_USE_ATTRIB_OUTPUT_NAME = "Future_Land_use__attributes.csv"
FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH = os.path.join(OUTPUT_DIR, FUTURE_LAND_USE_ATTRIB_OUTPUT_NAME)

# Zipped RO-Crate
RO_CRATE_NAME = "future-land-use-crated.zip"
RO_CRATE_PATH = os.path.join(OUTPUT_DIR, RO_CRATE_NAME)

# RO-Crate metadata file
RO_CRATE_METADATA_NAME = "ro-crate-metadata.json"
RO_CRATE_METADATA_PATH = os.path.join(OUTPUT_DIR, RO_CRATE_METADATA_NAME)

# RO-Crate metadata definition
# One row per data entity in the crate: (file id, display name, media type)
_CRATE_FILE_ENTITIES = (
    ("Future_Land_use__geography.csv", "Shapefile Data CSV", "text/csv"),
    ("Future_Land_use__attributes.csv", "Attributes Data CSV", "text/csv"),
    ("attributes_schema.json", "Attributes Schema", "application/json"),
)

# Metadata descriptor: points at the root dataset and the RO-Crate 1.1 profile
_CRATE_DESCRIPTOR = {
    "@id": "ro-crate-metadata.json",
    "@type": "CreativeWork",
    "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
    "about": {"@id": "./"},
}

# Root dataset: lists every file entity under hasPart
_CRATE_ROOT_DATASET = {
    "@id": "./",
    "@type": "Dataset",
    "name": "Future Land Use Data",
    "description": "Dataset containing future land use data with spatial and attribute information.",
    "hasPart": [{"@id": file_id} for file_id, _title, _media_type in _CRATE_FILE_ENTITIES],
}

RO_CRATE_METADATA = {
    "@context": "https://w3id.org/ro/crate/1.1/context",
    "@graph": [
        _CRATE_DESCRIPTOR,
        _CRATE_ROOT_DATASET,
        # One File entity per row of the table above, in the same order
        *(
            {
                "@id": file_id,
                "@type": "File",
                "name": title,
                "encodingFormat": media_type,
            }
            for file_id, title, media_type in _CRATE_FILE_ENTITIES
        ),
    ],
}

# GeoPackage export: file name and its location under the output folder
OUTPUT_GEOPACKAGE_NAME = "Future_Land_use.gpkg"
OUTPUT_GEOPACKAGE_PATH = os.path.join(OUTPUT_DIR, OUTPUT_GEOPACKAGE_NAME)

# Attributes schema JSON: file name and its location under the output folder
SCHEMA_NAME = "attributes_schema.json"
SCHEMA_PATH = os.path.join(OUTPUT_DIR, SCHEMA_NAME)

# Frictionless Table Schema for the attributes CSV.
#
# Per-field rules (`required`, `enum`) and per-field descriptions are placed
# inside each field descriptor, which is where Table Schema defines them.
# Top-level "constraints" / "description" maps keyed by field name are not
# part of the schema format, so rules placed there are never applied.
#
# NOTE(review): the enum lists are carried over unchanged ("LIND" for
# ExLU21/FutLU21, "IND" for FUTsimple) -- confirm they are exhaustive for the
# real data before validating against this schema.
ATTRIBUTES_SCHEMA = {
    "fields": [
        {"name": "OBJECTID", "type": "integer", "constraints": {"required": True}},
        {
            "name": "ExLU21",
            "type": "string",
            "description": "Refer to ExLU21 values",
            "constraints": {
                "required": True,
                "enum": ["HURB", "LURB", "LIND", "HSUB"],
            },
        },
        {
            "name": "FutLU21",
            "type": "string",
            "description": "Refer to FutLU21 values",
            "constraints": {
                "required": True,
                "enum": ["HURB", "LURB", "LIND", "HSUB"],
            },
        },
        {"name": "MIXcode", "type": "string"},
        {"name": "JoinAll", "type": "string"},
        {"name": "Place", "type": "string"},
        {"name": "last_edited_date", "type": "datetime"},
        {"name": "County", "type": "string"},
        {
            "name": "FUTsimple",
            "type": "string",
            "description": "Refer to FUTsimple values",
            "constraints": {"enum": ["HURB", "LURB", "IND", "HSUB"]},
        },
    ],
    # OBJECTID is the unique row identifier used to join back to the geometry
    "primaryKey": ["OBJECTID"],
}



# Make sure the output folder exists; an already-existing folder is not an error
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

### Define Inputs

In [3]:
# Report where each input is read from / extracted to
print(f"Zipped Future Land Use shapefile stored as: {ZIP_PATH}")
print(f"Unzipped Future Land Use shapefiles will be stored in: {EXTRACT_PATH}")
print(f"Future Land Use '.csv' stored as: {FUTURE_LAND_USE_INPUT_PATH}")

Zipped Future Land Use shapefile stored as: input_data\Future_Land_use__MTP2024_parcels_Symbology.zip
Unzipped Future Land Use shapefiles will be stored in: input_data\extracted_files
Future Land Use '.csv' stored as: input_data\Future_Land_use__MTP2024_parcels_Symbology.csv


### Define Outputs

In [4]:
# Report where each output file is written, one line per output
output_locations = [
    ("Geographic split '.csv'", FUTURE_LAND_USE_GEO_OUTPUT_PATH),
    ("Non-geographic split '.csv'", FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH),
    ("Attribute schema", SCHEMA_PATH),
    ("RO-Crate metadata", RO_CRATE_METADATA_PATH),
    ("Zipped RO-Crate", RO_CRATE_PATH),
    ("GeoPackage", OUTPUT_GEOPACKAGE_PATH),
]
for output_label, output_path in output_locations:
    print(f"{output_label} stored as: {output_path}")

Geographic split '.csv' stored as: output_data\Future_Land_use__geography.csv
Non-geographic split '.csv' stored as: output_data\Future_Land_use__attributes.csv
Attribute schema stored as: output_data\attributes_schema.json
RO-Crate metadata stored as: output_data\ro-crate-metadata.json
Zipped RO-Crate stored as: output_data\future-land-use-crated.zip
GeoPackage stored as: output_data\Future_Land_use.gpkg


## Code!

### Step 1: Extract the ZIP file and split the CSV

In [5]:
# Extract every member of the input zip into the extraction folder
with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
    zip_ref.extractall(EXTRACT_PATH)

# Column dtypes for the CSV file. `last_edited_date` is read as a plain string
# (no date parsing). This mapping is reused when the attributes CSV is read
# back in Step 3, so the name `dtype` must stay available.
dtype = {
    'OBJECTID': 'int',
    'ExLU21': 'str',
    'FutLU21': 'str',
    'MIXcode': 'str',
    'JoinAll': 'str',
    'Place': 'str',
    'last_edited_date': 'str',
    'County': 'str',
    'FUTsimple': 'str'
}

# Load the CSV file with specified data types
data_df = pd.read_csv(FUTURE_LAND_USE_INPUT_PATH, dtype=dtype)

# Columns that describe the geometry rather than the land use itself
geometry_columns = ['Shape__Area', 'Shape__Length']

# Split the dataset in two, both keyed by OBJECTID:
#   - geography split: OBJECTID plus the geometry measurement columns, so the
#     columns removed from the attributes split are kept rather than discarded
#   - attributes split: every remaining column (OBJECTID is never dropped, so
#     it does not need to be added back)
# A missing geometry column raises KeyError here.
shapefile_df = data_df[['OBJECTID'] + geometry_columns]
attributes_df = data_df.drop(columns=geometry_columns)

# Save the CSVs
shapefile_df.to_csv(FUTURE_LAND_USE_GEO_OUTPUT_PATH, index=False)
attributes_df.to_csv(FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, index=False)

### Step 2: Prepare the RO-Crate

In [6]:
# Save the schema to a JSON file (explicit encoding so the result does not
# depend on the platform default)
with open(SCHEMA_PATH, 'w', encoding='utf-8') as f:
    json.dump(ATTRIBUTES_SCHEMA, f, indent=4)

# Save the RO-Crate metadata to a JSON file
with open(RO_CRATE_METADATA_PATH, 'w', encoding='utf-8') as f:
    json.dump(RO_CRATE_METADATA, f, indent=4)

# (file on disk, name inside the archive) for every crate member. The archive
# names reuse the *_NAME constants, so they cannot drift from the on-disk file
# names; each file sits at the root of the archive.
crate_members = [
    (FUTURE_LAND_USE_GEO_OUTPUT_PATH, FUTURE_LAND_USE_GEO_OUTPUT_NAME),
    (FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, FUTURE_LAND_USE_ATTRIB_OUTPUT_NAME),
    (SCHEMA_PATH, SCHEMA_NAME),
    (RO_CRATE_METADATA_PATH, RO_CRATE_METADATA_NAME),
]

# Package the RO-Crate into a ZIP file. ZIP_DEFLATED compresses the members;
# the zipfile default (ZIP_STORED) would store them uncompressed.
with zipfile.ZipFile(RO_CRATE_PATH, 'w', compression=zipfile.ZIP_DEFLATED) as ro_zip:
    for source_path, archive_name in crate_members:
        ro_zip.write(source_path, archive_name)

### Step 3: Exporting standard GeoPackage from Shapefile geodataframe and CSV dataframe

In [None]:
# Read the attributes CSV as a dataframe (same dtypes as in Step 1)
attributes_df = pd.read_csv(FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, dtype=dtype)

# Read the Shapefile as a geodataframe
shapefile_gdf = gpd.read_file(SHAPEFILE_PATH)

# Convert the Shapefile to Ohio State Plane South coordinate reference system
shapefile_gdf = shapefile_gdf.to_crs(epsg=3735)

# Non-key columns present in BOTH tables would be exported twice, as
# `<name>_x` / `<name>_y`, by the merge. Drop the shapefile's copy so the CSV
# is the single source of attribute values. The join key and the geometry
# column are always kept.
geometry_name = shapefile_gdf.geometry.name
duplicate_columns = [
    column
    for column in shapefile_gdf.columns
    if column not in ('OBJECTID', geometry_name) and column in attributes_df.columns
]
geometry_gdf = shapefile_gdf.drop(columns=duplicate_columns)

# Join the CSV dataframe to the Shapefile geodataframe using the unique identifier field
merged_gdf = geometry_gdf.merge(attributes_df, on='OBJECTID')

# The merge is an inner join: features without a matching CSV row are dropped
# and duplicated OBJECTID values multiply rows. Make that visible instead of
# exporting a different number of features without notice.
if len(merged_gdf) != len(shapefile_gdf):
    print(
        f"Warning: shapefile has {len(shapefile_gdf)} features but the join "
        f"produced {len(merged_gdf)} rows; check OBJECTID values in both inputs."
    )

# Export the resulting geodataframe as a GeoPackage
merged_gdf.to_file(OUTPUT_GEOPACKAGE_PATH, driver='GPKG')

print(f"GeoPackage has been saved to {OUTPUT_GEOPACKAGE_PATH}")

### Step 4: Preview Outputs

In [None]:
# Check for the presence of the output files. The RO-Crate metadata JSON is
# included because it is one of the declared outputs and is read further down.
output_files = [
    SCHEMA_PATH,
    RO_CRATE_METADATA_PATH,
    FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH,
    FUTURE_LAND_USE_GEO_OUTPUT_PATH,
    RO_CRATE_PATH,
    OUTPUT_GEOPACKAGE_PATH
]

missing_files = [file for file in output_files if not os.path.exists(file)]
if missing_files:
    print("The following expected output files are missing:")
    for file in missing_files:
        print(file)
else:
    print("All expected output files are present.")

# Check the contents of the RO-Crate ZIP file
with zipfile.ZipFile(RO_CRATE_PATH, 'r') as ro_zip:
    print("RO-Crate ZIP contents:")
    print(ro_zip.namelist())

# Load and print the contents of the attributes schema JSON
with open(SCHEMA_PATH, 'r') as f:
    attributes_schema = json.load(f)
    print("Attributes Schema JSON:")
    print(json.dumps(attributes_schema, indent=4))

# Load and print the contents of the RO-Crate metadata JSON
with open(RO_CRATE_METADATA_PATH, 'r') as f:
    ro_crate_metadata = json.load(f)
    print("RO-Crate Metadata JSON:")
    print(json.dumps(ro_crate_metadata, indent=4))

# Load and print the first few rows of the attributes CSV.
# low_memory=False makes pandas read the file in one pass rather than in
# chunks, which avoids mixed-dtype guesses within a column.
attributes_df = pd.read_csv(FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, low_memory=False)
print("Attributes CSV Data:")
print(attributes_df.head())

# Load and print the first few rows of the geography CSV
geography_df = pd.read_csv(FUTURE_LAND_USE_GEO_OUTPUT_PATH)
print("Geography CSV Data:")
print(geography_df.head())