# Future Land Use RO

## Introduction

***

## Process Outline

***

## Set Up

### Import packages

In [3]:
import os
import pandas as pd
import geopandas as gpd
import zipfile
import json

### Parameters

#### Static Parameters

In [4]:
# Define directories and file paths
OUTPUT_DIR = os.path.normpath("./output_data")
INPUT_DIR = os.path.normpath("./input_data")

ZIP_NAME = 'Future_Land_use__MTP2024_parcels_Symbology.zip'
ZIP_PATH = os.path.join(INPUT_DIR, ZIP_NAME)
FUTURE_LAND_USE_INPUT_NAME = "Future_Land_use__MTP2024_parcels_Symbology.csv"
FUTURE_LAND_USE_INPUT_PATH = os.path.join(INPUT_DIR, FUTURE_LAND_USE_INPUT_NAME)
EXTRACT_NAME = 'extracted_files'
EXTRACT_PATH = os.path.join(INPUT_DIR, EXTRACT_NAME)

FUTURE_LAND_USE_GEO_OUTPUT_NAME = "Future_Land_use__geography.csv"
FUTURE_LAND_USE_GEO_OUTPUT_PATH = os.path.join(OUTPUT_DIR, FUTURE_LAND_USE_GEO_OUTPUT_NAME)
FUTURE_LAND_USE_ATTRIB_OUTPUT_NAME = "Future_Land_use__attributes.csv"
FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH = os.path.join(OUTPUT_DIR, FUTURE_LAND_USE_ATTRIB_OUTPUT_NAME)

RO_CRATE_NAME = 'future-land-use-crated.zip'
RO_CRATE_PATH = os.path.join(OUTPUT_DIR, RO_CRATE_NAME)

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

***

## Code!

### Step 1: Extract the ZIP file and split the Excel file

In [5]:
# Extract ZIP file
with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
    zip_ref.extractall(EXTRACT_PATH)

# Define data types for the CSV file
dtype = {
    'OBJECTID': 'int',
    'ExLU21': 'str',
    'FutLU21': 'str',
    'MIXcode': 'str',
    'JoinAll': 'str',
    'Place': 'str',
    'last_edited_date': 'str',
    'County': 'str',
    'FUTsimple': 'str'
}

# Load the CSV file with specified data types
data_df = pd.read_csv(FUTURE_LAND_USE_INPUT_PATH, dtype=dtype)

# Split the dataset into two CSVs using the correct column names
shapefile_df = data_df[['OBJECTID']]
attributes_df = data_df.drop(columns=['Shape__Area', 'Shape__Length'])
attributes_df['OBJECTID'] = data_df['OBJECTID']  # Add unique identifier back

# Save the CSVs
shapefile_df.to_csv(FUTURE_LAND_USE_GEO_OUTPUT_PATH, index=False)
attributes_df.to_csv(FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, index=False)

# Frictionless schema for attributes CSV
attributes_schema = {
    "fields": [
        {"name": "OBJECTID", "type": "integer", "constraints": {"required": True}},
        {"name": "ExLU21", "type": "string", "constraints": {"required": True}},
        {"name": "FutLU21", "type": "string", "constraints": {"required": True}},
        {"name": "MIXcode", "type": "string"},
        {"name": "JoinAll", "type": "string"},
        {"name": "Place", "type": "string"},
        {"name": "last_edited_date", "type": "datetime"},
        {"name": "County", "type": "string"},
        {"name": "FUTsimple", "type": "string"},
    ],
    "primaryKey": ["OBJECTID"],
    "constraints": {
        "ExLU21": {"enum": ["HURB", "LURB", "LIND", "HSUB"]},
        "FutLU21": {"enum": ["HURB", "LURB", "LIND", "HSUB"]},
        "FUTsimple": {"enum": ["HURB", "LURB", "IND", "HSUB"]},
    },
    "description": {
        "ExLU21": "Refer to ExLU21 values",
        "FutLU21": "Refer to FutLU21 values",
        "FUTsimple": "Refer to FUTsimple values",
    }
}

# Save the schema to a JSON file
schema_path = os.path.join(OUTPUT_DIR, 'attributes_schema.json')
with open(schema_path, 'w') as f:
    json.dump(attributes_schema, f, indent=4)

# RO-Crate metadata
ro_crate_metadata = {
    "@context": "https://w3id.org/ro/crate/1.1/context",
    "@graph": [
        {
            "@id": "ro-crate-metadata.json",
            "@type": "CreativeWork",
            "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
            "about": {"@id": "./"}
        },
        {
            "@id": "./",
            "@type": "Dataset",
            "name": "Future Land Use Data",
            "description": "Dataset containing future land use data with spatial and attribute information.",
            "hasPart": [
                {"@id": "Future_Land_use__geography.csv"},
                {"@id": "Future_Land_use__attributes.csv"},
                {"@id": "attributes_schema.json"}
            ]
        },
        {
            "@id": "Future_Land_use__geography.csv",
            "@type": "File",
            "name": "Shapefile Data CSV",
            "encodingFormat": "text/csv"
        },
        {
            "@id": "Future_Land_use__attributes.csv",
            "@type": "File",
            "name": "Attributes Data CSV",
            "encodingFormat": "text/csv"
        },
        {
            "@id": "attributes_schema.json",
            "@type": "File",
            "name": "Attributes Schema",
            "encodingFormat": "application/json"
        }
    ]
}

# Save the RO-Crate metadata to a JSON file
ro_crate_metadata_path = os.path.join(OUTPUT_DIR, 'ro-crate-metadata.json')
with open(ro_crate_metadata_path, 'w') as f:
    json.dump(ro_crate_metadata, f, indent=4)

# Package the RO-Crate into a ZIP file
with zipfile.ZipFile(RO_CRATE_PATH, 'w') as ro_zip:
    ro_zip.write(FUTURE_LAND_USE_GEO_OUTPUT_PATH, 'Future_Land_use__geography.csv')
    ro_zip.write(FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, 'Future_Land_use__attributes.csv')
    ro_zip.write(schema_path, 'attributes_schema.json')
    ro_zip.write(ro_crate_metadata_path, 'ro-crate-metadata.json')

# Additional functionality: Read, transform, join, and export as GeoPackage
# Read the CSV file as a dataframe
attributes_df = pd.read_csv(FUTURE_LAND_USE_ATTRIB_OUTPUT_PATH, dtype=dtype)

# Correct the shapefile path
shapefile_path = os.path.join(EXTRACT_PATH, 'path_to_shapefile.shp')  # Update with actual shapefile name

# Check if the shapefile exists and print its name
for file in os.listdir(EXTRACT_PATH):
    if file.endswith(".shp"):
        shapefile_path = os.path.join(EXTRACT_PATH, file)
        break

# Read the Shapefile as a geodataframe
shapefile_gdf = gpd.read_file(shapefile_path)

# Convert the Shapefile to Ohio State Plane South coordinate reference system
shapefile_gdf = shapefile_gdf.to_crs(epsg=3735)

# Join the CSV dataframe to the Shapefile geodataframe using the unique identifier field
merged_gdf = shapefile_gdf.merge(attributes_df, on='OBJECTID')

# Export the resulting geodataframe as a GeoPackage
output_geopackage_path = os.path.join(OUTPUT_DIR, 'Future_Land_use.gpkg')
merged_gdf.to_file(output_geopackage_path, driver='GPKG')

print(f"GeoPackage has been saved to {output_geopackage_path}")

GeoPackage has been saved to output_data\Future_Land_use.gpkg


## Preview Output

In [6]:
# Paths to the provided files
output_dir = "./output_data"
attributes_schema_path = os.path.join(output_dir, 'attributes_schema.json')
attributes_csv_path = os.path.join(output_dir, 'Future_Land_use__attributes.csv')
geography_csv_path = os.path.join(output_dir, 'Future_Land_use__geography.csv')
ro_crate_zip_path = os.path.join(output_dir, 'future-land-use-crated.zip')
ro_crate_metadata_path = os.path.join(output_dir, 'ro-crate-metadata.json')

# Check for the presence of the output files
output_files = [
    attributes_schema_path,
    attributes_csv_path,
    geography_csv_path,
    ro_crate_zip_path,
    ro_crate_metadata_path
]

missing_files = [file for file in output_files if not os.path.exists(file)]
if missing_files:
    print("The following expected output files are missing:")
    for file in missing_files:
        print(file)
else:
    print("All expected output files are present.")

# Check the contents of the RO-Crate ZIP file
with zipfile.ZipFile(ro_crate_zip_path, 'r') as ro_zip:
    print("RO-Crate ZIP contents:")
    print(ro_zip.namelist())

# Load and print the contents of the attributes schema JSON
with open(attributes_schema_path, 'r') as f:
    attributes_schema = json.load(f)
    print("Attributes Schema JSON:")
    print(json.dumps(attributes_schema, indent=4))

# Load and print the contents of the RO-Crate metadata JSON
with open(ro_crate_metadata_path, 'r') as f:
    ro_crate_metadata = json.load(f)
    print("RO-Crate Metadata JSON:")
    print(json.dumps(ro_crate_metadata, indent=4))

# Load and print the first few rows of the attributes CSV
attributes_df = pd.read_csv(attributes_csv_path, low_memory=False)
print("Attributes CSV Data:")
print(attributes_df.head())

# Load and print the first few rows of the geography CSV
geography_df = pd.read_csv(geography_csv_path)
print("Geography CSV Data:")
print(geography_df.head())

All expected output files are present.
RO-Crate ZIP contents:
['Future_Land_use__geography.csv', 'Future_Land_use__attributes.csv', 'attributes_schema.json', 'ro-crate-metadata.json']
Attributes Schema JSON:
{
    "fields": [
        {
            "name": "OBJECTID",
            "type": "integer",
            "constraints": {
                "required": true
            }
        },
        {
            "name": "ExLU21",
            "type": "string",
            "constraints": {
                "required": true
            }
        },
        {
            "name": "FutLU21",
            "type": "string",
            "constraints": {
                "required": true
            }
        },
        {
            "name": "MIXcode",
            "type": "string"
        },
        {
            "name": "JoinAll",
            "type": "string"
        },
        {
            "name": "Place",
            "type": "string"
        },
        {
            "name": "last_edited_date",
        