In [1]:
%%HTML
<style>
    /* Sets the --vscode-font-family CSS custom property on <body> to "Inter".
       NOTE(review): presumably this changes the font of the rendered notebook
       when it is viewed in VS Code - confirm. */
    body {
        --vscode-font-family: "Inter";
    }
</style>
%%HTML
<style>
    body {
        --vscode-font-family: "Inter";
    }
</style>


<center>
<h1 style="font-size: 50px; font-weight: bold; color:sandybrown">OC SWITRS Data Processing</h1>

<div style="font-size: 40px; font-weight: bold; color: sandybrown">Stata Integration Data Operations</div>
<div style="font-size: 30px; font-weight: bold; color: sandybrown">v.2, November 2024</div>
</center>

---

### <font color="lime">**Preliminaries**</font>

Importing the Python libraries used in this project.

In [2]:
# Import Python libraries
import os, json, pytz, math, arcpy, arcgis
from datetime import date, time, datetime, timedelta, tzinfo, timezone
from tqdm.notebook import trange, tqdm, tqdm_notebook
import pandas as pd
import numpy as np
from pandas.api.types import infer_dtype, is_numeric_dtype, is_object_dtype, is_float_dtype, is_integer_dtype, is_string_dtype, is_datetime64_any_dtype, is_complex_dtype, is_interval_dtype, is_sparse, is_integer, is_any_real_numeric_dtype

# important as it "enhances" Pandas by importing these classes (from ArcGIS API for Python)
from arcgis.features import GeoAccessor, GeoSeriesAccessor


### <font color="lime">**Project and Workspace Variables**</font>

Define and maintain project, workspace, ArcGIS, and data-related variables.

Project and ArcGIS Pro project path

In [3]:
# Root folder of the OC SWITRS project.
# The default below is the original workstation location. Set the
# OCSWITRS_PROJECT_PATH environment variable to run this notebook from a
# different machine or drive without editing the cell; an unset or empty
# variable falls back to the default.
# (An earlier revision derived this path from the "OneDriveCommercial"
# environment variable: <OneDrive>/Projects/OCTraffic/OCSWITRS.)
projectPath = (
    os.environ.get("OCSWITRS_PROJECT_PATH")
    or r"D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS"
)

# Folder under the project root that holds the ArcGIS Pro project
agpPath = os.path.join(projectPath, "AGPSWITRS")


ArcGIS related paths

In [None]:
# File names of the ArcGIS Pro project and of its geodatabase
aprx = "AGPSWITRS.aprx"
gdb = "AGPSWITRS.gdb"

# Both files sit directly inside the ArcGIS Pro project folder
aprxPath = os.path.join(agpPath, aprx)
gdbPath = os.path.join(agpPath, gdb)

# Allow geoprocessing tools to overwrite outputs that already exist
arcpy.env.overwriteOutput = True

# Make the project geodatabase the current arcpy workspace, then read the
# value back from arcpy so `workspace` mirrors what arcpy actually holds
arcpy.env.workspace = gdbPath
workspace = arcpy.env.workspace


Project folder paths

In [6]:
# Sub-folders located directly under the project root:
# raw data files, layers, and notebooks (in that order)
rawDataPath, layersPath, notebooksPath = (
    os.path.join(projectPath, folderName)
    for folderName in ("RawData", "Layers", "Notebooks")
)

# "SupportingData" feature directory inside the project geodatabase
# (resolved against the current arcpy workspace)
supportingDataPath = os.path.join(workspace, "SupportingData")


Data folder contents

The most current raw data files cover the period from 01/01/2013 to 06/30/2024. The data files are in CSV format and are stored in the raw data folder (`RawData`) after being downloaded from the SWITRS Database (https://tims.berkeley.edu/tools/query/summary.php). The date variables that define this period are set below.

In [7]:
# First day covered by the raw data files (01/01/2013)
rawDateStart = datetime(year=2013, month=1, day=1)

# Last day covered by the raw data files (06/30/2024)
rawDateEnd = datetime(year=2024, month=6, day=30)


In [8]:
# Raw data CSV files (crashes, parties, victims).
# Each file is named <Table>_<start>_<end>.csv, where both dates are
# formatted as YYYYMMDD and come from rawDateStart / rawDateEnd.
rawCrashesPath, rawPartiesPath, rawVictimsPath = (
    os.path.join(
        rawDataPath,
        f"{tableName}_{rawDateStart:%Y%m%d}_{rawDateEnd:%Y%m%d}.csv",
    )
    for tableName in ("Crashes", "Parties", "Victims")
)

# JSON codebook stored alongside the raw data files
codebookPath = os.path.join(rawDataPath, "codebook.json")

# Supporting feature classes stored under the supporting data path
boundariesPath, citiesPath, roadsPath = (
    os.path.join(supportingDataPath, featureClass)
    for featureClass in (
        "OCSWITRS_Boundaries",
        "OCSWITRS_Cities",
        "OCSWITRS_Roads",
    )
)


Display all information about paths, folders, and data files

In [9]:
# Print a summary of every path, folder and data file defined above.
# Each section is a heading followed by a tab-indented bullet list; the
# leading "\n" in each list leaves a blank line under its heading.

# ArcGIS Pro project, workspace and geodatabase
print("Key Project Information")
print(
    f"\n\t- Name: {aprx}"
    f"\n\t- Path: {aprxPath}"
    f"\n\t- Project Path: {projectPath}"
    f"\n\t- Workspace: {workspace}"
    # Fixed: this bullet was missing the space after the dash ("-Geodatabase")
    f"\n\t- Geodatabase: {gdb}"
    f"\n\t- Geodatabase Path: {gdbPath}"
)

# Project folders
print("\nProject Directories:")
print(
    f"\n\t- Raw Data: {rawDataPath}"
    f"\n\t- Layers: {layersPath}"
    f"\n\t- Notebooks: {notebooksPath}"
    f"\n\t- Supporting Data: {supportingDataPath}"
)

# Feature classes under the supporting data path
print("\nSupporting Feature Classes:")
print(
    f"\n\t- Boundaries: {boundariesPath}"
    f"\n\t- Cities: {citiesPath}"
    f"\n\t- Roads: {roadsPath}"
)

# Codebook and raw data CSV files
print("\nOther Supporting Data:")
print(
    f"\n\t- Codebook: {codebookPath}"
    f"\n\t- Raw Data Files:"
    f"\n\t\t- Crashes: {rawCrashesPath}"
    f"\n\t\t- Parties: {rawPartiesPath}"
    f"\n\t\t- Victims: {rawVictimsPath}"
)


Key Project Information

	- Name: AGPSWITRS.aprx
	- Path: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\AGPSWITRS\AGPSWITRS.aprx
	- Project Path: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS
	- Workspace: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\AGPSWITRS\AGPSWITRS.gdb
	-Geodatabase: AGPSWITRS.gdb
	- Geodatabase Path: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\AGPSWITRS\AGPSWITRS.gdb

Project Directories:

	- Raw Data: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\RawData
	- Layers: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\Layers
	- Notebooks: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\Notebooks
	- Supporting Data: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\AGPSWITRS\AGPSWITRS.gdb\SupportingData

Supporting Feature Classes:

	- Boundaries: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\AGPSWITRS\AGPSWITRS.gdb\SupportingData\OCSWITRS_Boundaries
	- Cities: D:\Professional\Projects-OCPW\OCTraffic\OCSWITRS\AGPSWITRS\AGPSWITRS.gdb\SupportingData

## <font color="orangered">**Importing Raw Data**</font>

Importing the pandas pickle files

In [None]:
# Pickle files stored in the raw data folder, one per table:
# dfCrashes.pkl, dfParties.pkl, dfVictims.pkl and dfCollisions.pkl
pklCrashes, pklParties, pklVictims, pklCollisions = (
    os.path.join(rawDataPath, f"df{tableName}.pkl")
    for tableName in ("Crashes", "Parties", "Victims", "Collisions")
)


In [None]:
# Load the four pickled objects (expected to be pandas DataFrames) in the
# order crashes, parties, victims, collisions.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that were written by this project.
dfCrashes, dfParties, dfVictims, dfCollisions = (
    pd.read_pickle(pklFile)
    for pklFile in (pklCrashes, pklParties, pklVictims, pklCollisions)
)
