# Vehicular Crashes in Longmont, Colorado

This script reads vehicle crash data spreadsheets downloaded from the Colorado Department of Transportation (CDOT). It keeps only the crashes that occurred in Longmont, cleans and processes them, and concatenates several years of data into a single CSV time series.

Source: https://www.codot.gov/safety/traffic-safety/data-analysis/crash-data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Input workbooks, one per year. Only 2021 and newer are listed because
# older files follow a different format.
files = [f'CDOTRM_CD_Crash_Listing_-_{year}.xlsx' for year in range(2021, 2025)]

# Empty dataframe that will be populated with each year's data
all_crashes = pd.DataFrame()

In [3]:
def process_crashes(crashes):
    """Clean one year's crash listing and keep only the Longmont rows.

    Parameters
    ----------
    crashes : pandas.DataFrame
        Raw crash listing. The columns 'City', 'CUID', 'Crash Date',
        'TU-1 Estimated Speed', 'TU-1 Speed Limit', 'TU-2 Estimated Speed'
        and 'TU-2 Speed Limit' are read; 'Crash Date' must be datetime-like
        because its `.dt` accessor is used.

    Returns
    -------
    pandas.DataFrame
        A new dataframe (the input is not modified) holding only rows whose
        City is 'LONGMONT', with CUID rewritten as '<year>-<CUID>', two added
        0/1 columns 'TU-1 Speeding' and 'TU-2 Speeding', and every remaining
        NaN replaced by an empty string.
    """
    # Work on a copy of the Longmont subset so the caller's frame is untouched
    in_longmont = crashes.City == 'LONGMONT'
    longmont = crashes.loc[in_longmont].copy()

    # Prefix each CUID with the crash year: year-CUID
    crash_year = longmont['Crash Date'].dt.year.astype(str)
    longmont['CUID'] = crash_year + '-' + longmont['CUID'].astype(str)

    # Flag each traffic unit that was speeding (1=yes, 0=no). A NaN speed or
    # limit makes the comparison False, so those rows are flagged 0.
    for unit in ('TU-1', 'TU-2'):
        over_limit = longmont[unit + ' Estimated Speed'] > longmont[unit + ' Speed Limit']
        longmont[unit + ' Speeding'] = np.where(over_limit, 1, 0)

    # Blank out whatever NaN values are left
    return longmont.fillna('')

In [4]:
print('Processing:')

# Collect each year's cleaned dataframe here and concatenate once after the
# loop. Rebuilding all_crashes from this list (instead of appending onto the
# existing global) means re-running this cell does not duplicate rows, and it
# avoids re-copying the accumulated data on every iteration.
yearly_frames = []

for file in files:

    print('\t'+file)

    # read each file into a dataframe (the calamine engine is many times
    # faster than the default openpyxl engine)
    temp_df = pd.read_excel(file, header=0, engine='calamine')

    # call function to clean data
    processed_df = process_crashes(temp_df)

    yearly_frames.append(processed_df)

# concatenate all cleaned years into one dataframe; pd.concat raises on an
# empty list, so fall back to an empty dataframe when there are no input files
if yearly_frames:
    all_crashes = pd.concat(yearly_frames, ignore_index=True)
else:
    all_crashes = pd.DataFrame()

print('Finished')

Processing:
	CDOTRM_CD_Crash_Listing_-_2021.xlsx
	CDOTRM_CD_Crash_Listing_-_2022.xlsx
	CDOTRM_CD_Crash_Listing_-_2023.xlsx
	CDOTRM_CD_Crash_Listing_-_2024.xlsx
Finished


In [5]:
# Unwanted columns to remove before export
drop_columns = [
    'Agency Id',
    'City',
    'County',
    'Rd_Section',
    'Rd_Number',
    'Record Status',
    'Processing Status',
    'Last Updated',
    'Link',
]

# Remove them in place; a KeyError is raised if any listed column is missing
all_crashes.drop(columns=drop_columns, inplace=True)

# Write everything to a comma-separated file (comma is the to_csv default);
# the row index is written as the first column
all_crashes.to_csv('longmont_crashes.csv')