In [None]:
# Import libraries
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [None]:
# Raise the pandas display limits so wide/long frames are shown without truncation (up to 500 rows and 500 columns)
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

In [None]:
# Load the fault records (indexed by RecordID, both timestamp columns parsed as dates)
# and drop the actionDescription and faultValue columns
faults = (
    pd.read_csv(
        '../data/J1939Faults.csv',
        index_col = 'RecordID',
        parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
    )
    .drop(columns = ['actionDescription', 'faultValue'])
)

# Reshape the diagnostics readings: one row per FaultId, one column per Name, cells holding Value
diagnostics = (
    pd.read_csv('../data/VehicleDiagnosticOnboardData.csv')
    .pivot(index = 'FaultId', columns = 'Name', values = 'Value')
)

# Join each fault to its diagnostics (RecordID matched against FaultId), then index and sort by EventTimeStamp
faults_diagnostics = (
    faults
    .merge(diagnostics, left_on = 'RecordID', right_on = 'FaultId')
    .set_index('EventTimeStamp')
    .sort_index()
)

# Convert to a GeoDataFrame with point geometries built from the Longitude/Latitude columns
faults_diagnostics = gpd.GeoDataFrame(
    faults_diagnostics,
    geometry = gpd.points_from_xy(
        faults_diagnostics['Longitude'],
        faults_diagnostics['Latitude'],
    ),
)

In [None]:
# Remove faults whose EquipmentID has more than 5 characters (i.e. keep IDs of length <= 5).
# The earlier `< 5` comparison also discarded 5-character IDs, which contradicted this rule.
# NOTE: astype(str) turns a missing EquipmentID into the 3-character string 'nan', so such rows are kept.
faults_diagnostics = faults_diagnostics[faults_diagnostics['EquipmentID'].astype(str).str.len() <= 5]

# Remove faults occurring in the vicinity of service locations.
# Each point is (longitude, latitude), matching how the geometry column was built.
service_locations = [
    Point(-86.4347222, 36.0666667),
    Point(-86.4438888, 35.5883333),
    Point(-83.174722, 36.1950),
]

# Distance threshold in the same units as the geometry coordinates
# (presumably decimal degrees, since no projected CRS is set -- TODO confirm).
service_radius = 0.01

for service_location in service_locations:
    # Rows whose distance is NaN (e.g. missing coordinates) compare False here and are therefore kept
    near_service_location = faults_diagnostics.distance(service_location) < service_radius
    faults_diagnostics = faults_diagnostics[~near_service_location]

In [None]:
# Export the filtered faults to CSV, leaving the geometry column out of the file
(
    faults_diagnostics
    .drop(columns = ['geometry'])
    .to_csv('../data/faults_diagnostics.csv')
)