In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
import folium
import geopandas as gpd


#load diagnostics data
diagnostics = pd.read_csv("../data/VehicleDiagnosticOnboardData.csv")

#load faults data

faults = pd.read_csv('../data/J1939Faults.csv')

#drop unnecessary columns
columns_to_drop = ['ESS_Id', 'actionDescription', 'ecuSoftwareVersion', 'ecuSerialNumber', 'ecuModel', 'ecuMake', 'ecuSource', 'faultValue', 'MCTNumber']
faults_a = faults.drop(columns=columns_to_drop)


# fix data types

faults_a['EventTimeStamp'] = pd.to_datetime(faults_a['EventTimeStamp'])
faults_a['LocationTimeStamp'] = pd.to_datetime(faults_a['LocationTimeStamp'])

#find unique list of equipments
equipment_list = faults_a['EquipmentID'].unique().tolist()

# the rest of the code is specific for a single equipment id
first_truck = faults_a[faults_a['EquipmentID']==equipment_list[0]]

# function to categorize time of day
def categorize_time_of_day(hour):
    if 5 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 18:
        return 'Afternoon'
    else:
        return 'Evening'

# Apply the function to create a new column for time of day
first_truck['time_of_day'] = first_truck['EventTimeStamp'].dt.hour.apply(categorize_time_of_day)

# new column for the month
first_truck['Month'] = first_truck['EventTimeStamp'].dt.month

# new column for year
first_truck['Year'] = first_truck['EventTimeStamp'].dt.year

#merge diagnostics and the truck data
merged_first_truck = pd.merge(first_truck, diagnostics.pivot(index='FaultId', columns='Name', values='Value'), 
                     left_on='RecordID',right_on= 'FaultId',how='left')

#it probably is already in the sorted form but still 
# merged_first_truck = first_truck.sort_values(by='EventTimeStamp')


merged_first_truck['geometry'] = gpd.points_from_xy(
    merged_first_truck['Longitude'], 
    merged_first_truck['Latitude']
    )

merged_first_truck_geo = gpd.GeoDataFrame(
    merged_first_truck, 
    crs = {'init':'epsg:4326'}, 
    geometry = merged_first_truck['geometry']
    )

# the above is needed if I want distance in meters later
# change back to 3310 ??
merged_first_truck_geo.to_crs(epsg = 3310, inplace = True)

  faults = pd.read_csv('../data/J1939Faults.csv')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_truck['time_of_day'] = first_truck['EventTimeStamp'].dt.hour.apply(categorize_time_of_day)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_truck['Month'] = first_truck['EventTimeStamp'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-cop

In [2]:
#create service center geo dataframe
service_centers = [
    (36.0666667, -86.4347222), 
    (35.5883333, -86.4438888), 
    (36.1950, -83.174722)
    ]  # latitude and longitude coordinates for service centers

service_centers_geo = [Point(lon, lat) for lat, lon in service_centers]
# same as before
service_centers_geo_df = gpd.GeoDataFrame(geometry=service_centers_geo, crs={'init':'epsg:4326'})
service_centers_geo_df.to_crs(epsg = 3310, inplace = True)


# now we want to filter dataframe to exclude data within 5 miles of all service center locations
distance_threshold = 5*1.609*1000 #meters

# Iterate over each point of interest
def filter(df,point):
    df['distance'] = df['geometry'].distance(point['geometry'])
    filtered_df = df[df['distance'] >= distance_threshold]
    return filtered_df

for index, row in service_centers_geo_df.iterrows():
    merged_first_truck_geo = filter(merged_first_truck_geo, row)
    
# this dataframe has all data but within 5 miles of service center locations for the one truck that I chose
merged_first_truck_geo 

  in_crs_string = _prepare_from_proj_string(in_crs_string)


Unnamed: 0,RecordID,EventTimeStamp,eventDescription,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,...,IntakeManifoldTemperature,LampStatus,ParkingBrake,ServiceDistance,Speed,SwitchedBatteryVoltage,Throttle,TurboBoostPressure,geometry,distance
0,1,2015-02-21 10:47:13,Low (Severity Low) Engine Coolant Level,111,17,True,2,1439,38.857638,-84.626851,...,78.8,1023,True,,0,3276.75,,0,POINT (2995931.113 658983.439),322511.478329
1,2,2015-02-21 11:34:34,,629,12,True,127,1439,38.857638,-84.626851,...,,1279,,,,,,,POINT (2995931.113 658983.439),322511.478329
2,12,2015-02-21 11:43:18,Low (Severity Low) Engine Coolant Level,111,17,False,2,1439,38.857592,-84.626805,...,,1023,,,,,,,POINT (2995936.690 658980.132),322505.189509
3,361,2015-02-21 16:45:27,,629,12,False,127,1439,36.975416,-84.106712,...,,1279,,,,,,,POINT (3115209.205 481119.723),120241.514181
4,363,2015-02-21 16:45:31,,629,12,True,127,1439,36.975462,-84.106666,...,,1279,,,,,,,POINT (3115211.115 481125.986),120242.338302
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,33641,2015-04-24 17:37:32,,629,12,False,127,1439,34.574953,-84.947916,...,,1279,,,,,,,POINT (3141956.989 205148.631),241393.279164
273,33642,2015-04-24 17:38:13,,51923,0,False,3,1439,34.574953,-84.947916,...,,255,,,,,,,POINT (3141956.989 205148.631),241393.279164
274,33643,2015-04-24 17:37:36,,629,12,True,127,1439,34.574953,-84.947916,...,,1279,,,,,,,POINT (3141956.989 205148.631),241393.279164
275,33667,2015-04-24 18:09:49,,629,12,False,127,1439,34.575000,-84.947870,...,,1279,,,,,,,POINT (3141959.038 205155.019),241386.575748
