In [1]:
import pandas as pd
import numpy as np

Additional cleaning tasks:
* Remove faults occurring in the vicinity of the service locations at (36.0666667, -86.4347222), (35.5883333, -86.4438888), and (36.1950, -83.174722)
* Remove faults where the EquipmentID has more than 5 characters.

Upcoming full derate indicated by an SPN 5246 <-- requires a tow. 
SPN 1569, FMI = 31 75% derate

In [2]:
#Read in J1939Faults. low_memory = false because of warning about mixed type columns.
faults = pd.read_csv('../data/J1939Faults.csv',
                     index_col = 'RecordID',
                     parse_dates = ['EventTimeStamp', 'LocationTimeStamp'],
                     low_memory = False)
#remove faults that occur at service locations. Had to remove the last digit because the rows dont have it.
service_location1 = faults[(faults["Latitude"] == 36.066666) & (faults["Longitude"] == -86.434722)]

service_location2 = faults[(faults["Latitude"] == 35.588333) & (faults["Longitude"] == -86.443888)]

service_location3 = faults[(faults["Latitude"] == 36.1950) & (faults["Longitude"] ==  -83.174722)]

#concat service_locations for removal
service_locations = pd.concat([service_location1, service_location2, service_location3])
#drop service locations
faults_cleaned = faults.drop(service_locations.index)
#remove EquipmentIDs that are longer than 5 Characters per the README
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].map(len) <= 5]
#removed columns that only contained null values or would not be valuable because they're unique to the truck.
faults_cleaned = faults_cleaned.drop(['actionDescription', 'faultValue', 'ecuSerialNumber',
                                      'ecuSource', 'MCTNumber'], axis = 1).reset_index()
#consider keeping ecuSoftwareVersion, ecuModel, ecuMake. Maybe an updated software or particular make/model throws more codes

#bring in diagnostics and pivot
diagnostics = pd.read_csv('../data/VehicleDiagnosticOnboardData.csv') \
    .pivot(index = 'FaultId', columns = 'Name', values = 'Value')

#merge diagnostics and the full faults dataset
faults_all = faults_cleaned.merge(diagnostics, left_on = 'RecordID', right_on = 'FaultId') \
    .set_index('EventTimeStamp').sort_index()


#drop the "ServiceDistance" column and push to a csv.
faults_all = faults_all.drop('ServiceDistance', axis = 1).reset_index()
faults_all.to_csv('../data/faults_all.csv')

  mask |= (ar1 == a)


In [42]:
faults_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1184701 entries, 0 to 1184700
Data columns (total 15 columns):
 #   Column                 Non-Null Count    Dtype         
---  ------                 --------------    -----         
 0   RecordID               1184701 non-null  int64         
 1   ESS_Id                 1184701 non-null  int64         
 2   EventTimeStamp         1184701 non-null  datetime64[ns]
 3   eventDescription       1124368 non-null  object        
 4   ecuSoftwareVersion     888974 non-null   object        
 5   ecuModel               1120080 non-null  object        
 6   ecuMake                1120080 non-null  object        
 7   spn                    1184701 non-null  int64         
 8   fmi                    1184701 non-null  int64         
 9   active                 1184701 non-null  bool          
 10  activeTransitionCount  1184701 non-null  int64         
 11  EquipmentID            1184701 non-null  object        
 12  Latitude               11847

In [3]:
#find the trucks that derate 75% and 5%. 
derates = faults_cleaned[((faults_cleaned['spn'] == 1569) & (faults_cleaned['fmi'] == 31))| ((faults_cleaned['spn'] == 5246) & (faults_cleaned['fmi'] == 0))]


In [4]:
#create dataframe of only trucks going through a full derate
full_derates = faults_cleaned[((faults_cleaned['spn'] == 5246) & (faults_cleaned['fmi'] == 0))]

In [4]:
#get the EquipmentIDs of trucks that do eventually reach a derate.
derates['EquipmentID'].unique()

array(['1721', '1395', '1515', '1630', '1487', '1499', '1329', '1365',
       '1419', '1486', '1509', '1492', '1340', '1328', '1430', '1370',
       '1497', '1350', '1473', '1608', '1606', '1612', '1354', '1493',
       '1583', '1465', '1335', '1789', '1383', '1389', '1501', '1438',
       '1463', '1514', '1666', '1689', '1692', '1357', '1396', '310',
       '1602', '1660', '1445', '1431', '1327', '1795', '1399', '1494',
       '1367', '1364', '1452', '1595', '1740', '1366', '1339', '1425',
       '1358', '1646', '1391', '1428', '1338', '1637', '1346', '1490',
       '1562', '1433', '1663', '1417', '1424', '1771', '1790', '1591',
       '1797', '1596', '1421', '1444', '1480', '1466', '1457', '1794',
       '1696', '1703', '1626', '1715', '1437', '1453', '1584', '1597',
       '1610', '1377', '1652', '1517', '1784', '1751', '1786', '1590',
       '1341', '1382', '1714', '1711', '1369', '1398', '1791', '1772',
       '1472', '1418', '1645', '1475', '1815', '1806', '1607', '1560',
       

In [6]:
full_derates

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp
45,46,990931,2015-02-21 12:10:51,,04993120*00027849*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,5246,0,True,1,1395,36.065972,-86.433425,2015-02-21 12:11:27
1917,1919,1007751,2015-02-22 19:44:55,,04993120*00027849*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,5246,0,True,1,1395,36.066203,-86.434814,2015-02-22 19:46:27
2057,2059,1010486,2015-02-23 04:00:21,,04993120*00027849*082113134117*07700053*I0*BBZ*,6X1u10D1500000000,CMMNS,5246,0,False,1,1395,36.066666,-86.434537,2015-02-23 01:06:06
2088,2090,1011009,2015-02-23 05:05:44,,05290170*03015749*051914190353*09400015*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,True,1,1630,40.733009,-74.087777,2015-02-23 05:08:23
2970,2972,1026305,2015-02-23 15:54:22,,unknown,unknown,unknown,5246,0,True,1,1487,28.077361,-81.897083,2015-02-23 15:54:58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1179070,1241842,120905766,2020-02-13 13:33:36,,05317106*05088449*051718172255*09401583*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,False,1,1827,36.195138,-83.174953,2020-02-13 13:33:31
1179086,1241858,120910417,2020-02-13 14:01:40,,04358814*06026985*051718174436*09401683*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,False,1,1872,35.708101,-81.395648,2020-02-13 13:59:51
1180401,1244156,121610128,2020-02-19 07:02:33,,05317106*05005224*051718172255*09401583*G1*BDR*,6X1u13D1500000000,CMMNS,5246,0,True,1,1814,36.067037,-86.434120,2020-02-19 07:03:09
1181697,1245454,122305096,2020-02-24 15:27:26,,04384413*22246857*090619141107*60701756*G1*BGT*,6X1u17D1500000000,CMMNS,5246,0,True,1,2211,36.066620,-86.434722,2020-02-24 15:28:02


In [5]:
#create a list of trucks that reach a derate. this may make the EquipmentID <= 5 irrelevant since none of these
#are longer than 4 characters.
derates_list = ['1721', '1395', '1515', '1630', '1487', '1499', '1329', '1365',
       '1419', '1486', '1509', '1492', '1340', '1328', '1430', '1370',
       '1497', '1350', '1473', '1608', '1606', '1612', '1354', '1493',
       '1583', '1465', '1335', '1789', '1383', '1389', '1501', '1438',
       '1463', '1514', '1666', '1689', '1692', '1357', '1396', '310',
       '1602', '1660', '1445', '1431', '1327', '1795', '1399', '1494',
       '1367', '1364', '1452', '1595', '1740', '1366', '1339', '1425',
       '1358', '1646', '1391', '1428', '1338', '1637', '1346', '1490',
       '1562', '1433', '1663', '1417', '1424', '1771', '1790', '1591',
       '1797', '1596', '1421', '1444', '1480', '1466', '1457', '1794',
       '1696', '1703', '1626', '1715', '1437', '1453', '1584', '1597',
       '1610', '1377', '1652', '1517', '1784', '1751', '1786', '1590',
       '1341', '1382', '1714', '1711', '1369', '1398', '1791', '1772',
       '1472', '1418', '1645', '1475', '1815', '1806', '1607', '1560',
       '1450', '1401', '1572', '1375', '1368', '1742', '1650', '1443',
       '1495', '1670', '1587', '1778', '1566', '1617', '1691', '1573',
       '1477', '1476', '1524', '1441', '1405', '1577', '1447', '1659',
       '1408', '1439', '1403', '1458', '1668', '1814', '1567', '1535',
       '1373', '1446', '1700', '1629', '1730', '1665', '1381', '1551',
       '1413', '1643', '1508', '1440', '1455', '1844', '1571', '1429',
       '1491', '1803', '1467', '1378', '1488', '1739', '1448', '1420',
       '1384', '1683', '1372', '1502', '1582', '1464', '1599', '1712',
       '1407', '1603', '1644', '1757', '1618', '1604', '1519', '1831',
       '1631', '1811', '1628', '1818', '1423', '1636', '1555', '1719',
       '1638', '1710', '1823', '1435', '1808', '1392', '1669', '1649',
       '1579', '1416', '1575', '1397', '1534', '1539', '1510', '1813',
       '1601', '1657', '1621', '1651', '1620', '1708', '1592', '1613',
       '1619', '1925', '1887', '1662', '1640', '1881', '1563', '1713',
       '1557', '1839', '1833', '1605', '1513', '1639', '1866', '1485',
       '1581', '1549', '1776', '308', '1586', '1661', '1635', '1907',
       '1853', '1785', '1594', '1859', '1600', '1963', '1848', '1918',
       '1561', '1796', '1624', '1598', '1972', '1706', '1641', '1874',
       '1940', '1609', '1642', '1506', '1552', '1685', '1883', '1946',
       '1913', '1920', '1634', '1995', '1512', '1878', '1564', '1686',
       '1554', '1505', '1802', '1788', '1868', '1807', '1688', '1922',
       '1903', '1681', '1904', '1615', '1828', '1694', '1559', '1827',
       '1842', '1720', '1970', '1877', '1888', '1989', '1932', '1804',
       '1854', '1525', '1521', '1851', '1565', '1981', '1741', '1980',
       '1737', '1580', '1547', '1924', '1822', '1766', '1812', '1917',
       '1933', '1817', '1654', '1731', '2011', '2017', '2008', '1726',
       '1717', '1656', '1835', '1664', '1880', '1916', '1996', '1912',
       '1947', '1997', '1736', '1820', '1729', '1864', '1528', '1556',
       '1898', '1884', '1832', '1705', '1585', '1908', '1752', '1623',
       '1800', '1732', '1725', '1845', '1876', '1576', '1897', '1543',
       '1647', '1931', '1971', '1852', '1697', '1959', '1953', '1787',
       '1962', '1893', '2015', '2019', '1957', '1593', '1965', '1873',
       '1911', '1934', '1942', '1982', '1900', '2009', '1948', '1723',
       '2006', '1956', '2012', '1836', '1935', '307', '1879', '1792',
       '1865', '1885', '1690', '1793', '1860', '1894', '1856', '1704',
       '1614', '1655', '1667', '1632', '1699', '1687', '1625', '1871',
       '1750', '1611', '2013', '1939', '1886', '1999', '1819', '1901',
       '1821', '1847', '1701', '1588', '1838', '1849', '1775', '1743',
       '1952', '1988', '1716', '1862', '1863', '1622', '2004', '1764',
       '1905', '1698', '1846', '309', '1798', '1780', '1762', '1979',
       '2007', '1919', '1966', '1843', '1938', '1744', '1992', '1976',
       '1926', '2143', '1718', '1899', '2111', '1978', '1763', '1870',
       '1889', '1994', '2027', '1967', '1928', '1921', '1891', '1734',
       '1945', '2010', '306', '1951', '1768', '1781', '2002', '1728',
       '1869', '1735', '1964', '1858', '1758', '1958', '2120', '1756',
       '2209', '1944', '1927', '2005', '1709', '2082', '1914', '2020',
       '1872', '1975', '1857', '1733', '1961', '1954', '1985', '1824',
       '1993', '1895', '1906', '2018', '1977', '1834', '1930', '1969',
       '1991', '1984', '1949', '1896', '2109', '2030', '1950', '1855',
       '2000', '1986', '1782', '1937', '2021', '1943', '2089', '1968',
       '1909', '1829', '1861', '2175', '2211']
faults_cleaned = faults_cleaned[faults_cleaned['EquipmentID'].isin(derates_list)]

In [8]:
faults_cleaned[~faults_cleaned['EquipmentID'].isin(derates_list)]

Unnamed: 0,RecordID,ESS_Id,EventTimeStamp,eventDescription,ecuSoftwareVersion,ecuModel,ecuMake,spn,fmi,active,activeTransitionCount,EquipmentID,Latitude,Longitude,LocationTimeStamp


In [9]:
faults_cleaned['EquipmentID'].unique()

array(['1439', '1369', '1417', '1597', '1429', '1582', '309', '1601',
       '310', '1733', '1590', '1605', '1644', '1585', '1634', '1620',
       '1721', '1610', '1576', '1395', '1766', '1437', '1586', '1600',
       '1630', '1646', '1595', '1377', '1641', '1365', '1609', '1612',
       '1768', '1650', '1657', '1514', '1703', '1625', '1565', '1623',
       '1607', '1776', '1370', '1566', '1499', '1688', '1467', '1669',
       '1606', '1683', '1391', '1458', '1632', '1778', '1559', '1431',
       '1372', '1492', '1611', '1555', '1375', '1651', '1419', '1580',
       '1401', '1614', '1737', '1358', '1740', '1562', '1339', '1715',
       '1405', '1654', '1594', '1729', '1571', '1560', '1455', '1649',
       '1443', '1354', '1659', '1628', '1335', '1645', '1440', '1515',
       '1452', '1730', '1517', '1450', '1714', '1602', '1447', '1367',
       '1666', '1615', '1725', '1488', '1338', '1399', '1741', '1473',
       '1418', '1466', '1428', '1587', '307', '1575', '1640', '1551',
       '1

In [8]:
#merge our cleaned faults_cleaned dataframe with diagnostics 
faults_diagnostics = faults_cleaned.merge(diagnostics, left_on = 'RecordID', right_on = 'FaultId') \
    .set_index('EventTimeStamp').sort_index()

full_derates = full_derates.merge(diagnostics, left_on = 'RecordID', right_on = 'FaultId') \
    .set_index('EventTimeStamp').sort_index()

#remove 'ServiceDistance' because they're all null. 
faults_diagnostics = faults_diagnostics.drop('ServiceDistance', axis = 1).reset_index()
full_derates = full_derates.drop('ServiceDistance', axis = 1).reset_index()

In [9]:
faults_diagnostics.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 921737 entries, 0 to 921736
Data columns (total 38 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   EventTimeStamp             921737 non-null  datetime64[ns]
 1   RecordID                   921737 non-null  int64         
 2   ESS_Id                     921737 non-null  int64         
 3   eventDescription           889644 non-null  object        
 4   ecuSoftwareVersion         666527 non-null  object        
 5   ecuModel                   867546 non-null  object        
 6   ecuMake                    867546 non-null  object        
 7   spn                        921737 non-null  int64         
 8   fmi                        921737 non-null  int64         
 9   active                     921737 non-null  bool          
 10  activeTransitionCount      921737 non-null  int64         
 11  EquipmentID                921737 non-null  object  

In [10]:
#create new csv of the cleaned and merged dataset.
faults_diagnostics.to_csv('../data/faults_diagnostics.csv')
full_derates.to_csv('../data/full_derates.csv')

In [11]:
service_code = pd.read_excel('../data/Service Fault Codes_1_0_0_167.xlsx')

  warn(msg)


In [12]:
service_code[service_code['SPN']== 5246]

Unnamed: 0,Published in CES 14602,Cummins Fault Code,Revision,PID,SID,MID,J1587 FMI,SPN,J1939 FMI,J2012 Pcode,Lamp Color,Lamp Device,Cummins Description,Algorithm Description
2518,Y,3712,167,Not Mapped,Not Mapped,Not Mapped,0,5246,0,Not Mapped,Red,Stop / Shutdown,Aftertreatment SCR Operator Inducement - Data ...,SCR inducement of 5 mph derate - Fault Code 41...
2781,Y,4134,167,Not Mapped,Not Mapped,Not Mapped,0,5246,15,Not Mapped,Amber,Warning,Aftertreatment SCR Operator Inducement - Data ...,SCR inducement - Least Severe - Fault Code 371...
4338,Y,6254,167,Not Mapped,Not Mapped,Not Mapped,0,5246,16,Not Mapped,Amber,Warning,Aftertreatment SCR Operator Inducement Severit...,


In [13]:
service_code[service_code['SPN']== 1569]

Unnamed: 0,Published in CES 14602,Cummins Fault Code,Revision,PID,SID,MID,J1587 FMI,SPN,J1939 FMI,J2012 Pcode,Lamp Color,Lamp Device,Cummins Description,Algorithm Description
2520,Y,3714,167,Not Mapped,Not Mapped,Not Mapped,11,1569,31,Not Mapped,Amber,Warning,Engine Protection Torque Derate - Condition Ex...,
4339,Y,6255,167,Not Mapped,Not Mapped,Not Mapped,0,1569,15,Not Mapped,,,Engine Protection Torque Derate - Data Valid B...,
5095,Y,7285,167,Not Mapped,Not Mapped,Not Mapped,14,1569,14,Not Mapped,Amber,Warning,Engine Protection Torque Derate - Special Inst...,


In [14]:
vehicle_make = pd.read_excel('../data/Vehicle_Make.xlsx')