### Pothole/Non-Pothole Data Processing

Description: Stitches the sensor and pothole files together into a new combined file. Breaks the data up into time intervals.

In [4]:
import pandas as pd
# Notebook display settings: show up to 100 rows, and enough decimal places
# for displaying timestamps at full precision.
pd.set_option('display.max_rows', 100)
pd.set_option('display.precision', 10)

In [8]:
# Input: sensor readings recorded during the trip.
sensorsFilePath = 'data/Pothole_Non_Pothole/trip5_sensors.csv'
sensorsDF = pd.read_csv(sensorsFilePath)

# Input: pothole timestamps recorded during the same trip.
potholesFilePath = 'data/Pothole_Non_Pothole/trip5_potholes.csv'
potholesDF = pd.read_csv(potholesFilePath)

# Output: where the combined per-interval summary is saved later in the notebook.
combinedFilePath = 'data/Pothole_Non_Pothole/trip5_intervals.csv'

#### Sensor Data

In [9]:
# Preview the first five sensor rows.
sensorsDF.head(n=5)

Unnamed: 0,timestamp,latitude,longitude,speed,accelerometerX,accelerometerY,accelerometerZ,gyroX,gyroY,gyroZ
0,1493003562.6,40.4475257722,-79.9441930614,0.0,0.217666626,-0.9800415039,0.1583404541,0.032959548,0.0487339837,0.1078993031
1,1493003562.7,40.4475257722,-79.9441930614,0.0,0.0113525391,-0.9469299316,0.212097168,0.0235993357,-0.0873756522,0.0175265294
2,1493003562.9,40.4475257722,-79.9441930614,0.0,0.023651123,-0.9569549561,0.208114624,-0.009586581,0.0053814496,0.0030866037
3,1493003563.1,40.4475257822,-79.9441930619,0.0,0.0610809326,-0.9560852051,0.201461792,-0.0148543136,0.0011598067,-0.0033537188
4,1493003563.3,40.4475257822,-79.9441930619,0.0,0.0701599121,-0.9564666748,0.1945037842,-0.0127994185,-0.0009803096,0.000937699


#### Pothole Data

In [40]:
# Preview the first five pothole timestamps.
potholesDF.head(n=5)

Unnamed: 0,timestamp
0,1493003685.7
1,1493003686.7
2,1493003692.6
3,1493003709.0
4,1493003714.0


#### Grouping points into time intervals with combined sensor/pothole data

Each interval will represent 2 seconds (10 points) of data. They will also contain the following aggregate statistics for points in that interval:

- meanSpeed: mean speed in interval
- sdSpeed: standard deviation of speed in interval
- maxAccelX, maxAccelY, maxAccelZ: maximum value of accelerometerX, accelerometerY, accelerometerZ
- maxGyroX, maxGyroY, maxGyroZ: maximum value of gyroX, gyroY, gyroZ
- minAccelX, minAccelY, minAccelZ: minimum value of accelerometerX, accelerometerY, accelerometerZ
- minGyroX, minGyroY, minGyroZ: minimum value of gyroX, gyroY, gyroZ
- meanAccelX, meanAccelY, meanAccelZ: mean of accelerometerX, accelerometerY, accelerometerZ
- meanGyroX, meanGyroY, meanGyroZ: mean of gyroX, gyroY, gyroZ
- sdAccelX, sdAccelY, sdAccelZ: standard deviation of accelerometerX, accelerometerY, accelerometerZ
- sdGyroX, sdGyroY, sdGyroZ: standard deviation of gyroX, gyroY, gyroZ
- pothole (True/False): whether a pothole was encountered in that interval

In [41]:
# Sampling rate of the sensor file: 5 points = 1 second
pointsPerSecond = 5
# Time window (2 seconds) in number of points
window = 2 * pointsPerSecond

def intervalHasPothole(intervalStart, intervalEnd, potholesDF):
    """Report whether any pothole was recorded inside a time interval.

    The interval is half-open: a pothole counts when
    ``intervalStart < timestamp <= intervalEnd``.

    Parameters:
        intervalStart: exclusive lower bound, in the same units as the
            pothole timestamps.
        intervalEnd: inclusive upper bound.
        potholesDF: DataFrame with a 'timestamp' column of pothole times.

    Returns:
        True if at least one pothole timestamp lies in the interval,
        otherwise False (including when potholesDF has no rows).
    """
    potholeTimestamps = potholesDF['timestamp']
    # Vectorised comparison instead of Series.iteritems(), which was removed
    # in pandas 2.0; it also avoids a Python-level loop on every call.
    inInterval = (potholeTimestamps > intervalStart) & (potholeTimestamps <= intervalEnd)
    # .any() yields a numpy bool; convert so callers still get a plain bool.
    return bool(inInterval.any())

# Source columns that are summarised per interval, and the suffix each one
# contributes to the output column names.
sensorColumns = ['accelerometerX', 'accelerometerY', 'accelerometerZ', 'gyroX', 'gyroY', 'gyroZ']
axisSuffixes = ['AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ']

# Output column order: speed statistics, then max/min/mean/sd for every
# sensor axis (statistic-major), then the pothole label.
intervalColumns = ['meanSpeed', 'sdSpeed']
for statPrefix in ('max', 'min', 'mean', 'sd'):
    intervalColumns += [statPrefix + suffix for suffix in axisSuffixes]
intervalColumns.append('pothole')

# Rows are collected in a list and turned into a DataFrame once at the end;
# growing a DataFrame one .loc row at a time is quadratic in the row count.
intervalRows = []
previousEnd = None
# range() instead of the Python 2-only xrange() so the cell also runs on Python 3.
for i in range(0, len(sensorsDF), window):
    interval = sensorsDF.iloc[i:i + window]
    timestamps = interval['timestamp']
    intervalEnd = timestamps.iloc[-1]
    # intervalHasPothole treats the start as exclusive. Using the previous
    # interval's end as the start makes consecutive intervals contiguous, so a
    # pothole recorded between the last sample of one interval and the first
    # sample of the next is assigned to the later interval instead of being
    # dropped. The first interval starts at its own first timestamp.
    # Assumes the sensor timestamps are in ascending order.
    intervalStart = timestamps.iloc[0] if previousEnd is None else previousEnd
    previousEnd = intervalEnd

    speed = interval['speed']
    sensors = interval[sensorColumns]
    intervalSummary = [speed.mean(), speed.std()]
    # Each aggregate is a Series in sensorColumns order, matching intervalColumns.
    # Note: std() is NaN for a trailing interval that contains a single point.
    for aggregate in (sensors.max(), sensors.min(), sensors.mean(), sensors.std()):
        intervalSummary.extend(aggregate.tolist())
    intervalSummary.append(intervalHasPothole(intervalStart, intervalEnd, potholesDF))
    intervalRows.append(intervalSummary)

intervalsDF = pd.DataFrame(intervalRows, columns=intervalColumns)

intervalsDF.head()

Unnamed: 0,meanSpeed,sdSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,minAccelX,minAccelY,...,meanGyroX,meanGyroY,meanGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,pothole
0,0.0,0.0,0.217666626,-0.9469299316,0.212097168,0.032959548,0.0487339837,0.1078993031,0.0113525391,-0.9800415039,...,-0.0043633764,-0.0046499325,0.0127618946,0.0557300765,0.0087466703,0.0144098904,0.0174007225,0.0331251346,0.0339172088,False
1,0.0479999989,0.1011928829,0.0785064697,-0.9331359863,0.271774292,-0.003179548,0.0096606169,0.0072680329,0.0588684082,-0.9709320068,...,-0.0141437556,-0.0005465073,-0.0017427194,0.0069277981,0.0133113464,0.0490568595,0.0071068851,0.0053237358,0.0050463825,False
2,0.2259999931,0.0800277703,0.0674438477,-0.946182251,0.2098083496,-0.006333796,0.0054416371,0.0020445088,0.0550079346,-0.9678955078,...,-0.0132972965,-0.0008710934,-0.0015199726,0.0045312778,0.0070150467,0.0209263038,0.0036947555,0.0040217262,0.0025294318,False
3,0.0569999993,0.1802498244,0.1011352539,-0.9107055664,0.3683624268,0.0032314797,0.0086475504,0.0052911685,0.0664672852,-0.9496612549,...,-0.0090338419,-0.0018563032,-0.0023267506,0.0088680387,0.0120398286,0.0522918066,0.0081263068,0.0063212338,0.0049780064,False
4,1.2889999986,0.6616889319,0.1100006104,-0.8900909424,0.365234375,-0.0053862433,0.0597397633,0.0158157148,0.0283508301,-0.9397277832,...,-0.0178772678,0.0196877912,-0.0057781541,0.0243626337,0.0170524731,0.0215171973,0.0065494289,0.022648622,0.0163744159,False


#### Intervals with potholes

In [42]:
# Keep only the intervals that were labelled as containing a pothole.
potholeMask = intervalsDF['pothole'] == True
intervalsDF.loc[potholeMask]

Unnamed: 0,meanSpeed,sdSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,minAccelX,minAccelY,...,meanGyroX,meanGyroY,meanGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,pothole
61,9.607999897,0.7463507144,0.319152832,-0.6574707031,0.6968536377,0.1304916976,0.3210717663,0.1497466186,-0.71824646,-1.3377685547,...,0.028658889,0.0549089222,-0.0330075381,0.3189966289,0.231352721,0.1523701298,0.0915202664,0.2103093672,0.166110547,True
62,11.5019997597,0.5442591529,0.4201660156,-0.8198394775,0.6541442871,0.0927733471,0.6253592261,0.1302813079,-0.3746490479,-1.2853546143,...,-0.0563687872,0.0367370289,-0.0550488447,0.1945242572,0.1340302622,0.1228299154,0.1610075926,0.2283527206,0.2272700085,True
65,13.7519997597,0.2247366689,0.1572875977,-0.730255127,0.3397369385,0.3867916578,0.205310279,0.3228387737,-0.1331787109,-1.0730285645,...,-0.0074892884,-0.0279241495,0.0183342662,0.0842193626,0.1124882588,0.1092399833,0.1828185347,0.1327779127,0.1268884029,True
73,11.7749997139,0.6354569821,0.5521087646,-0.7353668213,0.5048828125,0.6640530925,0.2008843716,0.5188905731,-0.6811828613,-1.0848083496,...,0.0882258398,-0.1600026118,0.1418432087,0.3440100626,0.127034333,0.0902955121,0.2291065399,0.2967643671,0.2459798362,True
75,13.6869999886,0.3349976438,0.6703491211,-0.731918335,0.6246643066,0.6866204533,0.6902575324,0.1733624659,-0.8412780762,-1.0897674561,...,0.0438787215,0.0339555702,-0.0670231094,0.3664967171,0.1070546572,0.0947587328,0.2565079154,0.291971253,0.2426217373,True
76,15.4270003319,0.7250447549,0.8647766113,-0.5925292969,0.4685821533,0.3289283578,0.3860012316,0.5476415276,-0.7893829346,-1.2871398926,...,-0.024587981,-0.0371737607,0.017649967,0.4312860428,0.1883023337,0.1619944648,0.1814881912,0.2984256896,0.2925179091,True
77,16.798000145,0.1394275821,0.5782165527,-0.4690856934,0.6850280762,0.2723574899,0.7928491555,0.2537073728,-0.4995727539,-1.817199707,...,-0.0581144094,0.2337382187,-0.1230664583,0.304803002,0.361255068,0.2315758112,0.2555924534,0.2863408018,0.182486871,True
78,15.1100003242,1.6310870475,0.1742248535,-0.9388580322,0.118270874,0.1890745837,0.0537247476,0.2211267927,-0.2343292236,-1.2772216797,...,-0.0009630257,-0.0399843011,0.032826097,0.134559983,0.1104204978,0.0662914828,0.084934606,0.0985365618,0.0880066292,True
80,7.3979997635,1.022467367,0.4824676514,-0.6853637695,0.7453918457,0.1667258685,0.6318067391,0.3324674326,-0.5041351318,-1.1439056396,...,0.001840484,0.0759264297,-0.0693743877,0.2740424441,0.1349041061,0.1065849686,0.1066668312,0.2872687266,0.2769360018,True
82,16.0609994888,1.0029556957,0.83152771,-0.5032806396,0.5885467529,0.1831077713,0.4759843848,0.2338279406,-0.3936309814,-1.2778930664,...,-0.0006775614,0.0143625875,-0.0126253277,0.4260477774,0.2156084985,0.2016259649,0.1749885477,0.2703476917,0.1986334967,True


#### Save to CSV

In [43]:
# Write the interval summaries to the combined file; the DataFrame index is
# written as the first (unnamed) column.
intervalsDF.to_csv(path_or_buf=combinedFilePath)

### Road Conditions (Good Road/Bad Road) Data Processing

Description: Breaks up road conditions data into time intervals. Contains only sensor data, not pothole data. Labels are: 0 (good road) and 1 (bad road).

In [47]:
# Label applied to every interval from this recording: 0 (good road) or 1 (bad road).
label = 1

# Input: sensor readings for this road segment.
sensorsFilePath = 'data/Good_Road_Bad_Road/bad5_sensors.csv'
sensorsDF = pd.read_csv(sensorsFilePath)

# Output: where the labelled per-interval summary is saved later in the notebook.
intervalsFilePath = 'data/Good_Road_Bad_Road/bad5_intervals.csv'

sensorsDF.head(n=5)

Unnamed: 0,timestamp,latitude,longitude,speed,accelerometerX,accelerometerY,accelerometerZ,gyroX,gyroY,gyroZ
0,1493478949.9,40.478673768,-79.9227347777,3.6099998951,-0.0810241699,-0.9198455811,0.3198242188,-0.0721234623,0.0355715764,-0.001896437
1,1493478950.0,40.478673768,-79.9227347777,3.6099998951,-0.0263671875,-0.9717712402,0.3818664551,0.0316415495,0.0508482673,-0.0275288299
2,1493478950.3,40.4786584433,-79.9226960156,4.1399998665,-0.0024719238,-0.8837127686,0.4425201416,0.0160229087,-0.0107429255,-0.0094041544
3,1493478950.5,40.4786584433,-79.9226960156,4.1399998665,0.0433502197,-0.9283294678,0.496963501,0.0430207042,0.0103533051,-0.0251753944
4,1493478950.7,40.4786584433,-79.9226960156,4.1399998665,0.0786437988,-0.8535614014,0.4096221924,-0.0245514158,0.0054794539,0.0071979918


#### Grouping points into time intervals with labels

Each interval will represent 5 seconds (25 points) of data. They will also contain the following aggregate statistics for points in that interval:

- meanSpeed: mean speed in interval
- sdSpeed: standard deviation of speed in interval
- maxAccelX, maxAccelY, maxAccelZ: maximum value of accelerometerX, accelerometerY, accelerometerZ
- maxGyroX, maxGyroY, maxGyroZ: maximum value of gyroX, gyroY, gyroZ
- minAccelX, minAccelY, minAccelZ: minimum value of accelerometerX, accelerometerY, accelerometerZ
- minGyroX, minGyroY, minGyroZ: minimum value of gyroX, gyroY, gyroZ
- meanAccelX, meanAccelY, meanAccelZ: mean of accelerometerX, accelerometerY, accelerometerZ
- meanGyroX, meanGyroY, meanGyroZ: mean of gyroX, gyroY, gyroZ
- sdAccelX, sdAccelY, sdAccelZ: standard deviation of accelerometerX, accelerometerY, accelerometerZ
- sdGyroX, sdGyroY, sdGyroZ: standard deviation of gyroX, gyroY, gyroZ
- condition (1/0): whether that road is in good (0) or bad (1) condition

In [48]:
# Sampling rate of the sensor file: 5 points = 1 second
pointsPerSecond = 5
# Time window (5 seconds) in number of points
window = 5 * pointsPerSecond

# Source columns that are summarised per interval, and the suffix each one
# contributes to the output column names.
sensorColumns = ['accelerometerX', 'accelerometerY', 'accelerometerZ', 'gyroX', 'gyroY', 'gyroZ']
axisSuffixes = ['AccelX', 'AccelY', 'AccelZ', 'GyroX', 'GyroY', 'GyroZ']

# Output column order: speed statistics, then max/min/mean/sd for every
# sensor axis (statistic-major), then the road-condition label.
intervalColumns = ['meanSpeed', 'sdSpeed']
for statPrefix in ('max', 'min', 'mean', 'sd'):
    intervalColumns += [statPrefix + suffix for suffix in axisSuffixes]
intervalColumns.append('condition')

# Rows are collected in a list and turned into a DataFrame once at the end;
# growing a DataFrame one .loc row at a time is quadratic in the row count.
intervalRows = []
# range() instead of the Python 2-only xrange() so the cell also runs on Python 3.
for i in range(0, len(sensorsDF), window):
    interval = sensorsDF.iloc[i:i + window]
    speed = interval['speed']
    sensors = interval[sensorColumns]
    intervalSummary = [speed.mean(), speed.std()]
    # Each aggregate is a Series in sensorColumns order, matching intervalColumns.
    # Note: std() is NaN for a trailing interval that contains a single point.
    for aggregate in (sensors.max(), sensors.min(), sensors.mean(), sensors.std()):
        intervalSummary.extend(aggregate.tolist())
    # Label for that road; stored as given (0/1) rather than being coerced to
    # float by a row-wise .loc assignment.
    intervalSummary.append(label)
    intervalRows.append(intervalSummary)

intervalsDF = pd.DataFrame(intervalRows, columns=intervalColumns)

intervalsDF.head()

Unnamed: 0,meanSpeed,sdSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,minAccelX,minAccelY,...,meanGyroX,meanGyroY,meanGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,condition
0,6.4899998856,2.0949542842,0.1021881104,-0.8171539307,0.5183563232,0.0845513699,0.2085841029,0.1035583506,-0.189163208,-0.9821472168,...,0.0042767384,-0.0022387597,0.0099440305,0.0749603115,0.0480019025,0.0574807358,0.0402061664,0.0746290717,0.0437400911,1.0
1,11.4511999893,0.5984655489,0.986328125,-0.1281738281,0.6994934082,0.1930410958,0.7581532265,0.4692156946,-0.7867736816,-1.0929412842,...,-0.0745246536,0.07791963,-0.0242075591,0.4881209222,0.269081429,0.1516921909,0.1168136627,0.314006145,0.2384706689,1.0
2,11.301199913,0.3901102244,0.2220611572,-0.7303771973,0.4591827393,0.1105025431,0.2950414972,0.3324922,-0.4814910889,-1.2171783447,...,-0.0061162371,-0.0052369252,-0.0041433673,0.1719971308,0.1025274329,0.094086452,0.051698018,0.1678647341,0.0989280455,1.0
3,6.0583999252,2.8402430085,0.1121520996,-0.8798065186,0.4755096436,0.1180028037,0.0661377415,0.1107126665,-0.1504821777,-1.0952911377,...,0.0076757735,-0.0199611061,0.0102378091,0.070220057,0.0575722377,0.1496352599,0.0348953698,0.0529670397,0.0357358385,1.0
4,6.6536000347,2.6391868951,0.2885284424,-0.5120239258,0.902557373,0.3735006197,0.591402058,0.6783321634,-0.4931335449,-1.1611938477,...,0.0183150595,0.0004360447,0.0127677162,0.1726253106,0.1711448981,0.1701260388,0.1842685127,0.2886828238,0.2236127882,1.0


#### Save to CSV

In [49]:
# Write the labelled interval summaries to disk; the DataFrame index is
# written as the first (unnamed) column.
intervalsDF.to_csv(path_or_buf=intervalsFilePath)