### Pothole/Non-Pothole Data Processing

Description: stiches sensor and pothole files together into a new combined file. Breaks up data into time intervals.

In [14]:
import pandas as pd
pd.options.display.max_rows = 100
pd.set_option('display.precision', 10)  #for displaying timestamps

In [60]:
sensorsFilePath = 'data/Pothole_Non_Pothole/trip6_sensors.csv'
potholesFilePath = 'data/Pothole_Non_Pothole/trip6_potholes.csv'
combinedFilePath = 'data/Pothole_Non_Pothole/trip6_intervals.csv'
sensorsDF = pd.read_csv(sensorsFilePath)
potholesDF = pd.read_csv(potholesFilePath)

#### Sensor Data

In [61]:
sensorsDF.head()

Unnamed: 0,timestamp,latitude,longitude,speed,accelerometerX,accelerometerY,accelerometerZ,gyroX,gyroY,gyroZ
0,1493003562.6,40.4475257722,-79.9441930614,0.0,0.217666626,-0.9800415039,0.1583404541,0.032959548,0.0487339837,0.1078993031
1,1493003562.7,40.4475257722,-79.9441930614,0.0,0.0113525391,-0.9469299316,0.212097168,0.0235993357,-0.0873756522,0.0175265294
2,1493003562.9,40.4475257722,-79.9441930614,0.0,0.023651123,-0.9569549561,0.208114624,-0.009586581,0.0053814496,0.0030866037
3,1493003563.1,40.4475257822,-79.9441930619,0.0,0.0610809326,-0.9560852051,0.201461792,-0.0148543136,0.0011598067,-0.0033537188
4,1493003563.3,40.4475257822,-79.9441930619,0.0,0.0701599121,-0.9564666748,0.1945037842,-0.0127994185,-0.0009803096,0.000937699


#### Pothole Data

In [62]:
potholesDF.head()

Unnamed: 0,timestamp
0,1493003685.7
1,1493003686.7
2,1493003692.6
3,1493003709.0
4,1493003714.0


#### Grouping points into time intervals with combined sensor/pothole data

Each interval will represent 2 seconds (10 points) of data. They will also contain the following aggregate statistics for points in that interval:

- avgSpeed: average speed in interval
- maxAccelX, maxAccelY, maxAccelZ: maximum value of accelerometerX, accelerometerY, accelerometerY
- maxGyroX, maxGyroY, maxGyroZ: maximum value of gyroX, gyroY, gyroZ
- sdAccelX, sdAccelY, sdAccelZ: standard deviation of accelerometerX, accelerometerY, accelerometerY
- sdGyroX, sdGyroY, sdGyroZ: standard deviation of gyroX, gyroY, gyroZ
- pothole (True/False): whether a pothole was encountered in that interval

In [63]:
# Time window (2 seconds) in number of points
# 5 points = 1 second
window = 10

def intervalHasPothole(intervalStart, intervalEnd, potholesDF):
    potholeTimestamps = potholesDF['timestamp']
    for index, potholeTime in potholeTimestamps.iteritems():
        if intervalStart < potholeTime and potholeTime <= intervalEnd:
            return True
    return False

intervalsDF = pd.DataFrame(columns=['avgSpeed', 'maxAccelX', 'maxAccelY', 'maxAccelZ', 
                                    'maxGyroX', 'maxGyroY', 'maxGyroZ', 'sdAccelX', 
                                    'sdAccelY', 'sdAccelZ', 'sdGyroX', 'sdGyroY',
                                    'sdGyroZ', 'pothole'])
for i in xrange(0, len(sensorsDF), window):
    intervalIndex = len(intervalsDF)
    interval = sensorsDF[i:i+10]
    intervalStart = list(interval['timestamp'])[0]
    intervalEnd = list(interval['timestamp'])[-1]
    avgSpeed = interval['speed'].mean()
    maxAccelX, sdAccelX = interval['accelerometerX'].max(), interval['accelerometerX'].std()
    maxAccelY, sdAccelY = interval['accelerometerY'].max(), interval['accelerometerY'].std()
    maxAccelZ, sdAccelZ = interval['accelerometerZ'].max(), interval['accelerometerZ'].std()
    maxGyroX, sdGyroX = interval['gyroX'].max(), interval['gyroX'].std()
    maxGyroY, sdGyroY = interval['gyroY'].max(), interval['gyroY'].std()
    maxGyroZ, sdGyroZ = interval['gyroZ'].max(), interval['gyroZ'].std()
    pothole = intervalHasPothole(intervalStart, intervalEnd, potholesDF)
    intervalSummary = [avgSpeed, maxAccelX, maxAccelY, maxAccelZ, maxGyroX, maxGyroY, 
                       maxGyroZ, sdAccelX, sdAccelY, sdAccelZ, sdGyroX, sdGyroY, 
                       sdGyroZ, pothole]
    intervalsDF.loc[intervalIndex] = intervalSummary
    
intervalsDF.head()

Unnamed: 0,avgSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,pothole
0,0.0,0.217666626,-0.9469299316,0.212097168,0.032959548,0.0487339837,0.1078993031,0.0557300765,0.0087466703,0.0144098904,0.0174007225,0.0331251346,0.0339172088,False
1,0.0479999989,0.0785064697,-0.9331359863,0.271774292,-0.003179548,0.0096606169,0.0072680329,0.0069277981,0.0133113464,0.0490568595,0.0071068851,0.0053237358,0.0050463825,False
2,0.2259999931,0.0674438477,-0.946182251,0.2098083496,-0.006333796,0.0054416371,0.0020445088,0.0045312778,0.0070150467,0.0209263038,0.0036947555,0.0040217262,0.0025294318,False
3,0.0569999993,0.1011352539,-0.9107055664,0.3683624268,0.0032314797,0.0086475504,0.0052911685,0.0088680387,0.0120398286,0.0522918066,0.0081263068,0.0063212338,0.0049780064,False
4,1.2889999986,0.1100006104,-0.8900909424,0.365234375,-0.0053862433,0.0597397633,0.0158157148,0.0243626337,0.0170524731,0.0215171973,0.0065494289,0.022648622,0.0163744159,False


#### Intervals with potholes

In [64]:
intervalsDF[intervalsDF['pothole'] == True]

Unnamed: 0,avgSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,pothole
61,9.607999897,0.319152832,-0.6574707031,0.6968536377,0.1304916976,0.3210717663,0.1497466186,0.3189966289,0.231352721,0.1523701298,0.0915202664,0.2103093672,0.166110547,True
62,11.5019997597,0.4201660156,-0.8198394775,0.6541442871,0.0927733471,0.6253592261,0.1302813079,0.1945242572,0.1340302622,0.1228299154,0.1610075926,0.2283527206,0.2272700085,True
65,13.7519997597,0.1572875977,-0.730255127,0.3397369385,0.3867916578,0.205310279,0.3228387737,0.0842193626,0.1124882588,0.1092399833,0.1828185347,0.1327779127,0.1268884029,True
73,11.7749997139,0.5521087646,-0.7353668213,0.5048828125,0.6640530925,0.2008843716,0.5188905731,0.3440100626,0.127034333,0.0902955121,0.2291065399,0.2967643671,0.2459798362,True
75,13.6869999886,0.6703491211,-0.731918335,0.6246643066,0.6866204533,0.6902575324,0.1733624659,0.3664967171,0.1070546572,0.0947587328,0.2565079154,0.291971253,0.2426217373,True
76,15.4270003319,0.8647766113,-0.5925292969,0.4685821533,0.3289283578,0.3860012316,0.5476415276,0.4312860428,0.1883023337,0.1619944648,0.1814881912,0.2984256896,0.2925179091,True
77,16.798000145,0.5782165527,-0.4690856934,0.6850280762,0.2723574899,0.7928491555,0.2537073728,0.304803002,0.361255068,0.2315758112,0.2555924534,0.2863408018,0.182486871,True
78,15.1100003242,0.1742248535,-0.9388580322,0.118270874,0.1890745837,0.0537247476,0.2211267927,0.134559983,0.1104204978,0.0662914828,0.084934606,0.0985365618,0.0880066292,True
80,7.3979997635,0.4824676514,-0.6853637695,0.7453918457,0.1667258685,0.6318067391,0.3324674326,0.2740424441,0.1349041061,0.1065849686,0.1066668312,0.2872687266,0.2769360018,True
82,16.0609994888,0.83152771,-0.5032806396,0.5885467529,0.1831077713,0.4759843848,0.2338279406,0.4260477774,0.2156084985,0.2016259649,0.1749885477,0.2703476917,0.1986334967,True


#### Save to CSV

In [65]:
intervalsDF.to_csv(combinedFilePath)

### Road Conditions (Good Road/Bad Road) Data Processing

Description: Breaks up road condtions data into time intervals. Only contains sensor, not pothole data. Labels are: 0 (good road) and 1 (bad road)

In [94]:
sensorsFilePath = 'data/Good_Road_Bad_Road/good7_sensors.csv'
intervalsFilePath = 'data/Good_Road_Bad_Road/good7_intervals.csv'
label = 0
sensorsDF = pd.read_csv(sensorsFilePath)
sensorsDF.head()

Unnamed: 0,timestamp,latitude,longitude,speed,accelerometerX,accelerometerY,accelerometerZ,gyroX,gyroY,gyroZ
0,1492618395.9,40.4780206967,-79.92337843,0.0,0.0052032471,-0.96043396,0.2159576416,-0.0272774275,-0.02321877,0.0349657072
1,1492618396.1,40.4780206967,-79.92337843,0.0,0.018157959,-0.9752044678,0.2300567627,-0.0127448237,-0.007364173,-0.0065087657
2,1492618396.3,40.4780206695,-79.9233783578,0.0,0.0162658691,-0.9566345215,0.2545013428,-0.0098116181,-0.0148268831,0.0084675207
3,1492618396.5,40.4780206695,-79.9233783578,0.0,0.0180511475,-0.944442749,0.2947998047,-0.0107352024,-0.0116310897,-7.98948e-05
4,1492618396.7,40.4780206695,-79.9233783578,0.0,0.0035552979,-0.9240112305,0.3209686279,0.0252486313,-0.0310111793,0.0056696036


#### Grouping points into time intervals with labels

Each interval will represent 2 seconds (10 points) of data. They will also contain the following aggregate statistics for points in that interval:

- avgSpeed: average speed in interval
- maxAccelX, maxAccelY, maxAccelZ: maximum value of accelerometerX, accelerometerY, accelerometerY
- maxGyroX, maxGyroY, maxGyroZ: maximum value of gyroX, gyroY, gyroZ
- sdAccelX, sdAccelY, sdAccelZ: standard deviation of accelerometerX, accelerometerY, accelerometerY
- sdGyroX, sdGyroY, sdGyroZ: standard deviation of gyroX, gyroY, gyroZ
- condition (1/0): whether that road is in good (0) or bad (1) condition

In [95]:
# Time window (2 seconds) in number of points
# 5 points = 1 second
window = 10

intervalsDF = pd.DataFrame(columns=['avgSpeed', 'maxAccelX', 'maxAccelY', 'maxAccelZ', 
                                    'maxGyroX', 'maxGyroY', 'maxGyroZ', 'sdAccelX', 
                                    'sdAccelY', 'sdAccelZ', 'sdGyroX', 'sdGyroY',
                                    'sdGyroZ', 'condition'])
for i in xrange(0, len(sensorsDF), window):
    intervalIndex = len(intervalsDF)
    interval = sensorsDF[i:i+10]
    avgSpeed = interval['speed'].mean()
    maxAccelX, sdAccelX = interval['accelerometerX'].max(), interval['accelerometerX'].std()
    maxAccelY, sdAccelY = interval['accelerometerY'].max(), interval['accelerometerY'].std()
    maxAccelZ, sdAccelZ = interval['accelerometerZ'].max(), interval['accelerometerZ'].std()
    maxGyroX, sdGyroX = interval['gyroX'].max(), interval['gyroX'].std()
    maxGyroY, sdGyroY = interval['gyroY'].max(), interval['gyroY'].std()
    maxGyroZ, sdGyroZ = interval['gyroZ'].max(), interval['gyroZ'].std()
    condition = label  # label for that road
    intervalSummary = [avgSpeed, maxAccelX, maxAccelY, maxAccelZ, maxGyroX, maxGyroY, 
                       maxGyroZ, sdAccelX, sdAccelY, sdAccelZ, sdGyroX, sdGyroY, 
                       sdGyroZ, condition]
    intervalsDF.loc[intervalIndex] = intervalSummary
    
intervalsDF.head()

Unnamed: 0,avgSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,condition
0,0.1860000014,0.0453338623,-0.9240112305,0.3701171875,0.0368275231,-0.007364173,0.0612628251,0.0185236996,0.0151200292,0.0444723579,0.0210337683,0.0267860302,0.0258485412,0.0
1,1.6900000334,0.0111083984,-0.8469696045,0.486038208,0.0384065113,-0.1403469916,0.1385679999,0.0758855454,0.0262386831,0.048795016,0.0429031809,0.1518327475,0.0359589095,0.0
2,5.2259999275,-0.0151672363,-0.8586120605,0.5547180176,0.0282082023,-0.1388798561,0.1485559193,0.0718334135,0.0347396115,0.0388794141,0.0284313117,0.1033828119,0.0385402366,0.0
3,10.2640001297,0.1625213623,-0.8689880371,0.5539398193,0.0068107682,0.0971758187,0.0791837687,0.066944371,0.0292145468,0.0527328969,0.0265331905,0.0727838073,0.0560772949,0.0
4,13.7450001717,0.1595916748,-0.8238525391,0.4143676758,0.0877935022,0.1021434131,0.0977862152,0.0682071131,0.0672720919,0.0644501172,0.0568567408,0.0497310522,0.0414713498,0.0


#### Save to CSV

In [97]:
intervalsDF.to_csv(intervalsFilePath)