### Pothole/Non-Pothole Data Processing

Description: stiches sensor and pothole files together into a new combined file. Breaks up data into time intervals.

In [14]:
import pandas as pd
pd.options.display.max_rows = 100
pd.set_option('display.precision', 10)  #for displaying timestamps

In [48]:
sensorsFilePath = 'data/Pothole_Non_Pothole/trip1_sensors.csv'
potholesFilePath = 'data/Pothole_Non_Pothole/trip1_potholes.csv'
combinedFilePath = 'data/Pothole_Non_Pothole/trip1_intervals.csv'
sensorsDF = pd.read_csv(sensorsFilePath)
potholesDF = pd.read_csv(potholesFilePath)

#### Sensor Data

In [49]:
sensorsDF.head()

Unnamed: 0,timestamp,latitude,longitude,speed,accelerometerX,accelerometerY,accelerometerZ,gyroX,gyroY,gyroZ
0,1492638964.5,40.4474447877,-79.9441886565,0.0,0.016998291,-0.9622344971,0.2038879395,-0.0169944299,0.0192586494,0.0072398034
1,1492638964.8,40.4474447877,-79.9441886565,0.0,0.0507507324,-0.9629974365,0.1939544678,-0.0180831301,0.0043726442,0.0008697884
2,1492638964.9,40.4474447877,-79.9441886565,0.0,0.0374145508,-0.9592285156,0.1915435791,-0.014992798,-0.0094757935,0.0009371664
3,1492638965.1,40.4474447941,-79.9441886568,0.0,0.0537872314,-0.9638519287,0.2772521973,-0.0468934731,-0.0018221348,0.0016570188
4,1492638965.3,40.4474447941,-79.9441886568,0.0,0.0316467285,-0.9530029297,0.2710571289,-0.0073705646,0.0032384039,-0.0043486757


#### Pothole Data

In [50]:
potholesDF.head()

Unnamed: 0,timestamp
0,1492639065.7
1,1492639090.8
2,1492639133.7
3,1492639140.1
4,1492639143.1


#### Grouping points into time intervals with combined sensor/pothole data

Each interval will represent 2 seconds (10 points) of data. They will also contain the following aggregate statistics for points in that interval:

- avgSpeed: average speed in interval
- sdSpeed: standard deviation of speed in interval
- maxAccelX, maxAccelY, maxAccelZ: maximum value of accelerometerX, accelerometerY, accelerometerY
- maxGyroX, maxGyroY, maxGyroZ: maximum value of gyroX, gyroY, gyroZ
- meanAccelX, meanAccelY, meanAccelZ: mean of accelerometerX, accelerometerY, accelerometerY
- meanGyroX, meanGyroY, meanGyroZ: mean of gyroX, gyroY, gyroZ
- sdAccelX, sdAccelY, sdAccelZ: standard deviation of accelerometerX, accelerometerY, accelerometerY
- sdGyroX, sdGyroY, sdGyroZ: standard deviation of gyroX, gyroY, gyroZ
- pothole (True/False): whether a pothole was encountered in that interval

In [51]:
# Time window (2 seconds) in number of points
# 5 points = 1 second
window = 10

def intervalHasPothole(intervalStart, intervalEnd, potholesDF):
    potholeTimestamps = potholesDF['timestamp']
    for index, potholeTime in potholeTimestamps.iteritems():
        if intervalStart < potholeTime and potholeTime <= intervalEnd:
            return True
    return False

intervalsDF = pd.DataFrame(columns=['avgSpeed', 'sdSpeed', 'maxAccelX', 'maxAccelY', 'maxAccelZ', 
                                    'maxGyroX', 'maxGyroY', 'maxGyroZ', 'meanAccelX', 'meanAccelY', 
                                    'meanAccelZ', 'meanGyroX', 'meanGyroY', 'meanGyroZ', 'sdAccelX', 
                                    'sdAccelY', 'sdAccelZ', 'sdGyroX', 'sdGyroY', 'sdGyroZ', 'pothole'])
for i in xrange(0, len(sensorsDF), window):
    intervalIndex = len(intervalsDF)
    interval = sensorsDF[i:i+10]
    intervalStart = list(interval['timestamp'])[0]
    intervalEnd = list(interval['timestamp'])[-1]
    avgSpeed, sdSpeed = interval['speed'].mean(), interval['speed'].std()
    meanAccelX, maxAccelX, sdAccelX = interval['accelerometerX'].mean(), interval['accelerometerX'].max(), interval['accelerometerX'].std()
    meanAccelY, maxAccelY, sdAccelY = interval['accelerometerY'].mean(), interval['accelerometerY'].max(), interval['accelerometerY'].std()
    meanAccelZ, maxAccelZ, sdAccelZ = interval['accelerometerZ'].mean(), interval['accelerometerZ'].max(), interval['accelerometerZ'].std()
    meanGyroX, maxGyroX, sdGyroX = interval['gyroX'].mean(), interval['gyroX'].max(), interval['gyroX'].std()
    meanGyroY, maxGyroY, sdGyroY = interval['gyroY'].mean(), interval['gyroY'].max(), interval['gyroY'].std()
    meanGyroZ, maxGyroZ, sdGyroZ = interval['gyroZ'].mean(), interval['gyroZ'].max(), interval['gyroZ'].std()
    pothole = intervalHasPothole(intervalStart, intervalEnd, potholesDF)
    intervalSummary = [avgSpeed, sdSpeed, maxAccelX, maxAccelY, maxAccelZ, maxGyroX, maxGyroY, 
                       maxGyroZ, meanAccelX, meanAccelY, meanAccelZ, meanGyroX, meanGyroY, 
                       meanGyroZ, sdAccelX, sdAccelY, sdAccelZ, sdGyroX, sdGyroY, 
                       sdGyroZ, pothole]
    intervalsDF.loc[intervalIndex] = intervalSummary
    
intervalsDF.head()

Unnamed: 0,avgSpeed,sdSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,meanAccelX,meanAccelY,...,meanGyroX,meanGyroY,meanGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,pothole
0,0.0409999996,0.1296533829,0.0537872314,-0.9365844727,0.2954864502,-0.0073705646,0.0192586494,0.0072398034,0.0426895142,-0.9526733398,...,-0.0207825102,-0.001779471,0.0002182993,0.0117103716,0.0091027376,0.0402467266,0.0108185714,0.0088875207,0.0034371383,False
1,1.3639999986,1.0240572718,0.1206207275,-0.8756561279,0.3988342285,0.0223926574,0.0065535068,0.0330500954,0.0663406372,-0.9179672241,...,-0.0227801207,-0.0189992842,-0.0032762208,0.0312521846,0.0224946599,0.0287338761,0.0201777652,0.0198507601,0.0149607604,False
2,4.4359999657,1.3511575563,0.1206054688,-0.9062957764,0.4042663574,-0.0105541074,0.0515992788,0.0189752891,0.0706039429,-0.9351348877,...,-0.0323631863,-0.0110659936,-0.0068066935,0.0241821661,0.022506665,0.0572957478,0.0226791194,0.024039437,0.0165922362,False
3,7.0599999428,0.5753258741,0.1153869629,-0.8938598633,0.3790740967,0.067696756,-0.0156564577,0.0619286154,0.0556274414,-0.9323181152,...,-0.0387494732,-0.0744177223,0.0204798686,0.0475755765,0.036483841,0.0683509107,0.095503417,0.0528694406,0.0282090289,False
4,9.148999691,0.8649015949,0.1279144287,-0.9144439697,0.3573150635,0.0311954701,0.0948317043,0.0566598175,0.0907669067,-0.9612030029,...,-0.0063861538,-0.015273655,0.0146877861,0.038033127,0.0320317857,0.0520478773,0.0228188383,0.0481948088,0.0323390537,False


#### Intervals with potholes

In [52]:
intervalsDF[intervalsDF['pothole'] == True]

Unnamed: 0,avgSpeed,sdSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,meanAccelX,meanAccelY,...,meanGyroX,meanGyroY,meanGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,pothole
50,6.8499999523,1.092753841,0.3713684082,-0.7599639893,0.0574951172,0.0955992273,0.004700213,0.3573581351,0.0418395996,-0.9944137573,...,-0.0091326185,-0.1053484742,0.0415474702,0.1657598389,0.0972527676,0.0774625294,0.0931031564,0.1560375314,0.1272662709,True
63,9.7730002403,0.4266158802,0.7452545166,-0.7157897949,0.4294433594,0.1542066145,0.9876295618,0.3140687179,-0.0355255127,-0.9447250366,...,-0.0554729797,0.1646836234,-0.1358690193,0.3995513438,0.1208089904,0.1063244362,0.1373731823,0.3849751941,0.2918284908,True
84,8.4669998169,0.3697462216,0.5500640869,0.1288909912,1.1977233887,0.6774192317,0.2612374608,0.5292553298,0.0421829224,-0.8941497803,...,0.0502343821,-0.0763877425,-0.0315948377,0.3866300932,0.4700403447,0.3087975117,0.4144112768,0.2169684655,0.2709625169,True
87,11.2520001411,0.4902788384,0.4663238525,-0.7689971924,0.4359893799,0.4413161527,0.4806707494,0.2838407742,0.075038147,-0.990586853,...,-0.0233140312,0.0782229268,-0.0355190056,0.1813540025,0.1269017814,0.1488041942,0.2131949802,0.1762694745,0.1463484211,True
89,10.5079996109,0.5761713654,0.3969268799,-0.7638397217,0.466217041,0.3582273909,0.5614351042,0.3840733692,-0.0839569092,-0.9997390747,...,0.0438602658,-0.0544294467,-0.0052746569,0.3261499812,0.2213769087,0.2235452701,0.1869681105,0.2774333027,0.1951522166,True
90,11.8930000305,0.3202793056,0.3297271729,-0.0530090332,0.399017334,0.2288158714,0.3896289896,0.5772199263,-0.1546859741,-0.8139923096,...,-0.032030451,-0.3315326364,0.2053473502,0.3648205141,0.4247936674,0.2026065198,0.1833714723,0.3717890386,0.1965112056,True
150,6.7470000744,0.7057550001,0.2240753174,-0.8856506348,0.2401733398,0.2130643388,0.1396106275,0.109828497,0.0501998901,-0.9618927002,...,0.0161309265,-0.0285018424,0.027320544,0.0794953949,0.0380152056,0.0376067897,0.0803381757,0.0682613625,0.0634008462,True
167,9.4960000038,0.1391400844,0.4257049561,-0.4599304199,0.4817047119,0.3341745188,0.1486105141,0.2341573736,0.1040496826,-0.8968109131,...,0.0067150275,-0.0593895309,0.0602498651,0.2128048112,0.2075295255,0.1310851096,0.1738883019,0.1310130159,0.1179231002,True
168,10.061000061,0.4285492454,0.4675445557,-0.4845428467,0.7462768555,0.3085983187,0.6166956968,0.31100049,-0.0882034302,-0.9624603271,...,-0.0457261563,0.0290121573,-0.035514851,0.3126488079,0.2466035194,0.1955896656,0.2709330509,0.3043388314,0.2135240335,True
185,6.4239999294,0.3681545098,0.5672454834,-0.7443237305,0.4038696289,0.2853723582,0.4381589752,0.2233143132,0.127015686,-0.9240158081,...,-0.0213168202,0.1101295674,0.014744405,0.2450231946,0.119794546,0.1393804801,0.1843044599,0.1958714004,0.120273432,True


#### Save to CSV

In [53]:
intervalsDF.to_csv(combinedFilePath)

### Road Conditions (Good Road/Bad Road) Data Processing

Description: Breaks up road condtions data into time intervals. Only contains sensor, not pothole data. Labels are: 0 (good road) and 1 (bad road)

In [80]:
sensorsFilePath = 'data/Good_Road_Bad_Road/bad1_sensors.csv'
intervalsFilePath = 'data/Good_Road_Bad_Road/bad1_intervals.csv'
label = 1
sensorsDF = pd.read_csv(sensorsFilePath)
sensorsDF.head()

Unnamed: 0,timestamp,latitude,longitude,speed,accelerometerX,accelerometerY,accelerometerZ,gyroX,gyroY,gyroZ
0,1492618275.4,40.4795396654,-79.922396578,1.1699999571,-0.0308837891,-0.8525543213,0.4997253418,-0.0300926551,-0.0891487849,0.0489678093
1,1492618275.5,40.4795396654,-79.922396578,1.1699999571,0.0183563232,-0.9180755615,0.3018035889,-0.0465145053,-0.0443690627,0.0284260488
2,1492618275.7,40.4795396654,-79.922396578,1.1699999571,0.1808929443,-0.9251556396,0.4326324463,-0.0403327758,0.0555665898,-0.0570656832
3,1492618275.9,40.4795205966,-79.922406301,2.4500000477,-0.1968383789,-0.9420471191,0.452545166,0.0158447432,-0.1639037167,0.075220186
4,1492618276.1,40.4795205966,-79.922406301,2.4500000477,-0.0437164307,-0.8871765137,0.410736084,-0.0484181328,-0.1209413356,0.0477960184


#### Grouping points into time intervals with labels

Each interval will represent 2 seconds (10 points) of data. They will also contain the following aggregate statistics for points in that interval:

- avgSpeed: average speed in interval
- sdSpeed: standard deviation of speed in interval
- maxAccelX, maxAccelY, maxAccelZ: maximum value of accelerometerX, accelerometerY, accelerometerY
- maxGyroX, maxGyroY, maxGyroZ: maximum value of gyroX, gyroY, gyroZ
- meanAccelX, meanAccelY, meanAccelZ: mean of accelerometerX, accelerometerY, accelerometerY
- meanGyroX, meanGyroY, meanGyroZ: mean of gyroX, gyroY, gyroZ
- sdAccelX, sdAccelY, sdAccelZ: standard deviation of accelerometerX, accelerometerY, accelerometerY
- sdGyroX, sdGyroY, sdGyroZ: standard deviation of gyroX, gyroY, gyroZ
- condition (1/0): whether that road is in good (0) or bad (1) condition

In [81]:
# Time window (2 seconds) in number of points
# 5 points = 1 second
window = 10

intervalsDF = pd.DataFrame(columns=['avgSpeed', 'sdSpeed', 'maxAccelX', 'maxAccelY', 'maxAccelZ', 
                                    'maxGyroX', 'maxGyroY', 'maxGyroZ', 'meanAccelX', 'meanAccelY', 
                                    'meanAccelZ', 'meanGyroX', 'meanGyroY', 'meanGyroZ', 'sdAccelX', 
                                    'sdAccelY', 'sdAccelZ', 'sdGyroX', 'sdGyroY', 'sdGyroZ', 'condition'])
for i in xrange(0, len(sensorsDF), window):
    intervalIndex = len(intervalsDF)
    interval = sensorsDF[i:i+10]
    avgSpeed, sdSpeed = interval['speed'].mean(), interval['speed'].std()
    meanAccelX, maxAccelX, sdAccelX = interval['accelerometerX'].mean(), interval['accelerometerX'].max(), interval['accelerometerX'].std()
    meanAccelY, maxAccelY, sdAccelY = interval['accelerometerY'].mean(), interval['accelerometerY'].max(), interval['accelerometerY'].std()
    meanAccelZ, maxAccelZ, sdAccelZ = interval['accelerometerZ'].mean(), interval['accelerometerZ'].max(), interval['accelerometerZ'].std()
    meanGyroX, maxGyroX, sdGyroX = interval['gyroX'].mean(), interval['gyroX'].max(), interval['gyroX'].std()
    meanGyroY, maxGyroY, sdGyroY = interval['gyroY'].mean(), interval['gyroY'].max(), interval['gyroY'].std()
    meanGyroZ, maxGyroZ, sdGyroZ = interval['gyroZ'].mean(), interval['gyroZ'].max(), interval['gyroZ'].std()
    condition = label  # label for that road
    intervalSummary = [avgSpeed, sdSpeed, maxAccelX, maxAccelY, maxAccelZ, maxGyroX, maxGyroY, 
                       maxGyroZ, meanAccelX, meanAccelY, meanAccelZ, meanGyroX, meanGyroY, 
                       meanGyroZ, sdAccelX, sdAccelY, sdAccelZ, sdGyroX, sdGyroY, 
                       sdGyroZ, condition]
    intervalsDF.loc[intervalIndex] = intervalSummary

intervalsDF.head()

Unnamed: 0,avgSpeed,sdSpeed,maxAccelX,maxAccelY,maxAccelZ,maxGyroX,maxGyroY,maxGyroZ,meanAccelX,meanAccelY,...,meanGyroX,meanGyroY,meanGyroZ,sdAccelX,sdAccelY,sdAccelZ,sdGyroX,sdGyroY,sdGyroZ,condition
0,2.7860000491,1.254070772,0.1808929443,-0.8525543213,0.5707550049,0.0181909881,0.0555665898,0.075220186,0.0172531128,-0.916418457,...,-0.0145856273,-0.0493661915,0.0115997976,0.1070258214,0.0318941699,0.0696734677,0.0266260954,0.0633355034,0.0393465259,1.0
1,6.85,1.2135349788,0.1405487061,-0.8174591064,0.5398864746,0.0680344448,0.0883663481,0.0647374514,-0.018989563,-0.9220001221,...,-0.0146826196,-0.0317101259,-0.0027340278,0.0792360778,0.0527759547,0.0644064283,0.0440579348,0.0845809155,0.0480705549,1.0
2,12.220000267,0.9697652295,0.3830108643,-0.7495117188,0.4267425537,0.1038062909,0.1479945249,0.1966004106,0.0548019409,-0.8994781494,...,-0.0486688162,-0.08232787,0.0405501696,0.1655059043,0.0879079265,0.0992661667,0.0730354482,0.1397555041,0.0869833156,1.0
3,14.3449997902,0.4058253717,0.2136077881,-0.878036499,0.328704834,0.1483316811,0.0774852045,0.1043772726,0.0295379639,-0.9833557129,...,-0.0363297517,-0.0380569715,0.0036712475,0.0925979683,0.0631723973,0.0609963884,0.0991165854,0.0999984573,0.0455686299,1.0
4,14.1849999428,0.3636617098,0.4680786133,-0.8633880615,0.2009124756,0.0444146028,0.0601224595,0.1211354801,0.1487304688,-0.9509643555,...,-0.0362288712,-0.002526248,0.0234121687,0.1502934091,0.0652926594,0.0344063481,0.0438457349,0.0539620351,0.0487246404,1.0


#### Save to CSV

In [82]:
intervalsDF.to_csv(intervalsFilePath)