# Data Exploration: Good vs. Bad Road Conditions

Description: Visualizing the difference between good and bad road conditions using accelerometer and gyroscope data without pothole labels.

In [2]:
import numpy as np
import pandas as pd
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode()
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


### Get trip data from CSV

In [3]:
# Sensor recordings used as the "good road" and "bad road" examples.
goodRoadFilePath = 'data/No Pothole Negley 419/normal_sensors.csv'
badRoadFilePath = 'data/Cobblestone 419/bump_sensors.csv'

# Rows whose gyroX is at or below this value are treated as invalid gyro data.
MIN_VALID_GYRO_X = -100


def load_trip(csv_path, min_gyro_x=MIN_VALID_GYRO_X):
    """Read a trip CSV and drop rows with invalid gyro readings.

    Parameters
    ----------
    csv_path : str
        Path to a CSV file that contains a ``gyroX`` column.
    min_gyro_x : float, optional
        Exclusive lower bound on ``gyroX``; rows at or below it are dropped.

    Returns
    -------
    pandas.DataFrame
        The rows with ``gyroX > min_gyro_x``, keeping their original index.
    """
    trip = pd.read_csv(csv_path)
    return trip[trip['gyroX'] > min_gyro_x]


goodRoadDF = load_trip(goodRoadFilePath)
badRoadDF = load_trip(badRoadFilePath)

# print() with a single argument behaves the same on Python 2 and Python 3.
print(goodRoadDF.shape)
print(badRoadDF.head())

(550, 10)
      timestamp   latitude  longitude  speed  accelerometerX  accelerometerY  \
0  1.492618e+09  40.479540 -79.922397   1.17       -0.030884       -0.852554   
1  1.492618e+09  40.479540 -79.922397   1.17        0.018356       -0.918076   
2  1.492618e+09  40.479540 -79.922397   1.17        0.180893       -0.925156   
3  1.492618e+09  40.479521 -79.922406   2.45       -0.196838       -0.942047   
4  1.492618e+09  40.479521 -79.922406   2.45       -0.043716       -0.887177   

   accelerometerZ     gyroX     gyroY     gyroZ  
0        0.499725 -0.030093 -0.089149  0.048968  
1        0.301804 -0.046515 -0.044369  0.028426  
2        0.432632 -0.040333  0.055567 -0.057066  
3        0.452545  0.015845 -0.163904  0.075220  
4        0.410736 -0.048418 -0.120941  0.047796  


### Acceleration: Good Road vs. Bad Road

In [4]:
# 3D scatter of raw accelerometer samples: one trace per road condition.
# The good-road trace has no explicit marker colour, so plotly picks its default.
goodRoadPoints = go.Scatter3d(
    x=goodRoadDF['accelerometerX'],
    y=goodRoadDF['accelerometerY'],
    z=goodRoadDF['accelerometerZ'],
    name="Good Road (Negley)",
    mode='markers',
    marker=dict(
        size=8,
        line=dict(color='rgba(217, 217, 217, 0.14)', width=0.5),
        opacity=0.8
    )
)

badRoadPoints = go.Scatter3d(
    x=badRoadDF['accelerometerX'],
    y=badRoadDF['accelerometerY'],
    z=badRoadDF['accelerometerZ'],
    name="Bad Road (Cobblestone)",
    mode='markers',
    marker=dict(
        size=8,
        # RGB channels range over 0-255, so full red is 255 (256 is out of range).
        color='rgba(255,0,0,0.8)',
        line=dict(color='rgba(217, 217, 217, 0.14)', width=0.5),
        opacity=0.8
    )
)

data = [goodRoadPoints, badRoadPoints]
layout = go.Layout(
    margin=dict(l=0, r=0, b=0, t=30),
    title="Accelerometer Readings for Good and Bad Road Conditions",
    # Plain dicts are used for the scene and its axes instead of the legacy
    # go.Scene / go.XAxis / go.YAxis / go.ZAxis classes, which newer plotly
    # releases deprecate.
    scene=dict(
        xaxis=dict(title='Accelerometer X'),
        yaxis=dict(title='Accelerometer Y'),
        zaxis=dict(title='Accelerometer Z')
    )
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='simple-3d-scatter')

### Gyro: Good Road vs. Bad Road

In [5]:
# 3D scatter of raw gyro samples: one trace per road condition.
# The good-road trace has no explicit marker colour, so plotly picks its default.
goodRoadPoints = go.Scatter3d(
    x=goodRoadDF['gyroX'],
    y=goodRoadDF['gyroY'],
    z=goodRoadDF['gyroZ'],
    name="Good Road (Negley)",
    mode='markers',
    marker=dict(
        size=8,
        line=dict(color='rgba(217, 217, 217, 0.14)', width=0.5),
        opacity=0.8
    )
)

badRoadPoints = go.Scatter3d(
    x=badRoadDF['gyroX'],
    y=badRoadDF['gyroY'],
    z=badRoadDF['gyroZ'],
    name="Bad Road (Cobblestone)",
    mode='markers',
    marker=dict(
        size=8,
        # RGB channels range over 0-255, so full red is 255 (256 is out of range).
        color='rgba(255,0,0,0.8)',
        line=dict(color='rgba(217, 217, 217, 0.14)', width=0.5),
        opacity=0.8
    )
)

data = [goodRoadPoints, badRoadPoints]
layout = go.Layout(
    margin=dict(l=0, r=0, b=0, t=30),
    title="Gyro Readings for Good and Bad Road Conditions",
    # Plain dicts are used for the scene and its axes instead of the legacy
    # go.Scene / go.XAxis / go.YAxis / go.ZAxis classes, which newer plotly
    # releases deprecate.
    scene=dict(
        xaxis=dict(title='Gyro X'),
        yaxis=dict(title='Gyro Y'),
        zaxis=dict(title='Gyro Z')
    )
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='simple-3d-scatter')

In [73]:
# Class labels: 0.0 for every good-road row, 1.0 for every bad-road row.
good_labels = np.zeros(goodRoadDF.shape[0])
bad_labels = np.ones(badRoadDF.shape[0])
y = np.concatenate([good_labels, bad_labels])
print(y)
print(y.shape)

# Stack the two trips in the same order as the labels (good first, then bad)
# and keep only the three sensor channels used as features.
X = pd.concat([goodRoadDF, badRoadDF])[['gyroY', 'accelerometerY', 'accelerometerX']]

# Features and labels must stay row-aligned for the classifier cells below.
assert len(X) == len(y), "feature rows and labels are out of sync"
print(X[:3])

[ 0.  0.  0. ...,  1.  1.  1.]
(1062,)
      gyroY  accelerometerY  accelerometerX
0  0.010706       -0.957642        0.002365
1  0.020283       -0.972656        0.018127
2 -0.018002       -0.962906        0.034073


In [74]:
# Support-vector classifier with penalty C=100; every other setting is left
# at the library default.
clf = svm.SVC(C=100)

# Mean accuracy over 5-fold cross-validation on the full data set.
scores = cross_val_score(clf, X, y, cv=5)
scores.mean()

0.72497564000354331

In [75]:
# Hold out 33% of the samples for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42
)

# Fit on the training portion only; the fitted estimator is the cell output.
clf.fit(X_train, y_train)

SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [76]:
# Predict the held-out samples and report the fraction predicted correctly.
y_pred = clf.predict(X_test)
(y_pred == y_test).mean()

0.74358974358974361

In [77]:
# Rows are the true classes and columns the predicted classes, in label order
# (0 = good road, 1 = bad road).
confusion_matrix(y_test, y_pred)


array([[189,   8],
       [ 82,  72]])

In [78]:
# Per-class precision / recall / F1 on the held-out set.
# print() with a single argument behaves the same on Python 2 and Python 3.
report = classification_report(y_test, y_pred)
print(report)

             precision    recall  f1-score   support

        0.0       0.70      0.96      0.81       197
        1.0       0.90      0.47      0.62       154

avg / total       0.79      0.74      0.72       351

