In [29]:
# Read the CSV file

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
import matplotlib.ticker as mtick
from datetime import datetime
import time

df = pd.read_csv('datasets/2016-06-08_tests.csv')
df.head()

Unnamed: 0,timestamp,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z
0,1470487102552,0.0,0.0,0.0,0.000381,0.001389,-0.00061
1,1470487102592,1.891403,-1.238388,9.521729,0.0,0.0,0.0
2,1470487102653,1.905777,-1.247971,9.569611,0.0,0.0,0.0
3,1470487102715,1.872253,-1.243179,9.588776,0.0,0.0,0.0
4,1470487102780,0.0,0.0,0.0,-0.001755,-0.000748,-0.004868


In [30]:
# Here we compute the size of the chosen sample
f = lambda x: df[x] != 0

# For now we only keep values from the accelerometer
df = df[f('acc_x') & f('acc_y') & f('acc_z')]
df = df.sort_values(['timestamp'])

# Select data between Sat, 06 Aug 2016 12:38:22.552 GMT and 13:19:49.507 GMT
df_extract = df[(df['timestamp'] > 1470487102552) & (df['timestamp'] < 1470489589507)]
#df_extract = df[(df['timestamp'] > 1470487102552) & (df['timestamp'] < 1470487189507)]

# Sort data
df_extract = df_extract.sort_values(['timestamp'])

len(df_extract)

41791

In [31]:
# Convert an epoch time into a date
date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(1470487102.552))
print(date)

2016-08-06 12:38:22


In [8]:
# Plot the graph with accelerations

date = df_extract['timestamp']
count = 1

def format_time(t, x):
    global date
    global count
    count += 1
    #print(date)
    #print(x)
    if count % 100 == 0:
        return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(date / 1000))

    return ""

plt.gca().xaxis.set_major_locator(mtick.FixedLocator(date))
plt.gca().xaxis.set_major_formatter(mtick.FuncFormatter(format_time))
plt.gcf().autofmt_xdate()

plt.plot(date, df_extract['acc_x'], color='red')
#plt.plot(date, df_extract['acc_y'], color='green')
#plt.plot(date, df_extract['acc_z'], color='blue')
plt.xlabel('time')
#plt.ylabel('x(red), y(green), z(blue)')
plt.title('Acceleration over time')

plt.show()

TypeError: cannot convert the series to <class 'int'>

In [32]:
# Compute the difference between 2 consecutives ticks & add the result in the new column 'diff'
df_extract['diff'] = df_extract['timestamp'].shift(-1) - df_extract['timestamp']

# Remove the last value because it contains a NaN
df_extract = df_extract[:-1]

df_extract.head()

Unnamed: 0,timestamp,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,diff
1,1470487102592,1.891403,-1.238388,9.521729,0.0,0.0,0.0,61.0
2,1470487102653,1.905777,-1.247971,9.569611,0.0,0.0,0.0,62.0
3,1470487102715,1.872253,-1.243179,9.588776,0.0,0.0,0.0,66.0
5,1470487102781,1.934509,-1.247971,9.516937,0.0,0.0,0.0,57.0
6,1470487102838,1.924927,-1.252762,9.588776,0.0,0.0,0.0,61.0


In [33]:
# Some statistics about values with no gap (we have chosen for now a gap smaller than 100ms)
df_extract['diff'][df_extract['diff'] < 100].describe()

count    39306.000000
mean        17.822012
std          9.453136
min          1.000000
25%         17.000000
50%         18.000000
75%         18.000000
max         89.000000
Name: diff, dtype: float64

In [34]:
# Number of gaps of more than 100ms
df_extract_100 = df_extract[df_extract['diff'] > 100]
len(df_extract_100)

2483

In [35]:
# Some statistics about values with a gap > 100ms
df_extract_100['diff'].describe()

count      2483.000000
mean        719.406363
std        7157.751855
min         105.000000
25%         183.000000
50%         185.000000
75%         188.000000
max      329311.000000
Name: diff, dtype: float64

In [37]:
# Number of gaps of more than 500ms
df_extract_500 = df_extract[df_extract['diff'] > 500]
len(df_extract_500)

97

In [39]:
# Number of gaps of more than 1s
df_extract_1000 = df_extract[df_extract['diff'] > 1000]
len(df_extract_1000)

90

In [40]:
# Poucentage of data with a big gap
len(df_extract_500) / len(df_extract) * 100

0.002321129456807849

In [41]:
# Histogram to represent data with its gap (gap > 500)
plt.hist(df_extract_500['diff'], bins=100)
plt.show()

In [42]:
# Compute euclidien norm, and add it to a new column named 'norm'
sqrt_root = lambda x: np.power(df_extract[x], 2)
df_extract['norm'] = np.sqrt(sqrt_root('acc_x') + sqrt_root('acc_y') + sqrt_root('acc_z')) / 9.81
df_extract.head()

Unnamed: 0,timestamp,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,diff,norm
1,1470487102592,1.891403,-1.238388,9.521729,0.0,0.0,0.0,61.0,0.997598
2,1470487102653,1.905777,-1.247971,9.569611,0.0,0.0,0.0,62.0,1.002754
3,1470487102715,1.872253,-1.243179,9.588776,0.0,0.0,0.0,66.0,1.003938
5,1470487102781,1.934509,-1.247971,9.516937,0.0,0.0,0.0,57.0,0.998106
6,1470487102838,1.924927,-1.252762,9.588776,0.0,0.0,0.0,61.0,1.005096


In [46]:
# Detect number of rows for which the norm > 2
len(df_extract[np.abs(df_extract['norm']) > 2])

12

In [47]:
# Row selection with norm > 2
df_extract[np.abs(df_extract['norm']) > 2]

Unnamed: 0,timestamp,acc_x,acc_y,acc_z,gyr_x,gyr_y,gyr_z,diff,norm
5751,1470487686187,2.10173,-7.370911,19.729218,0.0,0.0,0.0,1.0,2.15757
5755,1470487686191,-1.725113,-9.39473,22.175552,0.0,0.0,0.0,1.0,2.461287
9127,1470487781012,-0.820328,-15.426819,14.822342,0.0,0.0,0.0,1.0,2.182404
28170,1470488404505,0.813766,-2.251556,20.898132,0.0,0.0,0.0,18.0,2.144222
29507,1470488437705,-1.768829,1.216187,20.602081,0.0,0.0,0.0,17.0,2.111479
39041,1470488879319,3.615799,-6.214188,18.802963,0.0,0.0,0.0,19.0,2.05205
39043,1470488879338,4.729263,-8.995666,19.512527,0.0,0.0,0.0,18.0,2.242671
39044,1470488879356,1.646454,-7.782333,18.432312,0.0,0.0,0.0,16.0,2.046432
39058,1470488879620,-0.362122,-7.905655,19.951477,0.0,0.0,0.0,17.0,2.187945
39059,1470488879637,-0.513397,-9.452805,22.17743,0.0,0.0,0.0,18.0,2.458046


In [49]:
# Row description
df_extract['norm'].describe()

count    41790.000000
mean         1.025039
std          0.120005
min          0.148751
25%          0.976944
50%          1.008013
75%          1.049487
max          2.461287
Name: norm, dtype: float64

In [50]:
# Plot norm values
plt.plot(np.abs(df_extract['norm'] -1))
plt.show()

In [52]:
# Pourcentage of values for which norm > 2
len(df_extract[np.abs(df_extract['norm']) > 2]) / len(df_extract) * 100

0.0002871500358937545