In [2]:
import os
from datetime import datetime

import s3fs
import pandas as pd

In [34]:
# strftime pattern applied to every timestamp emitted by this notebook
time_format = '%Y-%m-%d %H:%M:%S.%f'

def convert_timestamp(x):
    """Return the POSIX timestamp ``x`` formatted according to ``time_format``.

    ``datetime.fromtimestamp`` is called without a timezone argument, so the
    value is interpreted in the local timezone of the machine running the
    notebook.
    """
    moment = datetime.fromtimestamp(x)
    return moment.strftime(time_format)

# Activity names indexed by their integer code (position in the list = code)
activity_labels = dict(enumerate(["sitting", "standing", "moving"]))

# Fields kept from each session csv, in the order they appear in the output:
# timestamp, location readings, rotation rate, user acceleration, label
session_sensor_data_columns = [
    "loggingTime",
    "locationSpeed", "locationAltitude",
    "motionRotationRateX", "motionRotationRateY", "motionRotationRateZ",
    "motionUserAccelerationX", "motionUserAccelerationY", "motionUserAccelerationZ",
    "label",
]

########################################

In [35]:
# Connect to s3 bucket
#s3 = s3fs.S3FileSystem(anon=False)

# List files by device
#devices=...

# Build `activity_df`: one row per sensor sample, covering every session of
# every device. Session files are read from the current working directory; a
# file belongs to a device when its name starts with the device name.
# NOTE(review): the prefix match also accepts non-csv files and files of any
# device whose name merely begins with another device's name - confirm the
# file naming scheme.
devices = ['tyler_phone']

# os.listdir() returns names in arbitrary order; sorting keeps the
# deviceSessionId / sessionId numbering identical from one run to the next.
files = sorted(os.listdir())

device_frames = []
for device in devices:
    device_files = [file for file in files if file.startswith(device)]

    # Gather the sessions in a list and concatenate once, instead of
    # re-copying an ever-growing DataFrame on each iteration.
    session_frames = []
    for i, f in enumerate(device_files):
        session_df = pd.read_csv(f)[session_sensor_data_columns]
        session_df["deviceSessionId"] = device + "_" + str(i)
        session_frames.append(session_df)

    # A device without any recording would yield a frame with no columns;
    # skip it rather than fail with a KeyError below.
    if not session_frames:
        continue

    # Sessions are stacked in file order, so the sessionId assigned further
    # down follows the same order as the deviceSessionId suffix. The row
    # index is not reset and therefore restarts with each session.
    device_df = pd.concat(session_frames)
    device_df["loggingTime"] = device_df["loggingTime"].apply(convert_timestamp)
    # Missing labels and codes absent from activity_labels both map to "unknown"
    device_df["label"] = device_df["label"].apply(
        lambda x: "unknown" if pd.isna(x) else activity_labels.get(int(x), "unknown")
    )
    device_frames.append(device_df)

if device_frames:
    activity_df = pd.concat(device_frames)
else:
    # Nothing was loaded: keep the expected columns so the cells below still run
    activity_df = pd.DataFrame(columns=session_sensor_data_columns + ["deviceSessionId"])

# Collect session ids: number the distinct deviceSessionId values in order of appearance
session_ids = {
    device_session: number
    for number, device_session in enumerate(activity_df["deviceSessionId"].unique())
}

# Assign session ids
activity_df["sessionId"] = activity_df["deviceSessionId"].map(session_ids)


In [36]:
# Preview the first rows of the combined activity data
activity_df.head()

Unnamed: 0,loggingTime,locationSpeed,locationAltitude,motionRotationRateX,motionRotationRateY,motionRotationRateZ,motionUserAccelerationX,motionUserAccelerationY,motionUserAccelerationZ,label,deviceSessionId,sessionId
0,2019-01-22 13:59:17.132867,0.0,153.0088,-0.102961,0.245842,-0.000257,-0.009714,-0.000662,0.024201,sitting,tyler_phone_0,0
1,2019-01-22 13:59:17.167912,0.0,153.0088,-0.024043,0.057821,-0.001242,-0.001701,0.003398,0.015437,sitting,tyler_phone_0,0
2,2019-01-22 13:59:17.217576,0.0,153.0088,0.014322,-0.036183,0.000927,0.00538,0.004661,0.006528,sitting,tyler_phone_0,0
3,2019-01-22 13:59:17.267294,0.0,153.0088,-0.022982,0.048212,-0.00125,-0.001284,0.003683,0.015052,sitting,tyler_phone_0,0
4,2019-01-22 13:59:17.317040,0.0,153.0088,0.011134,-0.026575,-0.000134,0.004026,0.006257,0.009402,sitting,tyler_phone_0,0


In [37]:
# Show the deviceSessionId -> sessionId lookup
session_ids

{'tyler_phone_0': 0}