In [1]:
import os
# Folder containing the raw sensor log files.
data_directory = "data/data_with_goggle/"
files = os.listdir(data_directory)
# Show one sample filename, then the total number of entries.
print(files[:1], len(files), sep="\n")

['gyro_sober_ys_s8_3.txt']
150


In [2]:
# Split the directory listing by sensor, based on the filename prefix.
gyro_files = [fname for fname in files if fname.startswith('gyro')]
accel_files = [fname for fname in files if fname.startswith('accel')]
# Recording name without the leading "gyro_"; the driver cell re-attaches
# "gyro_" / "accel_" to it to locate both files of a recording.
header_names = [fname[len("gyro_"):] for fname in gyro_files]
print(len(gyro_files), len(accel_files), sep="\n")

75
75


In [3]:
from datetime import datetime, timedelta
def read_filedata(filename):
    """
    Parse a whitespace-separated sensor log into a list of samples.

    Every kept line holds exactly four fields - `timestamp` `x` `y` `z` -
    where `timestamp` is divided by 1000 before being handed to
    `datetime.fromtimestamp` (i.e. it is read as milliseconds).

    Returns list of tuple consisting - `timestamp`, `x`, `y`, `z`
    [
    (`timestamp` as datetime, `x`, `y`, `z` as float), ...
    ]
    """
    samples = []
    with open(filename) as f:
        for raw_line in f:
            fields = raw_line.strip().split()
            # Some data has fewer fields because the output was not flushed,
            # so only complete 4-field lines are kept.
            if len(fields) != 4:
                continue
            ts, x, y, z = fields
            samples.append(
                (datetime.fromtimestamp(float(ts) / 1000), float(x), float(y), float(z))
            )
    return samples

In [4]:
def preprocess(data, seconds=5):
    """
    Drop repeated timestamps and trim both ends of a recording.

    data : list of tuple consisting - `timestamp`, `x`, `y`, `z`
           where `timestamp` is a datetime.
    seconds : number of seconds cut off after the first sample and before
              the last sample.
    returns : list of (`timestamp`, `x`, `y`, `z`) tuples that survive both
              steps. The list is empty when `data` is empty or when the
              recording is too short to leave anything after trimming.
    """
    print("original data points: ", len(data))

    # An empty recording (e.g. a log with no complete lines) has no first or
    # last sample to anchor the trimming window on.
    if not data:
        return []

    # Remove duplicates: a sample is dropped when its timestamp equals the
    # timestamp of the sample immediately before it in `data`.
    no_dup = [
        line
        for idx, line in enumerate(data)
        if idx == 0 or line[0] != data[idx - 1][0]
    ]

    # Remove n seconds from start / end (both bounds are inclusive).
    delta = timedelta(days=0, seconds=seconds)
    start_time = no_dup[0][0] + delta
    end_time = no_dup[-1][0] - delta
    return [(ts, x, y, z) for (ts, x, y, z) in no_dup if start_time <= ts <= end_time]

In [5]:
def make_chunks(data, jump_seconds=2, chunk_seconds=10):
    """
    Cut a recording into overlapping, fixed-duration windows.

    data : list of tuple consisting - `timestamp`, `x`, `y`, `z`
    [
    (`timestamp`,`x`,`y`,`z`), ...
    ]
    `timestamp` is a datetime; samples are expected in chronological order.
    jump_seconds : minimum time between the starts of consecutive windows.
    chunk_seconds : duration of every window.
    returns : list of chunks. Each chunk is a list of tuples of str,
              (`timestamp` in milliseconds, `x`, `y`, `z`), ready to be
              joined into text lines. A window is only emitted while its end
              lies strictly before the last sample, so short or empty
              recordings yield an empty list.
    raises : ValueError if `jump_seconds` or `chunk_seconds` is not positive
             (a non-positive jump would never advance the window).
    """
    if jump_seconds <= 0 or chunk_seconds <= 0:
        raise ValueError("jump_seconds and chunk_seconds must be positive")
    # Nothing to window; avoids indexing data[0] on an empty recording.
    if not data:
        return []

    jump = timedelta(days=0, seconds=jump_seconds)
    chunk_size = timedelta(days=0, seconds=chunk_seconds)
    end_time = data[-1][0]
    total = len(data)

    chunks = []
    start_pos = 0
    start = data[0][0]
    while start + chunk_size < end_time:
        window_end = start + chunk_size
        new_chunk = []
        pos = start_pos
        while pos < total and data[pos][0] < window_end:
            ts, *values = data[pos]
            millis = str(ts.timestamp() * 1000)
            # Materialise the row: a bare map() iterator would be exhausted
            # after the first read and look empty on any later one.
            new_chunk.append(tuple(map(str, (millis, *values))))
            pos += 1
        chunks.append(new_chunk)

        # find new start_pos: first sample at least `jump` after this start
        next_start = start + jump
        while start_pos < total and data[start_pos][0] < next_start:
            start_pos += 1
        if start_pos == total:
            break
        start = data[start_pos][0]

    return chunks
    


In [6]:
from utils import check_dir
# Output folder for the per-window chunk files.
new_dir = "data/chunk/"
check_dir(new_dir)

for name in header_names:
    stem = name[:-4]  # remove ".txt"
    # The remaining stem is expected to split into exactly four '_' parts.
    color, person, phone, number = stem.split('_')
    print(color, person, phone, number)

    # Load both sensors of this recording, clean them, then window them.
    gyro = read_filedata(data_directory + 'gyro_' + name)
    accel = read_filedata(data_directory + 'accel_' + name)
    gyro = preprocess(gyro)
    accel = preprocess(accel)
    gyro_chunks = make_chunks(gyro)
    accel_chunks = make_chunks(accel)

    # One file per chunk: "<sensor>_<stem>_<chunk index>.txt", one
    # tab-separated sample per line. Gyro files are written before accel files.
    for prefix, chunks in (("gyro_", gyro_chunks), ("accel_", accel_chunks)):
        for idx, chunk in enumerate(chunks):
            filename = new_dir + prefix + stem + "_" + str(idx) + ".txt"
            with open(filename, "w") as f:
                content = "\n".join("\t".join(line) for line in chunk)
                f.write(content)

print("done!")

sober ys s8 3
original data points:  34672
original data points:  34672
black jj v10 6
original data points:  17849
original data points:  17849
blue ys s8 1
original data points:  38457
original data points:  38457
black jj v10 1
original data points:  18996
original data points:  18996
black ys v10 6
original data points:  14938
original data points:  14938
blue ys s8 5
original data points:  36026
original data points:  36026
blue jh a5 7
original data points:  11816
original data points:  11816
black ys v10 8
original data points:  16245
original data points:  16245
sober ys s8 6
original data points:  36091
original data points:  36091
black jj v10 2
original data points:  18714
original data points:  18714
blue ys s8 2
original data points:  36660
original data points:  36660
sober jj s8 8
original data points:  32935
original data points:  32935
blue jj a5 3
original data points:  14853
original data points:  14853
sober jh s8 8
original data points:  29032
original data points:

In [7]:
# List the chunk files that were written: one sample name and the total count.
files = os.listdir(new_dir)
print(files[:1], len(files), sep="\n")

['gyro_black_jj_v10_10_6.txt']
4022
