In [159]:
import pandas as pd
import tqdm.auto as tqdm

def complete_to_two_digits(x):
    """Return ``x`` as a string, zero-padded to two characters when ``x`` is below 10."""
    if x < 10:
        return str(x).zfill(2)
    return str(x)

def read_csv_file(user_id, video_id):
    """Load the raw CSV recorded for one user and one video.

    Parameters
    ----------
    user_id : str
        User identifier exactly as it appears in the file name; the caller
        in this notebook passes a zero-padded string such as ``'01'``.
    video_id : str
        Video name used as the file-name prefix (e.g. ``'coaster'``).

    Returns
    -------
    pandas.DataFrame
        Contents of ``../sensory/raw/{video_id}_user{user_id}_raw.csv``,
        resolved relative to the current working directory.
    """
    return pd.read_csv(f'../sensory/raw/{video_id}_user{user_id}_raw.csv')

# Load every (user, video) recording, tag it, and stack everything into one frame.
video_ids = ['coaster', 'coaster2', 'diving', 'drive', 'game', 'landscape', 'pacman', 'panel', 'ride', 'sport']

recordings = []
for user_id in range(1, 51):
    for video_id in video_ids:
        df = read_csv_file(complete_to_two_digits(user_id), video_id)

        df['user_id'] = user_id    # label column (user ID)
        df['video_id'] = video_id  # which video this recording belongs to
        # The raw `timestamp` column is interpreted as seconds since the epoch.
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
        # Bucket each sample into its 100 ms window; `timestamp` is already a
        # datetime here, so it is floored directly instead of being parsed again.
        df['timestamp_floor'] = df['timestamp'].dt.floor('100ms')
        recordings.append(df)

# A single concat over all recordings yields the same rows in the same order as
# concatenating per user first, without the extra intermediate copy.
csv_data = pd.concat(recordings, ignore_index=True)


In [160]:
# Display the combined frame; the notebook renders a truncated head/tail preview.
csv_data

Unnamed: 0,timestamp,rawTX,rawTY,rawTZ,rawYaw,rawPitch,rawRoll,user_id,video_id,timestamp_floor
0,2017-03-17 08:09:50.316999936,10.073,-0.109,-0.587,-16.907,-1.869,-2.928,1,coaster,2017-03-17 08:09:50.300
1,2017-03-17 08:09:50.324999936,10.071,-0.108,-0.587,-16.907,-1.870,-2.930,1,coaster,2017-03-17 08:09:50.300
2,2017-03-17 08:09:50.328999936,10.070,-0.109,-0.587,-16.907,-1.870,-2.931,1,coaster,2017-03-17 08:09:50.300
3,2017-03-17 08:09:50.332999936,10.069,-0.108,-0.588,-16.907,-1.870,-2.933,1,coaster,2017-03-17 08:09:50.300
4,2017-03-17 08:09:50.336999936,10.068,-0.109,-0.588,-16.908,-1.869,-2.934,1,coaster,2017-03-17 08:09:50.300
...,...,...,...,...,...,...,...,...,...,...
13188502,2017-02-15 07:45:58.303000064,47.180,23.764,-6.570,1.757,-12.715,-1.601,50,sport,2017-02-15 07:45:58.300
13188503,2017-02-15 07:45:58.306999808,47.179,23.759,-6.561,1.775,-12.739,-1.605,50,sport,2017-02-15 07:45:58.300
13188504,2017-02-15 07:45:58.311000064,47.177,23.747,-6.542,1.811,-12.785,-1.615,50,sport,2017-02-15 07:45:58.300
13188505,2017-02-15 07:45:58.313999872,47.175,23.736,-6.523,1.848,-12.828,-1.627,50,sport,2017-02-15 07:45:58.300


In [161]:
from datetime import timedelta
import numpy as np
import random
from tqdm.auto import tqdm

def process_data(csv_data, seed=None):
    """Average the samples of every 100 ms window and randomly split the windows.

    Rows are grouped by ``(user_id, video_id, timestamp_floor)``. For each group
    the six raw columns are averaged and rounded to 5 decimals, giving one
    feature row per window, labelled with the window's ``user_id``. Windows are
    visited in sorted key order (user, then video, then window) and each one is
    sent to the test split with probability 1/5, otherwise to the train split.

    Parameters
    ----------
    csv_data : pandas.DataFrame
        Must contain ``user_id``, ``video_id``, ``timestamp_floor`` and the
        columns ``rawTX, rawTY, rawTZ, rawYaw, rawPitch, rawRoll``.
    seed : int or None, optional
        Seed for a private random generator so that the split is reproducible.
        With ``None`` (the default) the global ``random`` module state is used,
        so the split differs between runs unless the caller seeded ``random``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_data, train_labels, test_data, test_labels)``. The data arrays
        have one row per window and six columns; the label arrays hold the
        matching ``user_id`` values.
    """
    feature_cols = ['rawTX', 'rawTY', 'rawTZ', 'rawYaw', 'rawPitch', 'rawRoll']
    one_in = 5  # on average one window out of five goes to the test split

    # One grouped aggregation instead of three nested Python-level groupby loops.
    window_means = (
        csv_data
        .groupby(['user_id', 'video_id', 'timestamp_floor'], sort=True)[feature_cols]
        .mean()
        .round(5)
    )

    # One random draw per window, taken in the same (sorted) order as the rows.
    rng = random if seed is None else random.Random(seed)
    n_windows = len(window_means)
    is_test = np.fromiter(
        (rng.randrange(one_in) == 0 for _ in range(n_windows)),
        dtype=bool,
        count=n_windows,
    )

    features = window_means.to_numpy()
    labels = window_means.index.get_level_values('user_id').to_numpy()

    return features[~is_test], labels[~is_test], features[is_test], labels[is_test]

In [162]:
# Aggregate the samples into per-window feature rows and split them into train/test arrays.
train_data, train_labels, test_data, test_labels = process_data(csv_data)

  0%|          | 0/50 [00:00<?, ?it/s]

In [163]:
# Report the shape of each split array, one line per array.
for array_name, array in (
    ('train_data', train_data),
    ('train_labels', train_labels),
    ('test_data', test_data),
    ('test_labels', test_labels),
):
    print(f'{array_name}.shape: {array.shape}')

train_data.shape: (422232, 6)
train_labels.shape: (422232,)
test_data.shape: (106152, 6)
test_labels.shape: (106152,)


In [164]:
# Preview the training feature rows (numpy elides the middle rows of a large array).
print(train_data)

[[ 10.06155  -0.11225  -0.5891  -16.90425  -1.8772   -2.93905]
 [  9.99928  -0.11508  -0.61612 -16.83312  -1.91308  -2.97856]
 [  9.95784  -0.13352  -0.64604 -16.78684  -1.97952  -2.97992]
 ...
 [ 46.95208  24.67552  -8.00876   0.38084  -8.52212  -2.35312]
 [ 47.11244  24.21812  -7.39356   0.75056 -10.59816  -1.8866 ]
 [ 47.17996  23.91444  -6.84324   1.3474  -12.04668  -1.63276]]


In [165]:
def append_to_file(input_file, output_file, additional_content):
    """Write ``input_file``'s text, a newline, then ``additional_content`` to ``output_file``.

    ``input_file`` is only read. ``output_file`` is opened in ``'w'`` mode, so
    it is created or overwritten. Both files are opened with the platform's
    default text encoding.
    """
    with open(input_file, 'r') as source:
        template_text = source.read()

    # Build the full payload before opening the destination, so a bad
    # `additional_content` fails without touching `output_file`.
    combined_text = ''.join((template_text, '\n', additional_content))

    with open(output_file, 'w') as target:
        target.write(combined_text)


In [166]:
def initTimeNetData(data, label, step, drop_incomplete=False):
    """Serialize consecutive chunks of ``step`` rows as labelled multivariate series.

    ``data`` and ``label`` are walked in lock-step in chunks of ``step`` rows.
    A chunk is emitted only when all of its labels are identical; chunks that
    contain more than one label are skipped. Each emitted chunk is transposed
    so that every column of ``data`` becomes one comma-separated series; the
    series are joined with ``:`` and followed by ``:<label>`` and a newline.

    Parameters
    ----------
    data : sequence of rows (e.g. a 2-D numpy array)
        Feature rows, aligned with ``label``.
    label : sequence
        One label per row of ``data``.
    step : int
        Number of consecutive rows per emitted series.
    drop_incomplete : bool, optional
        When ``True``, a trailing chunk with fewer than ``step`` rows is
        skipped so every emitted series has the same length. The default
        ``False`` emits such a chunk as a shorter series.
        NOTE(review): confirm whether the consumer of the generated file
        tolerates series of unequal length.

    Returns
    -------
    str
        One line per emitted chunk, or ``""`` when no chunk qualifies.
    """
    lines = []
    for start in range(0, len(label), step):
        stop = start + step
        group = label[start:stop]
        if drop_incomplete and len(group) < step:
            continue
        if len(set(group)) != 1:
            continue  # chunk contains more than one label
        channels = np.transpose(data[start:stop])
        series = ":".join(",".join(map(str, channel)) for channel in channels)
        lines.append(series + ":" + str(group[0]) + "\n")

    # Joining once avoids repeatedly re-copying a growing string.
    return "".join(lines)

In [167]:
# Serialize the training rows as series of 38 consecutive rows each, then write
# them after the contents of template.ts into UserIdentify_TRAIN.ts.
append_train_string = initTimeNetData(train_data, train_labels, 38)
# print(append_train_string)
append_to_file('template.ts', 'UserIdentify_TRAIN.ts', append_train_string)

In [168]:
# Serialize the test rows as series of 28 consecutive rows each, then write
# them after the contents of template.ts into UserIdentify_TEST.ts.
# NOTE(review): the training export uses a step of 38 while this one uses 28,
# and both share the same template.ts; confirm the differing series lengths are intended.
append_test_string = initTimeNetData(test_data, test_labels, 28)
# print(append_test_string)
append_to_file('template.ts', 'UserIdentify_TEST.ts', append_test_string)