In [None]:
import pandas as pd
#np.set_printoptions(threshold=np.inf)
pd.set_option('display.width', 500) # Set the character width used when printing frames
pd.set_option('display.max_rows', 10) # Set the maximum number of rows displayed
import tqdm.auto as tqdm

def read_csv_file(user_id, video_id, data_dir='../../../data/360_video_viewing_dataset/sensory/raw'):
    """Load one raw sensor CSV and tag every row with the video it belongs to.

    The file read is ``{data_dir}/{video_id}_user{user_id}_raw.csv``.

    Args:
        user_id: User identifier as it appears in the file name. A plain ``int``
            is zero-padded to two digits (``1`` -> ``'01'``); any other value
            (for example the string ``'01'``) is inserted unchanged.
        video_id: Video name used in the file name, e.g. ``'coaster'``.
        data_dir: Directory containing the raw CSV files. Defaults to the
            location this notebook has always read from.

    Returns:
        pandas.DataFrame: the CSV contents plus a ``video_id`` column holding
        ``video_id`` on every row.
    """
    # Callers pad single-digit ids themselves; doing it here as well lets plain
    # integers be passed without producing a path such as "..._user1_raw.csv".
    user_tag = f'{user_id:02d}' if isinstance(user_id, int) else user_id
    filepath = f'{data_dir}/{video_id}_user{user_tag}_raw.csv'
    df = pd.read_csv(filepath)
    df['video_id'] = video_id  # remember which video these samples came from
    return df

# Build one long frame holding every (user, video) recording, labelled by user.
data_with_labels = []
video_ids = ['coaster', 'coaster2', 'diving', 'drive', 'game', 'landscape', 'pacman', 'panel', 'ride', 'sport']
for user_number in range(1, 51):
    # Single-digit users become zero-padded strings ('01'..'09') to match the
    # file names; users 10-50 stay plain ints.
    # NOTE(review): the resulting 'user_id' column therefore mixes str and int.
    # Later cells filter on exactly this scheme, so it is kept as-is here.
    user_id = '0' + str(user_number) if user_number < 10 else user_number

    user_data = []
    for video_name in video_ids:
        df = read_csv_file(user_id, video_name)

        # Pre-processing hook (e.g. per-second coordinates) would go here:
        # df = preprocess(df)

        # Attach the label (user ID)
        df['user_id'] = user_id
        user_data.append(df)

    data_with_labels.append(pd.concat(user_data, ignore_index=True))

data_with_labels = pd.concat(data_with_labels, ignore_index=True)


In [None]:
from datetime import timedelta
from datetime import datetime
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import tensorflow as tf
from keras.layers import Reshape
from keras.models import Sequential
from keras.layers import LSTM, Dense, Bidirectional
from keras.utils import to_categorical

class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    ``call`` indexes the attention weights as ``a[:, :, newaxis]``, so the input
    is expected to be rank 3; in this notebook it follows an LSTM with
    ``return_sequences=True`` (presumably ``(batch, time, features)``).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create a square projection matrix and a bias sized by the last input axis."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(shape=(feature_dim, feature_dim), initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(feature_dim,), initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return the softmax-weighted sum of ``x`` over axis 1."""
        projected = tf.nn.tanh(tf.linalg.matmul(x, self.W) + self.b)
        # One scalar score per position: dot product of the projection with the input.
        scores = tf.reduce_sum(projected * x, axis=-1)
        weights = tf.nn.softmax(scores, axis=-1)
        return tf.reduce_sum(weights[:, :, tf.newaxis] * x, axis=1)

def process_data(data, i):
    """Turn raw samples into one-second windows of ten 100 ms mean vectors.

    For every (user, video, second) the samples are averaged inside each
    100 ms slice. Only seconds in which all ten slices contain data are kept.
    Windows from ``video_ids[i]`` form the test split; every other video goes
    to the training split.

    Relies on the notebook-level names ``video_ids`` and ``tqdm``.

    Args:
        data: DataFrame with ``timestamp`` (seconds, or already datetime),
            ``user_id``, ``video_id`` and the six ``raw*`` pose columns.
            The frame is not modified.
        i: Index into ``video_ids`` of the held-out (test) video.

    Returns:
        Tuple ``(train_data, train_labels, test_data, test_labels)`` of numpy
        arrays. Data arrays are ``(n_windows, 10, 6)``; an empty split is an
        array of shape ``(0,)``.
    """
    # Work on a derived frame so the caller's data (often a filtered slice) is
    # never mutated and no SettingWithCopyWarning is raised.
    timestamps = pd.to_datetime(data['timestamp'], unit='s')
    data = data.assign(timestamp=timestamps, rounded_time=timestamps.dt.floor('100ms'))

    feature_columns = ['rawTX', 'rawTY', 'rawTZ', 'rawYaw', 'rawPitch', 'rawRoll']
    held_out_video = video_ids[i]

    train_data = []
    train_labels = []
    test_data = []
    test_labels = []
    print("Processing", i)
    for user_id, user_data in tqdm(data.groupby('user_id')):
        for video_id, video_data in user_data.groupby('video_id'):
            seconds = video_data['rounded_time'].dt.floor('1s')
            for _, second_data in video_data.groupby(seconds):
                # 'rounded_time' is the start of each sample's 100 ms slice, so
                # grouping on it gives left-closed slices [t, t + 100 ms). A sample
                # that falls exactly on a slice boundary (including the first
                # instant of the second) is therefore kept in the slice it starts.
                slice_means = second_data.groupby('rounded_time')[feature_columns].mean()

                # A second holds at most ten slices; require all of them.
                if len(slice_means) != 10:
                    continue

                time_slices = slice_means.to_numpy()
                if video_id == held_out_video:
                    test_data.append(time_slices)
                    test_labels.append(user_id)
                else:
                    train_data.append(time_slices)
                    train_labels.append(user_id)

    return np.array(train_data), np.array(train_labels), np.array(test_data), np.array(test_labels)




In [None]:
# data_with_labels must already exist (it is built by the loading cell at the top).

# Path template for the exported models; {} receives the held-out video index.
model_path = './model_{}.tflite'

# NOTE(review): video_ids lists 10 videos but only indices 0-8 are held out here —
# confirm that skipping the last video is intended.
for i in range(9):
    train_data, train_labels, test_data, test_labels = process_data(data_with_labels, i)

    # Convert user IDs to consecutive integer class indices
    unique_labels = np.unique(np.concatenate([train_labels, test_labels]))
    label_map = {label: idx for idx, label in enumerate(unique_labels)}
    int_train_labels = np.array([label_map[label] for label in train_labels])
    int_test_labels = np.array([label_map[label] for label in test_labels])

    # Convert label data to one-hot encoding
    one_hot_train_labels = to_categorical(int_train_labels, num_classes=len(unique_labels))
    one_hot_test_labels = to_categorical(int_test_labels, num_classes=len(unique_labels))

    with tf.device('/device:GPU:0'):
        # Define the model: BiLSTM -> attention pooling -> BiLSTM -> softmax over users
        model = Sequential()
        model.add(Bidirectional(LSTM(128, input_shape=(10, 6), return_sequences=True)))
        model.add(AttentionLayer())
        model.add(Reshape((1, -1)))  # restore a length-1 time axis for the next LSTM
        model.add(Bidirectional(LSTM(64)))
        model.add(Dense(len(unique_labels), activation='softmax'))

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        # Report sizes only; printing the arrays themselves floods the cell output.
        print(f'train_data.shape: {train_data.shape}')
        print(f'test_data.shape: {test_data.shape}')
        print(f'train_data.len: {len(train_data)}')
        print(f'test_data.len: {len(test_data)}')

        # Reshape the training and testing data to (windows, 10 slices, 6 features)
        train_data = train_data.reshape(train_data.shape[0], 10, 6)
        test_data = test_data.reshape(test_data.shape[0], 10, 6)

        # Train the model; the held-out video doubles as the validation set
        batch_size = min(128, train_data.shape[0])
        print(f'batch_size: {batch_size}')

        model.fit(train_data, one_hot_train_labels, epochs=50, batch_size=batch_size, validation_data=(test_data, one_hot_test_labels))

        # Convert the trained model to TFLite
        converter = tf.lite.TFLiteConverter.from_keras_model(model)
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
        tflite_model = converter.convert()

        # Save the model.
        with open(model_path.format(i), 'wb') as f:
            f.write(tflite_model)
        print(f"Model {i} saved to {model_path.format(i)}")

In [None]:
from sklearn.model_selection import train_test_split
# Re-split the windows currently held in train_data/train_labels 80/20 at random.
# NOTE(review): these names come from the last iteration of the training loop above.
train_data, test_data, train_labels, test_labels = train_test_split(
    train_data,
    train_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Reshape
from keras.utils import to_categorical
import tensorflow as tf
# Map user IDs to consecutive integers starting at 0
unique_labels = np.unique(np.concatenate((train_labels, test_labels)))
label_map = dict(zip(unique_labels, range(len(unique_labels))))
int_train_labels = np.array([label_map[user] for user in train_labels])
int_test_labels = np.array([label_map[user] for user in test_labels])

# One-hot encode the integer labels
num_classes = len(unique_labels)
one_hot_train_labels = to_categorical(int_train_labels, num_classes=num_classes)
one_hot_test_labels = to_categorical(int_test_labels, num_classes=num_classes)

# NOTE(review): this class is also defined in an earlier cell of this notebook;
# whichever definition runs last is the one in effect.
class AttentionLayer(tf.keras.layers.Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    ``call`` indexes the attention weights as ``a[:, :, newaxis]``, so the input
    is expected to be rank 3; in this notebook it follows an LSTM with
    ``return_sequences=True`` (presumably ``(batch, time, features)``).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create a square projection matrix and a bias sized by the last input axis."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(shape=(feature_dim, feature_dim), initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(feature_dim,), initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return the softmax-weighted sum of ``x`` over axis 1."""
        projected = tf.nn.tanh(tf.linalg.matmul(x, self.W) + self.b)
        # One scalar score per position: dot product of the projection with the input.
        scores = tf.reduce_sum(projected * x, axis=-1)
        weights = tf.nn.softmax(scores, axis=-1)
        return tf.reduce_sum(weights[:, :, tf.newaxis] * x, axis=1)

In [None]:
with tf.device('/device:GPU:0'):
    # Define the model: LSTM -> attention pooling -> LSTM -> softmax over users
    model = Sequential([
        LSTM(128, input_shape=(10, 6), return_sequences=True),
        AttentionLayer(),
        Reshape((1, -1)),  # restore a length-1 time axis for the next LSTM
        LSTM(64),
        Dense(len(unique_labels), activation='softmax'),
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Reshape the training and test data to (windows, 10 slices, 6 features)
    train_data = train_data.reshape(train_data.shape[0], 10, 6)
    test_data = test_data.reshape(test_data.shape[0], 10, 6)

    # Train the model, validating on the test split
    batch_size = min(128, train_data.shape[0])
    model.fit(
        train_data,
        one_hot_train_labels,
        epochs=50,
        batch_size=batch_size,
        validation_data=(test_data, one_hot_test_labels),
    )

In [None]:
# Convert the trained Keras model to TFLite, enabling both the builtin op set
# and the select-TF op set on the converter.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
tflite_model = converter.convert()

# Write the serialized model to disk
with open('modelB.tflite', 'wb') as model_file:
    model_file.write(tflite_model)

In [None]:
# Debug output: show the training sample at index 1
print(train_data[1])

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='pandas')
import seaborn as sns
from keras.utils import to_categorical
sns.set_style("whitegrid")

def majority_vote(predictions, window_size):
    """Reduce predictions to one majority label per non-overlapping window.

    Windows start at 0, ``window_size``, ``2 * window_size``, ...; a trailing
    group shorter than ``window_size`` is ignored. Ties go to the smallest
    label, because ``np.argmax`` returns the first maximum of the bincount.

    Args:
        predictions: Sequence of non-negative integer class predictions.
        window_size: Number of consecutive predictions per vote.

    Returns:
        list: one winning label per complete window.
    """
    last_start = len(predictions) - window_size
    return [
        np.argmax(np.bincount(predictions[start:start + window_size]))
        for start in range(0, last_start + 1, window_size)
    ]

def compute_accuracy_with_window(model, data_with_labels, label_map, window_sizes):
    """Average per-user accuracy after majority voting, per video and window size.

    For each of the first nine videos, every user's recording of that video is
    windowed with ``process_data``, classified once with ``model`` and then
    scored for every requested window size using ``majority_vote``.

    Relies on the notebook-level names ``video_ids``, ``process_data`` and
    ``majority_vote``.

    Args:
        model: Object with a Keras-style ``predict`` returning class scores.
        data_with_labels: DataFrame with at least ``user_id`` and ``video_id``
            columns plus whatever ``process_data`` needs.
        label_map: Mapping from user-id string to integer class index.
        window_sizes: Iterable of vote window sizes to evaluate.

    Returns:
        list of ``(video_index, window_size, mean_accuracy)`` tuples, ordered by
        video then window size. ``mean_accuracy`` is NaN when no user produced
        a complete vote window for that combination.
    """
    acc_results = []

    # The 'user_id' column may mix zero-padded strings and ints; compare on one
    # canonical two-digit string form so every user matches either way.
    user_keys = data_with_labels['user_id'].astype(str).str.zfill(2)

    for video_id in range(9):
        video_mask = data_with_labels['video_id'] == video_ids[video_id]

        # Windowing and prediction do not depend on the vote window size, so do
        # them once per user instead of once per (user, window size).
        per_user_predictions = []
        for user_number in range(1, 51):
            user_rows = data_with_labels[video_mask & (user_keys == f'{user_number:02d}')]
            if user_rows.empty:
                continue

            # Pass a copy so process_data never writes into a slice of the full frame.
            _, _, test_data, test_labels = process_data(user_rows.copy(), video_id)
            if len(test_data) == 0:
                # No complete one-second window: nothing to predict.
                continue

            int_test_labels = np.array([label_map[str(label)] for label in test_labels])
            test_data = test_data.reshape(test_data.shape[0], 10, 6)
            predictions = np.argmax(model.predict(test_data), axis=1)
            per_user_predictions.append((predictions, int_test_labels))

        for window_size in window_sizes:
            accuracy_per_window = []
            for predictions, int_test_labels in per_user_predictions:
                vote_results = majority_vote(predictions, window_size)
                if not vote_results:
                    # Fewer predictions than one window; skip rather than divide by zero.
                    continue

                # Every label in int_test_labels belongs to the same user, so
                # comparing vote k with label k is equivalent to comparing it
                # with the labels of the window it summarises.
                expected = int_test_labels[:len(vote_results)]
                correct = np.sum(np.asarray(vote_results) == expected)
                accuracy_per_window.append(correct / len(vote_results))

            # Mean accuracy over all users for this (video, window size)
            mean_accuracy = np.mean(accuracy_per_window) if accuracy_per_window else float('nan')
            acc_results.append((video_id, window_size, mean_accuracy))
            print('Video ID: %d, Window size: %d, average accuracy: %.2f' % (video_id, window_size, mean_accuracy))

    return acc_results

# Vote window sizes to evaluate
window_sizes = list(range(1, 6))

# Accuracy for every (video, window size) combination
accuracy_results = compute_accuracy_with_window(model, data_with_labels, label_map, window_sizes)

# Put the results in a DataFrame for plotting
accuracy_df = pd.DataFrame(accuracy_results, columns=['video_id', 'window_size', 'accuracy'])

# Line chart: one line per video
fig, ax = plt.subplots(figsize=(8, 6))
sns.lineplot(data=accuracy_df, x='window_size', y='accuracy', hue='video_id', palette='tab10', ax=ax)
ax.set_xlabel('Window Size (M)')
ax.set_ylabel('Average Accuracy')
ax.set_title('Average Accuracy for each video with varying window size')
ax.set_xticks(window_sizes)
ax.grid(True)
ax.legend(title='Video ID', loc='lower right')
plt.show()


In [None]:
# accuracy_df holds rows for every video and no hue is given, so seaborn
# aggregates the repeated window sizes across videos (mean by default).
# The title states that, instead of claiming the plot shows video 0 only.
plt.figure(figsize=(8, 6))
sns.lineplot(data=accuracy_df, x='window_size', y='accuracy')
plt.xlabel('Window Size (M)')
plt.ylabel('Average Accuracy')
plt.title('Average Accuracy across videos with varying window size')
plt.xticks(window_sizes)
plt.grid(True)
plt.show()


In [None]:
def compute_accuracy_per_user(model, data_with_labels, unique_labels, label_map):
    """Evaluate the model separately on each user's recording of each video.

    Relies on the notebook-level names ``video_ids``, ``process_data`` and
    ``to_categorical``.

    Args:
        model: Object with a Keras-style ``evaluate`` returning ``(loss, accuracy)``.
        data_with_labels: DataFrame with at least ``user_id`` and ``video_id``
            columns plus whatever ``process_data`` needs.
        unique_labels: All class labels; only its length is used (number of classes).
        label_map: Mapping from user-id string to integer class index.

    Returns:
        dict mapping a zero-padded two-digit user-id string to the list of
        accuracies obtained for that user, in video order. Videos with no data,
        no complete window, or a failed evaluation contribute no entry, and
        users without any accuracy are left out.
    """
    user_acc_results = {}

    # The 'user_id' column may mix zero-padded strings ('01') and ints (10).
    # Comparing the raw column with a string silently drops every int-labelled
    # user, so compare on one canonical two-digit string form instead.
    user_keys = data_with_labels['user_id'].astype(str).str.zfill(2)

    for user_number in range(1, 51):  # assume user ids run from 1 to 50
        user_id = f'{user_number:02d}'
        user_mask = user_keys == user_id
        acc_results = []

        for i in range(0, 9):
            # Rows of this user for this video
            test_rows = data_with_labels[user_mask & (data_with_labels['video_id'] == video_ids[i])]
            if test_rows.empty:
                continue

            # Pass a copy so process_data never writes into a slice of the full frame.
            _, _, test_data, test_labels = process_data(test_rows.copy(), i)
            if len(test_data) == 0:
                print(f"Skipping evaluation for user_id: {user_id}, video_id: {i}. No complete one-second window.")
                continue

            int_test_labels = np.array([label_map[str(label)] for label in test_labels])
            one_hot_test_labels = to_categorical(int_test_labels, num_classes=len(unique_labels))

            # Reshape to (windows, 10 slices, 6 features)
            test_data = test_data.reshape(test_data.shape[0], 10, 6)

            try:
                loss, accuracy = model.evaluate(test_data, one_hot_test_labels, batch_size=len(test_data), verbose=0)
            except ValueError as e:
                print(f"Skipping evaluation for user_id: {user_id}, video_id: {i}. Evaluation returned: {e}")
                continue

            acc_results.append(accuracy)
            print(f'user_id: {user_id}, video_id: {video_ids[i]}, accuracy: {accuracy}.')

        # Leave out users for whom nothing could be evaluated
        if acc_results:
            user_acc_results[user_id] = acc_results

    return user_acc_results



# Accuracy of every (user, video) test set
user_accuracy_results = compute_accuracy_per_user(model, data_with_labels, unique_labels, label_map)

print(f'user_accuracy_results: {user_accuracy_results}')

# Box plot: one box per user, ordered by user id
ordered_user_ids = sorted(user_accuracy_results)
plt.figure(figsize=(20, 10))  # wide figure so all user boxes fit
plt.boxplot(
    [user_accuracy_results[uid] for uid in ordered_user_ids],
    labels=ordered_user_ids,
)
plt.xlabel('User_ID')
plt.ylabel('Accuracy')
plt.title('Accuracy for each user')
plt.xticks(rotation=90)  # rotate tick labels so they do not overlap
plt.show()


In [None]:
def reorganize_accuracy_by_video(user_accuracy_results):
    """Regroup per-user accuracy lists into per-video accuracy lists.

    The position of a value inside a user's list is used as its video index.
    NOTE(review): compute_accuracy_per_user skips videos it cannot evaluate, so
    for such users the positions no longer line up with the real video index.

    Args:
        user_accuracy_results: dict mapping user id to a list of accuracies.

    Returns:
        dict with keys 0..8, each holding the accuracies found at that list
        position across all users (possibly an empty list).
    """
    video_acc_results = {}
    for video_index in range(9):
        video_acc_results[video_index] = []

    for accuracies in user_accuracy_results.values():
        for position, accuracy in enumerate(accuracies):
            video_acc_results[position].append(accuracy)

    return video_acc_results

video_accuracy_results = reorganize_accuracy_by_video(user_accuracy_results)

# Box plot: one box per video index
ordered_video_ids = sorted(video_accuracy_results)
plt.figure(figsize=(20, 10))  # wide figure, same size as the per-user plot
plt.boxplot(
    [video_accuracy_results[vid] for vid in ordered_video_ids],
    labels=ordered_video_ids,
)
plt.xlabel('Video_ID')
plt.ylabel('Accuracy')
plt.title('CaseB Accuracy for each video')
plt.xticks(rotation=90)  # rotate tick labels so they do not overlap
plt.show()


In [None]:
# Close every open matplotlib figure
plt.close('all')