In [1]:
import os
import pandas as pd
from datetime import datetime

In [2]:
def process_trajectory_file(filepath, header_lines=6):
    """Parse one comma-separated trajectory file into a DataFrame.

    The first ``header_lines`` lines of the file are skipped. Every remaining
    non-blank line must hold exactly seven comma-separated fields: fields 1, 2
    and 4 are read as latitude, longitude and altitude, fields 6 and 7 are the
    date (``%Y-%m-%d``) and time (``%H:%M:%S``), and fields 3 and 5 are ignored.

    Parameters
    ----------
    filepath : str or path-like
        Path of the trajectory file to read.
    header_lines : int, default 6
        Number of leading lines to skip before the data rows start.

    Returns
    -------
    pandas.DataFrame
        One row per data line, with columns ``Latitude``, ``Longitude``,
        ``Altitude`` and ``Datetime``.

    Raises
    ------
    ValueError
        If a non-blank data line does not have seven fields, or a numeric or
        date/time field cannot be parsed.
    """
    records = []
    with open(filepath, 'r') as file:
        # Stream the file instead of materialising every line with readlines().
        for line_number, raw_line in enumerate(file):
            if line_number < header_lines:
                continue
            line = raw_line.strip()
            if not line:
                # A blank line (typically a trailing one) would otherwise
                # break the seven-way unpack below.
                continue
            lat, lon, _, alt, _, date_str, time_str = line.split(',')
            dt = datetime.strptime(f"{date_str} {time_str}", '%Y-%m-%d %H:%M:%S')
            records.append((float(lat), float(lon), float(alt), dt))

    return pd.DataFrame(records, columns=['Latitude', 'Longitude', 'Altitude', 'Datetime'])

# Parse a single sample trajectory file and preview its first rows
# (the bare `.head()` as the last expression is what the cell displays).
trajectory_file_path = 'DATA/003/Trajectory/20081023175854.plt'
processed_df = process_trajectory_file(trajectory_file_path)
processed_df.head()

Unnamed: 0,Latitude,Longitude,Altitude,Datetime
0,39.999844,116.326752,492.0,2008-10-23 17:58:54
1,39.999876,116.326708,492.0,2008-10-23 17:58:59
2,39.999915,116.326672,492.0,2008-10-23 17:59:04
3,40.000059,116.32714,142.0,2008-10-23 17:59:09
4,40.000039,116.327172,132.0,2008-10-23 17:59:14


In [3]:
def process_user_data(user_folder):
    """Combine every ``.plt`` file of one user into a single time-ordered frame.

    All files ending in ``.plt`` inside ``<user_folder>/Trajectory`` are parsed
    with ``process_trajectory_file``, concatenated, and sorted by ``Datetime``.
    A ``Seconds_Since_Start`` column is added holding each row's offset, in
    seconds, from the earliest timestamp.

    Parameters
    ----------
    user_folder : str
        Folder that contains a ``Trajectory`` sub-folder.

    Returns
    -------
    pandas.DataFrame
        Columns ``Latitude``, ``Longitude``, ``Altitude``, ``Datetime`` and
        ``Seconds_Since_Start``. The frame has these columns and zero rows
        when the folder holds no ``.plt`` files or none of them has data rows.

    Raises
    ------
    FileNotFoundError
        If ``<user_folder>/Trajectory`` does not exist (from ``os.listdir``).
    """
    output_columns = ['Latitude', 'Longitude', 'Altitude', 'Datetime', 'Seconds_Since_Start']

    trajectory_folder = os.path.join(user_folder, 'Trajectory')
    # os.listdir order is arbitrary; sorting makes the result reproducible.
    trajectory_files = [
        os.path.join(trajectory_folder, name)
        for name in sorted(os.listdir(trajectory_folder))
        if name.endswith('.plt')
    ]

    # Parse everything first and concatenate once: concatenating inside the
    # loop re-copies all accumulated rows on every iteration.
    frames = [process_trajectory_file(path) for path in trajectory_files]
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        # Without any rows there is no 'Datetime' column to sort on, and the
        # `.dt` accessor below would fail.
        return pd.DataFrame(columns=output_columns)

    user_data = pd.concat(frames, ignore_index=True)
    # A stable sort keeps rows with identical timestamps in file order.
    user_data = user_data.sort_values(by='Datetime', kind='mergesort').reset_index(drop=True)

    earliest_time = user_data['Datetime'].min()
    user_data['Seconds_Since_Start'] = (user_data['Datetime'] - earliest_time).dt.total_seconds()

    return user_data

# Build the combined, time-sorted frame for one sample user and preview it.
user_folder_path = 'DATA/003'
processed_user_data = process_user_data(user_folder_path)
processed_user_data.head()

Unnamed: 0,Latitude,Longitude,Altitude,Datetime,Seconds_Since_Start
0,39.999844,116.326752,492.0,2008-10-23 17:58:54,0.0
1,39.999876,116.326708,492.0,2008-10-23 17:58:59,5.0
2,39.999915,116.326672,492.0,2008-10-23 17:59:04,10.0
3,40.000059,116.32714,142.0,2008-10-23 17:59:09,15.0
4,40.000039,116.327172,132.0,2008-10-23 17:59:14,20.0


In [4]:
def save_user_data(user_folder, df, output_root='processed_data'):
    """Write a user's frame to ``<output_root>/<user_id>/<user_id>_data.csv``.

    The user id is the last path component of ``user_folder``. The output
    folder is created if it does not exist, and the CSV is written without
    the index column.

    Parameters
    ----------
    user_folder : str
        Folder of the user; only its final component is used, as the user id.
    df : pandas.DataFrame
        Data to save.
    output_root : str, default 'processed_data'
        Folder under which the per-user output folder is created.
    """
    # normpath drops a trailing separator; without it basename('DATA/003/')
    # is '' and the file would be written as '<output_root>/_data.csv'.
    user_id = os.path.basename(os.path.normpath(user_folder))
    output_folder = os.path.join(output_root, user_id)
    os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, f'{user_id}_data.csv')
    df.to_csv(output_file, index=False)

In [6]:
def process_and_save_user(data_folder, user_id):
    """Process one user's trajectories and write the result to disk.

    The user's folder is ``<data_folder>/<user_id>``. When that folder is not
    a directory, a message is printed and nothing else happens; otherwise the
    folder is run through ``process_user_data`` and the resulting frame is
    passed to ``save_user_data``.
    """
    user_folder = os.path.join(data_folder, user_id)

    # Guard clause: bail out early when there is nothing to process.
    if not os.path.isdir(user_folder):
        print(f"User folder '{user_folder}' does not exist.")
        return

    save_user_data(user_folder, process_user_data(user_folder))

# Process and save a single user as a quick end-to-end check.
specific_user_id = '003'  
data_folder = 'DATA'
process_and_save_user(data_folder, specific_user_id)

In [29]:
def process_and_save_all_users(data_folder):
    """Run ``process_and_save_user`` for every sub-directory of ``data_folder``.

    Each directory entry's name is used as the user id; entries that are not
    directories are skipped.
    """
    def _is_user_dir(entry_name):
        # Only directories are treated as user folders.
        return os.path.isdir(os.path.join(data_folder, entry_name))

    # The candidate list is fully resolved before any user is processed.
    user_ids = list(filter(_is_user_dir, os.listdir(data_folder)))
    for current_id in user_ids:
        process_and_save_user(data_folder, current_id)

# Batch run: process and save every user directory found under DATA.
data_folder = 'DATA'
process_and_save_all_users(data_folder)

In [7]:
def keep_first_100_lines(input_csv_path, output_csv_path=None, n=100):
    """Write the first ``n`` data rows of a CSV file to a (possibly new) file.

    Parameters
    ----------
    input_csv_path : str
        CSV file to read.
    output_csv_path : str, optional
        Destination file. When omitted, the input file is overwritten.
    n : int, default 100
        Number of data rows (the header line is not counted) to keep.

    Notes
    -----
    Only the first ``n`` rows are parsed (``nrows=n``), so the rest of a large
    file is never loaded. Column dtypes are therefore inferred from the kept
    rows only.
    """
    df_first_n = pd.read_csv(input_csv_path, nrows=n)
    if output_csv_path is None:
        output_csv_path = input_csv_path

    df_first_n.to_csv(output_csv_path, index=False)
    print(f"Saved the first {n} lines to '{output_csv_path}'.")

# Input and output paths are identical, so this call truncates the saved
# CSV for user 003 in place; the rows beyond the first 100 are lost unless
# the file is regenerated.
input_csv = 'processed_data/003/003_data.csv'  
output_csv = 'processed_data/003/003_data.csv'

keep_first_100_lines(input_csv, output_csv)

Saved the first 100 lines to 'processed_data/003/003_data.csv'.
