In [1]:
import pandas as pd
import os
from itertools import count
# Module-level counter created with itertools.count() (yields 0, 1, 2, ... on next()).
# NOTE(review): `cuid` is not referenced in any of the visible cells — confirm it is still needed.
cuid = count()

In [2]:
def iterate_files(directory) -> list:
    """
    Recursively collect the paths of all vehicle-track files below a directory.

    :param directory: Root directory; it and all of its subdirectories are walked.

    :return list: One path per file whose name contains 'vehicle_tracks'. Each path is
        the containing folder joined with the file name, in the order ``os.walk``
        yields them (no sorting is applied here).
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
        if 'vehicle_tracks' in str(filename)
    ]


In [3]:
def get_vehicle_tracks_csvs_from_dir(path:str):
    """
    Return the sorted paths of all 'vehicle_tracks' files found under ``path``.

    :param path: Directory that is searched recursively via ``iterate_files``.

    :return: List of file paths in ascending (lexicographic string) order.
    """
    return sorted(iterate_files(path))

In [4]:
def get_df_from_file_list(file_path_list, agent_types=('truck', 'car', 'bike', 'Truck', 'Car', 'Bike')):
    """
    Read several track CSV files and stack their vehicle rows into one DataFrame.

    Each file is read with ``pd.read_csv`` and reduced to the rows whose
    ``agent_type`` value is one of ``agent_types``. The filtered frames are then
    concatenated once, in the order of ``file_path_list``.

    :param file_path_list: Paths of the CSV files to read. Every file must have an
        ``agent_type`` column.
    :param agent_types: Values of ``agent_type`` to keep. The default keeps the
        lower-case and capitalised spellings of truck, car and bike.

    :return: A single DataFrame with a fresh 0..n-1 index. The index is reset for
        any number of files, including a single one.

    :raises ValueError: If ``file_path_list`` is empty.
    """
    if not file_path_list:
        raise ValueError('file_path_list is empty: no track files to read')

    wanted_types = list(agent_types)

    # Collect the per-file frames first and concatenate once; concatenating inside
    # the loop re-copies the accumulated frame for every additional file.
    vehicle_frames = []
    for path in file_path_list:
        df_current_file = pd.read_csv(path)
        # filter only for vehicles
        vehicle_frames.append(
            df_current_file.loc[df_current_file['agent_type'].isin(wanted_types)]
        )

    return pd.concat(vehicle_frames, ignore_index=True)

In [5]:
def _load_intersection_recording(recording_dir):
    """
    Load one recording directory and build a copy with factorized track ids.

    :param recording_dir: Directory searched for 'vehicle_tracks' CSV files.

    :return: Tuple ``(file_list, df_raw, df_cuid, track_id_uniques)`` where
        ``file_list`` is the sorted list of CSV paths, ``df_raw`` is the combined
        vehicle DataFrame, ``df_cuid`` is a copy of ``df_raw`` whose ``track_id``
        column holds the integer codes from ``pd.factorize``, and
        ``track_id_uniques`` are the original ids those codes map back to.
    """
    vehicle_tracks_list = get_vehicle_tracks_csvs_from_dir(recording_dir)
    df_raw = get_df_from_file_list(vehicle_tracks_list)
    df_cuid = df_raw.copy()
    # NOTE(review): the codes are keyed on track_id alone. If track ids restart in
    # every CSV file, tracks from different files that share an id would receive
    # the same code — confirm against the dataset.
    df_cuid['track_id'], track_id_uniques = pd.factorize(df_raw['track_id'])
    return vehicle_tracks_list, df_raw, df_cuid, track_id_uniques


# `unique_ids` is rebound by every call below, so afterwards it holds only the
# uniques of the last recording (k733_2020).

# import k729_2022
k729_2022_vehicle_tracks_list, df_k729_2022, df_k729_2022_cuid, unique_ids = _load_intersection_recording(
    '../../test-area-autonomous-driving-dataset-master/datasets/recorded_trackfiles/k729_2022-03-16'
)

# import k733_2018
k733_2018_vehicle_tracks_list, df_k733_2018, df_k733_2018_cuid, unique_ids = _load_intersection_recording(
    '../../test-area-autonomous-driving-dataset-master/datasets/recorded_trackfiles/k733_2018-05-02'
)

# import k733_2020
k733_2020_vehicle_tracks_list, df_k733_2020, df_k733_2020_cuid, unique_ids = _load_intersection_recording(
    '../../test-area-autonomous-driving-dataset-master/datasets/recorded_trackfiles/k733_2020-09-15'
)

In [6]:
# makes all k733 intersection track_ids continuous for both 2018 and 2020 measurements
def make_k733_cuid_continuous():
    """
    Shift the k733 2020 track ids so they continue after the k733 2018 ids.

    Reads the module-level ``df_k733_2018_cuid`` and rebinds the ``track_id``
    column of the module-level ``df_k733_2020_cuid`` to ``track_id + offset``,
    where ``offset`` is the largest 2018 ``track_id`` plus one. The 2018 frame
    is not modified.

    The shift is skipped when the smallest 2020 ``track_id`` is already at or
    beyond ``offset``, so re-running the cell that calls this function does not
    push the ids further up each time.

    NOTE(review): no call to this function is visible in the surrounding cells —
    confirm it is invoked before the k733 frames are exported.

    :return: None. ``df_k733_2020_cuid`` is updated in place.
    """
    offset = df_k733_2018_cuid['track_id'].max() + 1

    # Already shifted by an earlier call: leave the ids untouched.
    if df_k733_2020_cuid['track_id'].min() >= offset:
        return

    df_k733_2020_cuid['track_id'] = df_k733_2020_cuid['track_id'] + offset

In [7]:
def overwrite_export_file(file_list: list, df_list: list):
    '''
    Export pandas dataframes to the given paths as csv, replacing existing files.

    Paths and dataframes are paired by position: ``df_list[i]`` is written to
    ``file_list[i]``. For each path, any existing file is removed first, then
    the dataframe is written with ``DataFrame.to_csv`` (index included). A
    status line is printed for the removal attempt and for the export.

    :param file_list: Target csv paths.
    :param df_list: Dataframes to export; needs at least as many entries as
        ``file_list``. Surplus dataframes are ignored.

    :raises IndexError: If ``df_list`` has fewer entries than ``file_list``.
        The check runs before any file is removed.
    '''
    if len(df_list) < len(file_list):
        raise IndexError(
            f'df_list has {len(df_list)} entries but file_list has {len(file_list)}'
        )

    # Pair by position instead of looking the path up with list.index(), which
    # returns the first match and therefore picks the wrong dataframe when the
    # same path appears more than once.
    for path, df in zip(file_list, df_list):
        try:
            os.remove(path)
            print(f'{path} removed')
        except OSError:
            # Typically the file does not exist yet; to_csv creates it below.
            print(f'{path} not removed')

        df.to_csv(path_or_buf=path)
        print(f'{path} exported')

In [8]:
# Export targets and the frames written to them; the two lists are paired by position.
# NOTE(review): no call to make_k733_cuid_continuous() is visible before this export —
# confirm whether the k733 2020 ids are meant to be written unshifted.
intersection_file_list = [
    '../datasets/k729_2022_cuid.csv',
    '../datasets/k733_2018_cuid.csv',
    '../datasets/k733_2020_cuid.csv',
]
intersection_df_list = [
    df_k729_2022_cuid,
    df_k733_2018_cuid,
    df_k733_2020_cuid,
]
overwrite_export_file(intersection_file_list, intersection_df_list)

../datasets/k729_2022_cuid.csv removed
../datasets/k729_2022_cuid.csv exported
../datasets/k733_2018_cuid.csv removed
../datasets/k733_2018_cuid.csv exported
../datasets/k733_2020_cuid.csv removed
../datasets/k733_2020_cuid.csv exported


In [10]:
# Collapse every recording to one row per track_id: each remaining column then holds
# the list of that track's values.
df_k729_2022_cuid_grouped = df_k729_2022_cuid.groupby(by='track_id').agg(list)
df_k733_2018_cuid_grouped = df_k733_2018_cuid.groupby(by='track_id').agg(list)
df_k733_2020_cuid_grouped = df_k733_2020_cuid.groupby(by='track_id').agg(list)

# Export targets and the grouped frames written to them, paired by position.
intersection_file_list = [
    '../datasets/k729_2022_cuid_grouped.csv',
    '../datasets/k733_2018_cuid_grouped.csv',
    '../datasets/k733_2020_cuid_grouped.csv',
]
intersection_df_list = [
    df_k729_2022_cuid_grouped,
    df_k733_2018_cuid_grouped,
    df_k733_2020_cuid_grouped,
]
overwrite_export_file(intersection_file_list, intersection_df_list)

../datasets/k729_2022_cuid_grouped.csv not removed
../datasets/k729_2022_cuid_grouped.csv exported
../datasets/k733_2018_cuid_grouped.csv not removed
../datasets/k733_2018_cuid_grouped.csv exported
../datasets/k733_2020_cuid_grouped.csv not removed
../datasets/k733_2020_cuid_grouped.csv exported
