In [None]:
import csv
import os

import numpy as np
from tqdm import tqdm

# Ask user for node numbers for nodes_veh_list and nodes_acc_list
nodes_veh_input = input("Enter the numbers of columns used from the vehicle file, separated by space: ")
nodes_acc_input = input("Enter the numbers of columns used from the accident file separated by space: ")

#Vehicle File Columns (CRSS) = 14 19 40 42 43 59 60 61 62 63 64 67
#Accident File Columns (CRSS) = 5 7 13 21 24 25 28 31 33
#Vehicle File Columns (stats19) = 4 6 11 15
#Accident File Columns (stats19) = 8 9 10 19 20 21 22 27 28 29 32

# Convert the user input to sets of integers
nodes_veh_list = set(map(int, nodes_veh_input.split()))
nodes_acc_list = set(map(int, nodes_acc_input.split()))

header = []

# Ask the user to specify the vehicle data file
vehicle_data_file_path = input("Enter the path to the vehicle data file: ")
#/Users/islambabaev/Google Drive/Programming/HARA Automation/usdb_analysis/prepared_db/VEHICLE.csv

# Read the vehicle data file and process the header
with open(vehicle_data_file_path, 'r') as vehicle_data_file:
    vehicle_data_list = vehicle_data_file.readlines()
    header.extend([c for k, c in enumerate(vehicle_data_list[0].split(',')) if k in nodes_veh_list])

# Ask the user to specify the accident data file
accident_data_file_path = input("Enter the path to the accident data file: ")
#/Users/islambabaev/Google Drive/Programming/HARA Automation/usdb_analysis/prepared_db/ACCIDENT.csv

# Read the accident data file and process the header
with open(accident_data_file_path, 'r') as accident_data_file:
    accident_data_list = accident_data_file.readlines()
    header.extend([c for k, c in enumerate(accident_data_list[0].split(',')) if k in nodes_acc_list])

dataset_list = [header]

#Processing of vehicle data using a list comprehension
veh_indices = {0}.union(nodes_veh_list)
dataset_list.extend(
    [
        [c for k, c in enumerate(line.split(',')) if k in veh_indices]
        for line in tqdm(vehicle_data_list[1:], desc="Processing vehicle data", unit="lines")
    ]
)

#Processing of accident data using a list comprehension and dictionary comprehension
acc_indices = {0}.union(nodes_acc_list)
dataset_dict = {
    acc_values[0]: acc_values[1:]
    for acc_values in (
        [c for k, c in enumerate(line.split(',')) if k in acc_indices]
        for line in tqdm(accident_data_list[1:], desc="Processing accident data", unit="lines")
    )
}

for j in tqdm(range(len(dataset_list)), desc="Combining datasets", unit="rows"):
    key = str(dataset_list[j][0])
    if key in dataset_dict:
        dataset_list[j].extend(dataset_dict[key])
        dataset_list[j].pop(0)

#Saving combined file
data = np.asarray(dataset_list)
file_name = input("Please enter the desired file name (e.g., 'combined_data'): ")
np.savetxt(file_name + ".csv", data, delimiter=',', fmt='%s')
print("File saved")