This script reads JSON objects from the 'original_traj_mapped.json' file, filters the objects to include only those with a 'time' attribute greater than or equal to 3600 seconds, sorts them by the 'time' attribute in ascending order, and writes the filtered and sorted objects to the 'output.json' file.

In [1]:
import json

# Read the input, which holds one JSON object per line (JSON Lines layout).
input_file = 'original_traj_mapped.json'
with open(input_file, 'r', encoding='utf-8') as file:
    # Skip blank lines: json.loads('') raises, so a stray empty line would
    # otherwise abort the whole run.
    json_objects = [json.loads(line) for line in file if line.strip()]

# Keep only objects whose 'time' is at least 3600, ordered by ascending 'time'.
filtered_sorted_objects = sorted(
    (obj for obj in json_objects if obj['time'] >= 3600),
    key=lambda x: x['time']
)

# Write the surviving objects back out, again one JSON object per line.
# NOTE(review): the notebook description, the recorded output and the later
# cells all refer to 'output.json', while this cell writes
# 'output_dummy.json' -- confirm which file name is intended.
output_file = 'output_dummy.json'
with open(output_file, 'w', encoding='utf-8') as file:
    file.writelines(json.dumps(obj) + '\n' for obj in filtered_sorted_objects)

print("Filtered and sorted JSON objects have been written to", output_file)


Filtered and sorted JSON objects have been written to output.json


Cell 2: Python script that reads the 'output.json' file and prints the 'trip_id' and 'time' attribute of every entry in the file:

In [None]:
import json

def display_trip_ids_and_times(file_path):
    """Print the trip ID and time of every record in a JSON Lines file.

    Args:
        file_path: Path to a text file holding one JSON object per line.
            Every object must provide the 'trip_id' and 'time' keys.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks 'trip_id' or 'time'.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank lines are skipped because json.loads('') raises; without the
        # guard a single empty line would abort the whole listing.
        json_objects = [json.loads(line) for line in file if line.strip()]

    for obj in json_objects:
        print(f"Trip ID: {obj['trip_id']}, Time: {obj['time']}")

# Demo run: list the trips stored in 'output.json'.
# Change file_path if the file lives somewhere else.
file_path = 'output.json'
display_trip_ids_and_times(file_path=file_path)


Trip ID: 17ec115a645c3165fce8a08dc02aa22bfc9ebd490be1996f110441f4fdfb6971, Time: 3603.497999
Trip ID: e71faf56f8dc7a5c59ba0d9e6a94e51312f433da82dc539570e69a95942ecefa, Time: 3606.006999
Trip ID: ee28fc89ced2dfce3a5f95cf168278f78752de76944103706a765702b0d9bc41, Time: 3610.046
Trip ID: 1aaadc77952fd5276ee2f0190c3b8a7632dacea6263336fae6545b35508cd582, Time: 3617.504999
Trip ID: 713bfea73ed723b5edb042d81061768f9eb85b1feeee964a728f2320de16b494, Time: 3618.895999
Trip ID: 731f8f0920ca599fa07130d90cfd5a5412a5284da589346337a2a0d5feb644fd, Time: 3627.681999
Trip ID: 1ae4509ab4d556fa25ce00d666fba85b9ec224a432da609522d2914df3cba17c, Time: 3629.048
Trip ID: 5b4a8eeb6c95bba2b3f6a599b11c651d19353077f7adfb2bc919d46905e5bfbc, Time: 3630.837999
Trip ID: 3692e90fe8410b5ffcf520a5ea0092850841e377712b74847ac5f7b5dd95f4b8, Time: 3634.021999
Trip ID: ae9d4125c28e1e19984aa087c3d9eab30bc33aefa547aff7ab8bd3ae34dca5b2, Time: 3640.542999
Trip ID: 9a3af894d34286f385d68b2ac3d58eddca85606679119f2fc35f0a5c4fce02b9, T

In [2]:
import json
import csv

def display_trip_ids_and_times(file_path, output_csv_path):
    """Print each record's trip ID and time and save the same pairs as CSV.

    Args:
        file_path: Path to a text file holding one JSON object per line.
            Every object must provide the 'trip_id' and 'time' keys.
        output_csv_path: Destination CSV file. It is overwritten and gets a
            'trip_id,time' header followed by one row per record, in input
            order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks 'trip_id' or 'time'.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank lines are skipped because json.loads('') raises; without the
        # guard a single empty line would abort the export.
        json_objects = [json.loads(line) for line in file if line.strip()]

    # newline='' leaves line-ending handling to the csv module.
    with open(output_csv_path, 'w', newline='') as csvfile:
        fieldnames = ['trip_id', 'time']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for obj in json_objects:
            print(f"Trip ID: {obj['trip_id']}, Time: {obj['time']}")
            writer.writerow({'trip_id': obj['trip_id'], 'time': obj['time']})

# Demo run: print the trips from 'output.json' and mirror them into a CSV.
# Adjust both paths to match your own input and desired output location.
file_path = 'output.json'
output_csv_path = 'output.csv'
display_trip_ids_and_times(
    file_path=file_path,
    output_csv_path=output_csv_path,
)


Trip ID: 17ec115a645c3165fce8a08dc02aa22bfc9ebd490be1996f110441f4fdfb6971, Time: 3603.497999
Trip ID: e71faf56f8dc7a5c59ba0d9e6a94e51312f433da82dc539570e69a95942ecefa, Time: 3606.006999
Trip ID: ee28fc89ced2dfce3a5f95cf168278f78752de76944103706a765702b0d9bc41, Time: 3610.046
Trip ID: 1aaadc77952fd5276ee2f0190c3b8a7632dacea6263336fae6545b35508cd582, Time: 3617.504999
Trip ID: 713bfea73ed723b5edb042d81061768f9eb85b1feeee964a728f2320de16b494, Time: 3618.895999
Trip ID: 731f8f0920ca599fa07130d90cfd5a5412a5284da589346337a2a0d5feb644fd, Time: 3627.681999
Trip ID: 1ae4509ab4d556fa25ce00d666fba85b9ec224a432da609522d2914df3cba17c, Time: 3629.048
Trip ID: 5b4a8eeb6c95bba2b3f6a599b11c651d19353077f7adfb2bc919d46905e5bfbc, Time: 3630.837999
Trip ID: 3692e90fe8410b5ffcf520a5ea0092850841e377712b74847ac5f7b5dd95f4b8, Time: 3634.021999
Trip ID: ae9d4125c28e1e19984aa087c3d9eab30bc33aefa547aff7ab8bd3ae34dca5b2, Time: 3640.542999
Trip ID: 9a3af894d34286f385d68b2ac3d58eddca85606679119f2fc35f0a5c4fce02b9, T

In [3]:
import json
import csv

def process_json(file_path, output_csv_path, max_length=30):
    """Report per-trip array lengths and export the short trips to CSV.

    For every record the number of elements in its 'time_gap', 'lats',
    'lngs' and 'dist_gap' arrays is computed. All records are printed in
    ascending order of 'time_gap' length; only those whose 'time_gap' length
    is strictly below ``max_length`` are written to the CSV file.

    Args:
        file_path: Path to a text file holding one JSON object per line.
            Every object must provide 'trip_id', 'time_gap', 'lats', 'lngs'
            and 'dist_gap'.
        output_csv_path: Destination CSV file; it is overwritten.
        max_length: Exclusive upper bound on the 'time_gap' length for a row
            to be written to the CSV. Defaults to 30.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank lines are skipped because json.loads('') raises.
        json_objects = [json.loads(line) for line in file if line.strip()]

    # One summary dict per trip; the keys double as the CSV column names.
    processed_data = [
        {
            'trip_id': obj['trip_id'],
            'time_gap_length': len(obj['time_gap']),
            'lats_length': len(obj['lats']),
            'lngs_length': len(obj['lngs']),
            'dist_gap_length': len(obj['dist_gap']),
        }
        for obj in json_objects
    ]

    # Sort the data by the length of the time_gap array (stable, ascending).
    processed_data.sort(key=lambda x: x['time_gap_length'])

    # Display the trip_id and corresponding array lengths for every trip,
    # including those that are too long to be exported below.
    for data in processed_data:
        print(
            f"Trip ID: {data['trip_id']}, Time Gap Length: {data['time_gap_length']}, "
            f"Lats Length: {data['lats_length']}, Lngs Length: {data['lngs_length']}, "
            f"Dist Gap Length: {data['dist_gap_length']}"
        )

    # Write to the CSV file only the trips shorter than max_length.
    with open(output_csv_path, 'w', newline='') as csvfile:
        fieldnames = ['trip_id', 'time_gap_length', 'lats_length', 'lngs_length', 'dist_gap_length']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(
            data for data in processed_data
            if data['time_gap_length'] < max_length
        )

# Demo run: summarise the array lengths of every trip in the mapped
# trajectory file and export the short ones to a CSV.
# Adjust both paths to match your own input and desired output location.
file_path = 'original_traj_mapped.json'
output_csv_path = 'filtered_output.csv'
process_json(file_path=file_path, output_csv_path=output_csv_path)


Trip ID: a8178a79f9a2b059f4e951777992d4e6c36e3782a5767e4010f0831a46e36d6a, Time Gap Length: 12, Lats Length: 12, Lngs Length: 12, Dist Gap Length: 12
Trip ID: e72c13b0204f5985e80ec36e0dd4dc20fbf06d432af9ad0d3bfd637985c28423, Time Gap Length: 13, Lats Length: 13, Lngs Length: 13, Dist Gap Length: 13
Trip ID: 08d42691a20f566e666116e678f3c56651624a768b7a43092a5245acbbc71ace, Time Gap Length: 15, Lats Length: 15, Lngs Length: 15, Dist Gap Length: 15
Trip ID: f92066c34fdf0cf874e0487deb1cdbf76a2204c5629b66dfef220295d07fd942, Time Gap Length: 19, Lats Length: 19, Lngs Length: 19, Dist Gap Length: 19
Trip ID: d529f4e49de7a055fcc0a0ad4276cdcf58da8220335c8331d6c1ddb83b8a3e54, Time Gap Length: 21, Lats Length: 21, Lngs Length: 21, Dist Gap Length: 21
Trip ID: a31567d75e54b46bccbc3140f0d593a11c53d26897b7853d58c0f9b0696da0d6, Time Gap Length: 22, Lats Length: 22, Lngs Length: 22, Dist Gap Length: 22
Trip ID: bc2741603f81a883a9517929f08e72babd480ec49c7c278bc33ac77a654e586c, Time Gap Length: 27, Lats

In [9]:
import csv

def sort_and_filter_csv_by_column(input_file, output_file, column_name, max_time_minutes):
    """Copy a CSV, keeping rows below a numeric limit, sorted ascending.

    Args:
        input_file: Path of the CSV file to read; its first row is the header.
        output_file: Path of the CSV file to write; it is overwritten.
        column_name: Name of the column to filter and sort on. Its values are
            converted with ``float``.
        max_time_minutes: Exclusive upper bound; only rows whose
            ``column_name`` value is strictly less than this are kept.

    Raises:
        KeyError: If ``column_name`` is not a column of the input.
        ValueError: If a value in ``column_name`` cannot be parsed as a float.
    """
    # newline='' is required by the csv module so that line breaks embedded
    # in quoted fields are read back unchanged.
    with open(input_file, mode='r', newline='') as file:
        csv_reader = csv.DictReader(file)
        rows = list(csv_reader)
        # Capture the header while the file is still open: on an empty input
        # the reader would otherwise try to read from the closed file later.
        fieldnames = csv_reader.fieldnames

    # Filter first, then order the survivors by the same numeric column.
    filtered_rows = sorted(
        (row for row in rows if float(row[column_name]) < max_time_minutes),
        key=lambda row: float(row[column_name]),
    )

    # Write the filtered and sorted rows to a new CSV file.
    with open(output_file, mode='w', newline='') as file:
        # An empty input has no header, so the output is left empty as well.
        if fieldnames:
            csv_writer = csv.DictWriter(file, fieldnames=fieldnames)
            csv_writer.writeheader()
            csv_writer.writerows(filtered_rows)

# Demo run on the January pooling data: keep the rows whose
# captain_engagement_time is below the limit and store them sorted.
# Adjust the two paths to match your own input and output locations.
input_file = 'Pooling/anon_pooling_jan_24_amman.csv'
output_file = 'Pooling_Output/sorted_pooling_jan.csv'
column_name = 'captain_engagement_time'  # column used for filtering and sorting
max_time_minutes = 15  # upper limit passed to the filter

sort_and_filter_csv_by_column(
    input_file=input_file,
    output_file=output_file,
    column_name=column_name,
    max_time_minutes=max_time_minutes,
)


In [2]:
import csv

def sort_and_filter_csv_by_column(input_file, output_file, column_name, max_time_minutes, filter_day):
    """Copy the rows of one day from a CSV, sorted ascending by a column.

    Args:
        input_file: Path of the CSV file to read; its first row is the header
            and it must contain a 'day' column.
        output_file: Path of the CSV file to write; it is overwritten.
        column_name: Name of the column to sort on. Its values are converted
            with ``float``.
        max_time_minutes: Accepted for call compatibility but NOT applied:
            rows are filtered by day only, whatever their ``column_name``
            value is.
        filter_day: Only rows whose 'day' value equals this string are kept.

    Raises:
        KeyError: If the input has no 'day' column, or ``column_name`` is
            missing from a kept row.
        ValueError: If a kept row's ``column_name`` value cannot be parsed as
            a float.
    """
    # newline='' is required by the csv module so that line breaks embedded
    # in quoted fields are read back unchanged.
    with open(input_file, mode='r', newline='') as file:
        csv_reader = csv.DictReader(file)
        rows = list(csv_reader)
        # Capture the header while the file is still open: on an empty input
        # the reader would otherwise try to read from the closed file later.
        fieldnames = csv_reader.fieldnames

    # Keep every row of the requested day (no engagement-time limit), then
    # order them by the numeric value of the chosen column.
    filtered_rows = sorted(
        (row for row in rows if row['day'] == filter_day),
        key=lambda row: float(row[column_name]),
    )

    # Write the filtered and sorted rows to a new CSV file.
    with open(output_file, mode='w', newline='') as file:
        # An empty input has no header, so the output is left empty as well.
        if fieldnames:
            csv_writer = csv.DictWriter(file, fieldnames=fieldnames)
            csv_writer.writeheader()
            csv_writer.writerows(filtered_rows)

# Demo run on the February pooling data: extract the rows of a single day
# and store them sorted by captain_engagement_time.
# Adjust the two paths to match your own input and output locations.
input_file = 'Pooling/anon_pooling_feb_24_amman.csv'
output_file = 'Pooling_Output/sorted_pooling_feb_02_all_trips.csv'
column_name = 'captain_engagement_time'  # column used for sorting
max_time_minutes = 15  # forwarded to the function as its fourth argument
filter_day = '2024-02-02'  # only rows of this day are kept

sort_and_filter_csv_by_column(
    input_file=input_file,
    output_file=output_file,
    column_name=column_name,
    max_time_minutes=max_time_minutes,
    filter_day=filter_day,
)
