In [None]:
from google.colab import files
import json

# Function to read JSONL file into a list of objects
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. a trailing empty line) carry no record; skipping
        # them avoids a JSONDecodeError on otherwise valid files.
        return [json.loads(line) for line in f if line.strip()]

# Function to write list of objects back to JSONL file
def write_jsonl(file_path, data):
    """Serialize each element of ``data`` to ``file_path`` as one JSON line.

    The file is opened in write mode, so any existing content is replaced.

    Args:
        file_path: Destination path of the JSONL file.
        data: Iterable of JSON-serializable values, written in order.
    """
    with open(file_path, 'w') as out:
        out.writelines(json.dumps(record) + '\n' for record in data)

# Function to filter out the specified range or specific indices of objects under the chosen classes
def _matches_any_spec(position, specs):
    """Return True if the 1-based ``position`` is selected by any of ``specs``."""
    for spec in specs:
        if isinstance(spec, int):
            if position == spec:
                return True
        elif isinstance(spec, (tuple, list)):
            # Accept the bounds in either order so that a reversed range such
            # as (13, 10) selects the same items as (10, 13) instead of
            # silently selecting nothing.
            low, high = sorted((spec[0], spec[1]))
            if low <= position <= high:
                return True
    return False


def remove_objects_by_class(data, class_ranges):
    """Drop selected occurrences of objects, addressed per class.

    Objects are numbered separately for every class listed in
    ``class_ranges``: the first object in ``data`` whose ``'class'`` value is
    that class has index 1, the next one index 2, and so on.

    Args:
        data: Iterable of dict-like objects; the class of each one is read
            with ``obj.get('class')``.
        class_ranges: Mapping from class name to a collection of removal
            specs. Each spec is either an ``int`` (a single 1-based index) or
            a two-item tuple/list ``(start, end)`` describing an inclusive
            range whose bounds may be given in either order.

    Returns:
        A new list holding, in their original order, all objects that were
        not selected for removal. Objects whose class is not a key of
        ``class_ranges`` are always kept.
    """
    kept = []
    seen_per_class = {class_name: 0 for class_name in class_ranges}

    for obj in data:
        obj_class = obj.get('class')
        if obj_class not in class_ranges:
            # Classes that were not requested are passed through untouched.
            kept.append(obj)
            continue

        # Count this occurrence first: positions are 1-based and advance for
        # every object of the class, whether or not it ends up removed.
        seen_per_class[obj_class] += 1
        if not _matches_any_spec(seen_per_class[obj_class], class_ranges[obj_class]):
            kept.append(obj)

    return kept

# Main script
def process_jsonl(output_file='output.jsonl'):
    """Interactively remove objects from an uploaded JSONL file in Colab.

    Workflow: upload a file, prompt for class names and for the 1-based
    per-class indices/ranges to drop, write the filtered records to
    ``output_file`` and trigger a browser download of that file.

    Args:
        output_file: Path the filtered JSONL is written to before download.

    Raises:
        json.JSONDecodeError: If a non-blank line of the upload is not JSON.
        UnicodeDecodeError: If the upload is not valid UTF-8.
    """
    # Step 1: Upload the JSONL file
    uploaded = files.upload()  # Mapping of file name -> uploaded bytes
    if not uploaded:
        # The upload dialog was cancelled; there is nothing to process.
        print("No file was uploaded.")
        return
    # Only the first uploaded file is processed.
    input_file = next(iter(uploaded))

    # Step 2: Parse the JSONL data straight from the uploaded bytes.
    # NOTE(review): Colab may save the upload under a de-duplicated name
    # ("Saving X to X (3)"), so re-opening the dict key from disk could read
    # an older file of the same name - confirm against the Colab version in
    # use. Parsing the returned bytes is correct either way.
    data = [
        json.loads(line)
        for line in uploaded[input_file].decode('utf-8').split('\n')
        if line.strip()  # Ignore blank lines such as a trailing newline
    ]

    # Step 3: Mapping of class name -> list of indices / (start, end) ranges
    class_ranges = {}

    # Step 4: Keep asking for class names and removal specs until 'done'
    while True:
        class_name = input("Enter the class name (or 'done' to finish): ").strip()
        if class_name.lower() == 'done':
            break

        removal_indices = []

        while True:
            index_or_range = input(f"Enter an index or a range (e.g., 5 or 10-13) for class '{class_name}' (or 'done' to move to the next class): ").strip()
            if index_or_range.lower() == 'done':
                break

            if '-' in index_or_range:
                try:
                    start_index, end_index = map(int, index_or_range.split('-'))
                except ValueError:
                    print("Invalid range. Please enter a valid range like '10-13'.")
                else:
                    # Store the bounds in ascending order so that a range
                    # typed backwards (e.g. 13-10) still selects its items.
                    removal_indices.append(
                        (min(start_index, end_index), max(start_index, end_index))
                    )
            else:
                try:
                    removal_indices.append(int(index_or_range))
                except ValueError:
                    print("Invalid input. Please enter a valid index or range.")

        # Merge rather than overwrite, so entering the same class a second
        # time adds to the earlier selection instead of discarding it.
        class_ranges.setdefault(class_name, []).extend(removal_indices)

    # Step 5: Remove the specified objects from the classes
    modified_data = remove_objects_by_class(data, class_ranges)

    # Step 6: Save the modified data into a new file
    write_jsonl(output_file, modified_data)

    # Step 7: Download the modified file
    files.download(output_file)
    print(f"Modified data has been written to {output_file}")

# Run the interactive upload/filter/download workflow as soon as this cell executes
process_jsonl()


Saving Filtered_cleaned_ASCII_art_v3_intermediate_cleaning.jsonl to Filtered_cleaned_ASCII_art_v3_intermediate_cleaning (3).jsonl
Enter the class name (or 'done' to finish): zoo
Enter an index or a range (e.g., 5 or 10-13) for class 'zoo' (or 'done' to move to the next class): 1-3
Enter an index or a range (e.g., 5 or 10-13) for class 'zoo' (or 'done' to move to the next class): 5
Enter an index or a range (e.g., 5 or 10-13) for class 'zoo' (or 'done' to move to the next class): 10
Enter an index or a range (e.g., 5 or 10-13) for class 'zoo' (or 'done' to move to the next class): 12
Enter an index or a range (e.g., 5 or 10-13) for class 'zoo' (or 'done' to move to the next class): done
Enter the class name (or 'done' to finish): shoot
Enter an index or a range (e.g., 5 or 10-13) for class 'shoot' (or 'done' to move to the next class): 1-2
Enter an index or a range (e.g., 5 or 10-13) for class 'shoot' (or 'done' to move to the next class): 5
Enter an index or a range (e.g., 5 or 10-13) 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Modified data has been written to output.jsonl
