# In [3]:
import json
import random

def shorten_dataset(input_file, output_file, target_size, sampling_method="random"):
    """
    Shortens a JSON dataset file while preserving its structure.

    The input file must hold a top-level JSON array of examples. The selected
    examples are written to the output file as a JSON array indented by four
    spaces. If the dataset has fewer than ``target_size`` examples, all of
    them are kept (for both sampling methods) instead of raising.

    Args:
        input_file (str): Path to the input JSON file.
        output_file (str): Path to save the shortened JSON file.
        target_size (int): Desired number of examples in the shortened dataset.
            Must be non-negative; values larger than the dataset are clamped
            to the dataset size.
        sampling_method (str, optional): "random" (default) for uniform random sampling,
                                          "first" to select the first N examples.

    Raises:
        ValueError: If ``sampling_method`` is not "random" or "first", or if
            ``target_size`` is negative.
        TypeError: If the top-level JSON value in ``input_file`` is not an array.
    """
    # Validate the arguments before touching the filesystem so a bad call
    # fails fast and never leaves a partially written output behind.
    if sampling_method not in ("random", "first"):
        raise ValueError("Invalid sampling method. Choose 'random' or 'first'.")
    if target_size < 0:
        # A negative size would make the "first" slice silently drop records
        # from the end of the dataset instead of failing.
        raise ValueError("target_size must be non-negative.")

    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise TypeError(
            "Expected a top-level JSON array in the input file, "
            f"got {type(data).__name__}."
        )

    # random.sample raises when asked for more items than exist, whereas
    # slicing just returns what is there; clamp so both methods agree.
    sample_size = min(target_size, len(data))

    if sampling_method == "random":
        shortened_data = random.sample(data, sample_size)
    else:
        shortened_data = data[:sample_size]

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(shortened_data, f, indent=4)

# Example usage: take a random sample of 1200 examples from
# finance_alpaca.json and save it to shortened_finance_alpaca.json.
sampling_method = "random"
target_size = 1200
input_file = "finance_alpaca.json"
output_file = "shortened_finance_alpaca.json"

shorten_dataset(
    input_file=input_file,
    output_file=output_file,
    target_size=target_size,
    sampling_method=sampling_method,
)
