In [1]:
import json
import random
from pathlib import Path

In [4]:
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    The file is read as UTF-8 and parsed one line at a time, so the raw
    text of the whole file is never held in memory alongside the parsed
    records.

    Args:
        file_path: Path of the JSON Lines file to read.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            # Blank lines (e.g. a trailing empty line at the end of the file)
            # are not records; json.loads would raise on them.
            if not stripped:
                continue
            records.append(json.loads(stripped))
    return records

def write_jsonl(data, file_path):
    """Serialize each element of ``data`` as one JSON line in ``file_path``.

    The target file is opened in write mode (truncating any existing
    content) with UTF-8 encoding. Non-ASCII characters are written as-is
    rather than as ``\\uXXXX`` escapes.

    Args:
        data: Iterable of JSON-serializable objects.
        file_path: Destination path of the JSON Lines file.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + '\n'
            for record in data
        )

def split_data(data, n_parts):
    """Cut ``data`` into ``n_parts`` consecutive slices of near-equal length.

    When ``len(data)`` is not a multiple of ``n_parts``, the first
    ``len(data) % n_parts`` slices each receive one extra element, so slice
    lengths differ by at most one. Order is preserved.

    Args:
        data: Sliceable sequence to partition.
        n_parts: Number of slices to produce.

    Returns:
        A list of ``n_parts`` slices of ``data`` (some may be empty when
        ``n_parts`` exceeds ``len(data)``).
    """
    base_size, remainder = divmod(len(data), n_parts)
    chunks = []
    start = 0
    for idx in range(n_parts):
        # The first `remainder` chunks absorb the leftover elements.
        size = base_size + (1 if idx < remainder else 0)
        stop = start + size
        chunks.append(data[start:stop])
        start = stop
    return chunks

def create_permutations(data_parts, n_permutations, seed=None):
    """Build flattened datasets from randomly re-ordered parts.

    Each permutation shuffles the order of the parts (items inside a part
    keep their relative order) and then concatenates the parts into one
    flat list. ``data_parts`` itself is not modified; a shallow copy is
    shuffled instead.

    Permutations are drawn independently, so the same ordering may appear
    more than once.

    Args:
        data_parts: List of lists, e.g. the output of ``split_data``.
        n_permutations: Number of shuffled datasets to generate.
        seed: Optional seed for reproducible output. When given, a private
            ``random.Random(seed)`` generator is used and the module-level
            generator is left untouched. When ``None`` (the default), the
            module-level ``random`` generator is used.

    Returns:
        A list of ``n_permutations`` flat lists, each containing every item
        of every part.
    """
    # Both `random.Random` instances and the `random` module expose shuffle().
    rng = random.Random(seed) if seed is not None else random
    permutations = []
    for _ in range(n_permutations):
        perm = data_parts[:]  # copy so the caller's part order is preserved
        rng.shuffle(perm)
        permutations.append([item for sublist in perm for item in sublist])
    return permutations

In [5]:
# --- Configuration ---
# NOTE(review): absolute, machine-specific path; consider pointing this at a
# location relative to the project so the notebook runs on other machines.
input_file = 'C:/Users/ADMIN/Desktop/DATN/Extract_Information/data/mave_filtered_test.jsonl'
output_dir = 'output'
n_parts = 10
n_permutations = 10
seed = 42  # fixed so that re-running the notebook regenerates the same files

# Seed the module-level generator used for shuffling the parts.
random.seed(seed)

# Read the data
data = read_jsonl(input_file)

# Split the data into n_parts
data_parts = split_data(data, n_parts)

# Create n_permutations of the parts
permutations = create_permutations(data_parts, n_permutations)

# Create output directory if it doesn't exist
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Write each permutation to its own JSON Lines file (permuted_1.jsonl, ...)
for i, perm in enumerate(permutations):
    output_file = Path(output_dir) / f'permuted_{i+1}.jsonl'
    write_jsonl(perm, output_file)