In [3]:
import os
import pandas as pd

def csv_to_json_with_filename(csv_file_path, output_directory):
    """Load a CSV file as a list of row dicts tagged with the file's base name.

    Args:
        csv_file_path: Path of the CSV file, read with ``pd.read_csv``.
        output_directory: Unused; retained so existing callers keep working.

    Returns:
        A list with one dict per CSV row (as produced by
        ``DataFrame.to_dict(orient='records')``). Each dict carries an extra
        ``'source_file'`` key holding the file name without its directory
        and without its final extension.
    """
    df = pd.read_csv(csv_file_path)

    # splitext strips whatever the final extension actually is, instead of
    # blindly dropping the last four characters (which mangles names whose
    # extension is not exactly three letters long).
    source_name = os.path.splitext(os.path.basename(csv_file_path))[0]

    return [
        {**record, 'source_file': source_name}
        for record in df.to_dict(orient='records')
    ]

def combine_csv_to_json(input_directory, output_json_path):
    """Merge every ``.csv`` file in a directory into one JSON file.

    Each file whose name ends in ``.csv`` is loaded through
    ``csv_to_json_with_filename`` and its records are appended to a single
    list, which is then written to ``output_json_path`` as a JSON array of
    records (indented by 4 spaces). A confirmation line is printed once the
    file has been written.

    Args:
        input_directory: Directory whose entries are scanned for CSV files
            (non-recursive).
        output_json_path: Destination path of the combined JSON file. Its
            parent directory is created if it does not exist.
    """
    # os.path.dirname returns '' for a bare file name, and os.makedirs('')
    # raises FileNotFoundError, so only create the directory when one is
    # actually present in the path.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    all_curriculum_data = []

    # os.listdir yields entries in arbitrary order; sorting makes the record
    # order in the combined output reproducible between runs.
    for csv_file in sorted(os.listdir(input_directory)):
        if not csv_file.endswith('.csv'):
            continue
        full_file_path = os.path.join(input_directory, csv_file)
        all_curriculum_data.extend(
            csv_to_json_with_filename(full_file_path, input_directory)
        )

    # Save combined data to a JSON file
    pd.DataFrame(all_curriculum_data).to_json(output_json_path, orient='records', indent=4)

    print(f"Combined JSON saved to: {output_json_path}")

# Example usage: merge the CSV files found in the grade_1 directory into a
# single JSON file written to that same directory.
input_directory = "/home/njui/kn_workspace/curriculum_taxonomy_extractor/data/interim/math/math/grade_1"
output_json_path = "/home/njui/kn_workspace/curriculum_taxonomy_extractor/data/interim/math/math/grade_1/combined_curriculum.json"
combine_csv_to_json(
    input_directory=input_directory,
    output_json_path=output_json_path,
)

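# # Alternative usage: specify the directory containing your CSV files.
# # NOTE: combine_csv_to_json also requires the output JSON path as its second argument.
# directory_path = "/home/njui/kn_workspace/curriculum_taxonomy_extractor/data/interim/math/math/grade_1"
# combine_csv_to_json(directory_path, os.path.join(directory_path, "combined_curriculum.json"))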


Combined JSON saved to: /home/njui/kn_workspace/curriculum_taxonomy_extractor/data/interim/math/math/grade_1/combined_curriculum.json
