In [None]:
import os
import json
from collections import defaultdict

# Define the directory containing the folders
base_directory = "./job_listings"

# Directory that receives one merged file per distinct listing file name
output_directory = "./Merged_Job_Listings"


def load_records(file_path):
    """Read one listings file and return its records.

    Args:
        file_path: Path of a JSON file whose top-level value should be a list.

    Returns:
        The parsed list, or ``None`` when the file cannot be read, is not
        valid UTF-8/JSON, or does not contain a top-level list. A message is
        printed in each failure case so the file can be inspected later.
    """
    try:
        # Explicit encoding: the locale default differs between platforms.
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return None

    # list.extend() on a dict would silently add its keys as "records",
    # and on a scalar would raise TypeError, so reject non-lists up front.
    if not isinstance(data, list):
        print(
            f"Skipping {file_path}: expected a JSON list, "
            f"found {type(data).__name__}"
        )
        return None
    return data


def collect_records(base_directory):
    """Gather records from every ``job_listings_*.json`` file, grouped by file name.

    Only the immediate sub-folders of ``base_directory`` are scanned.

    Args:
        base_directory: Directory whose sub-folders hold the listing files.

    Returns:
        A ``defaultdict(list)`` mapping each file name to the concatenation
        of the records found under that name across all sub-folders.
    """
    merged = defaultdict(list)
    # os.listdir() order is arbitrary; sorting keeps the output reproducible.
    for folder_name in sorted(os.listdir(base_directory)):
        folder_path = os.path.join(base_directory, folder_name)
        if not os.path.isdir(folder_path):
            continue
        for file_name in sorted(os.listdir(folder_path)):
            if not (
                file_name.startswith("job_listings_")
                and file_name.endswith(".json")
            ):
                continue
            records = load_records(os.path.join(folder_path, file_name))
            # Failed files contribute no key, so no empty output is written
            # for them; a valid empty list still registers the file name.
            if records is not None:
                merged[file_name].extend(records)
    return merged


def dedupe_records(records):
    """Return ``records`` without duplicates, keeping first-seen order.

    Two records are duplicates when their JSON serialisation with sorted
    keys is identical.
    """
    unique_records = {
        json.dumps(record, sort_keys=True): record for record in records
    }
    return list(unique_records.values())


def merge_job_listings(base_directory, output_directory):
    """Merge, de-duplicate and write the job listings.

    Args:
        base_directory: Directory whose sub-folders hold the listing files.
        output_directory: Directory to write the merged files into; it is
            created if it does not exist.

    Returns:
        The mapping of file name to de-duplicated records that was written.
    """
    merged = collect_records(base_directory)

    # Remove duplicate entries in the merged data
    for file_name, records in merged.items():
        merged[file_name] = dedupe_records(records)

    # Save the merged data to the output directory
    os.makedirs(output_directory, exist_ok=True)
    for file_name, records in merged.items():
        output_path = os.path.join(output_directory, file_name)
        with open(output_path, "w", encoding="utf-8") as output_file:
            json.dump(records, output_file, indent=4)
    return merged


if os.path.isdir(base_directory):
    merged_data = merge_job_listings(base_directory, output_directory)
    print("Merging complete! Merged files are saved in:", output_directory)
else:
    # Report the missing input instead of failing inside os.listdir().
    merged_data = defaultdict(list)
    print(f"Nothing to merge: {base_directory} is not a directory")


Merging complete! Merged files are saved in: ./Merged_Job_Listings
