In [1]:
import json
import os

# Function to transform each species entry to Solr-compatible format
def transform_data(input_data):
    """Flatten a mapping of species entries into a list of Solr documents.

    Args:
        input_data: Mapping of species name -> details dict. The keys read
            from each details dict are "introduction", "sections" (a mapping
            of section name -> text), "scientific_classification" (a mapping
            keyed by rank name such as "Kingdom" or "Genus"),
            "who_discovered", "conservation_status" and "image_url".

    Returns:
        A list with one flat dict per species, in the iteration order of
        ``input_data``. Missing fields default to the empty string.
    """
    # Solr field name -> key inside "scientific_classification".
    rank_fields = (
        ("kingdom", "Kingdom"),
        ("clade", "Clade"),
        ("order", "Order"),
        ("family", "Family"),
        ("subfamily", "Subfamily"),
        ("tribe", "Tribe"),
        ("genus", "Genus"),
        ("species", "Species"),
    )

    transformed_data = []

    for species_name, details in input_data.items():
        # `or {}` (rather than a .get default) also covers keys that are
        # present but hold JSON null, which would otherwise raise
        # AttributeError on the lookups below.
        sections = details.get("sections") or {}
        classification = details.get("scientific_classification") or {}

        # Combine all section texts into one field, separated by a blank
        # line; strip() removes leading/trailing whitespace of the result.
        sections_text = "\n\n".join(
            f"{text}" for text in sections.values()
        ).strip()

        # Flatten the entry for Solr's document format.
        document = {
            # Unique ID: the species name with spaces replaced by underscores.
            "id": species_name.replace(" ", "_"),
            "name": species_name,
            "introduction": details.get("introduction", ""),
            "sections": sections_text,
        }
        for field, rank in rank_fields:
            document[field] = classification.get(rank, "")
        document["who_discovered"] = details.get("who_discovered", "")
        document["conservation_status"] = details.get("conservation_status", "")
        document["image_url"] = details.get("image_url", "")

        transformed_data.append(document)

    return transformed_data

# Main function to process a single JSON file and output to Solr-compatible JSON
def process_single_json_file(input_file, output_file):
    """Convert one species JSON file into a Solr-compatible JSON file.

    Args:
        input_file: Path of the UTF-8 JSON file to read.
        output_file: Path of the UTF-8 JSON file to write; it is opened in
            write mode, so existing content is replaced.
    """
    # Parse the whole source file in one go.
    with open(input_file, encoding='utf-8') as source:
        raw_species = json.loads(source.read())

    # Flatten every entry into a Solr document.
    solr_documents = transform_data(raw_species)

    # Serialize with a two-space indent and write the result out.
    with open(output_file, mode='w', encoding='utf-8') as sink:
        sink.write(json.dumps(solr_documents, indent=2))

# Driver: paths for the one-shot conversion run below.
# Both are relative paths, so they resolve against the current working directory.
input_file = "json_global.json"  # Source JSON file; update this with the actual file path
output_file = "transformed_species_data.json"  # Destination file holding the Solr-ready documents

# Run the conversion: reads input_file, writes output_file.
process_single_json_file(input_file, output_file)

# Reached only if the call above returned without raising.
print(f"Transformation complete. Data saved to {output_file}")


Transformation complete. Data saved to transformed_species_data.json
