### Post-processing with an LLM

Takes the raw LLM output and converts it into a JSON structure that matches a given schema.

In [None]:
import json
from typing import Dict, Any, Optional
from ollama import Client

class JsonStructurer:
    """Turn free-form text into JSON matching a schema by prompting Ollama models.

    Each configured model is asked in turn to convert the text; the first
    reply that parses as non-empty JSON is returned. If no model produces a
    usable reply, a placeholder with every schema key set to ``None`` is
    returned instead.
    """

    # Models tried, in order, when the caller does not supply a list.
    DEFAULT_MODELS = ('llama3.3:latest',)

    def __init__(self, models: Optional[list] = None, client: Optional[Any] = None):
        """Create a structurer.

        Args:
            models: Model names to try in order. Defaults to ``DEFAULT_MODELS``.
            client: Object exposing ``chat(model=..., messages=...)``. Defaults
                to a freshly constructed ``ollama.Client()``.
        """
        self.client = Client() if client is None else client
        self.models = list(self.DEFAULT_MODELS) if models is None else list(models)

    def validate_json(self, text: str) -> Optional[Dict[Any, Any]]:
        """Parse ``text`` as JSON, returning ``None`` when it is not valid JSON.

        Models frequently wrap their answer in a Markdown code fence
        (```` ```json ... ``` ````) even when told not to, so if the raw text
        does not parse, the fence is removed and parsing is retried once.

        Returns:
            Whatever ``json.loads`` produces for the (possibly unfenced) text,
            or ``None`` if neither form is valid JSON.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Only str input can carry a Markdown fence worth retrying.
            if not isinstance(text, str):
                return None
            unfenced = JsonStructurer._strip_code_fence(text)

        if unfenced == text:
            return None
        try:
            return json.loads(unfenced)
        except json.JSONDecodeError:
            return None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return the contents of a single enclosing Markdown fence, else ``text``."""
        stripped = text.strip()
        fenced = stripped.startswith('```') and stripped.endswith('```')
        if len(stripped) < 6 or not fenced:
            return text

        inner = stripped[3:-3]
        first_line, newline, rest = inner.partition('\n')
        tag = first_line.strip()
        # The opening fence line is either empty or a language tag such as "json".
        if newline and (not tag or tag.isalnum()):
            inner = rest
        return inner.strip()

    def structure_text(self, text: str, schema: Dict) -> Dict:
        """Ask each model to convert ``text`` into JSON matching ``schema``.

        Args:
            text: Raw text to convert.
            schema: Mapping describing the desired output; it is embedded in
                the prompt as JSON and its keys drive the fallback structure.

        Returns:
            The first truthy parsed reply, or ``create_empty_structure(schema)``
            when every model fails, errors, or returns empty/invalid JSON.
        """
        for model in self.models:
            try:
                prompt = f"""
                Convert this text to JSON matching exactly this schema: {json.dumps(schema, indent=2)}
                Text: {text}
                Requirements:
                1. Output must be valid JSON
                2. Must match schema exactly
                3. Return ONLY the JSON, no other text
                """

                response = self.client.chat(model=model, messages=[
                    {
                        'role': 'user',
                        'content': prompt
                    }
                ])

                parsed = self.validate_json(response['message']['content'])
            except Exception as e:
                # Deliberately broad: any failure with one model (connection,
                # missing model, malformed response) should fall through to
                # the next model rather than abort the whole conversion.
                print(f"Error with model {model}: {e}")
                continue

            # Empty results ({} / [] / null) are treated as a failed attempt.
            if parsed:
                return parsed

        return self.create_empty_structure(schema)

    @staticmethod
    def create_empty_structure(schema: Dict) -> Dict:
        """Return a dict with the same keys as ``schema`` and every value ``None``."""
        return dict.fromkeys(schema)

In [None]:
structurer = JsonStructurer()

# Target schema: every event field is described to the model as a plain string.
schema = dict.fromkeys(
    ("event", "event_who", "event_when", "event_what", "event_type"),
    "string",
)

In [None]:
# Sample raw LLM output used as input for the demo below: a Markdown-formatted
# list of numbered events, each with Event / Event_who / Event_what /
# Event_when / Event_type bullet fields.
# NOTE(review): the literal "..." line inside the string looks like truncated
# content; it is passed to the model verbatim - confirm this is intended.
response_content = """**Event 1:**

*   **Event:** The case originated in an application (no. 11236/09) against the Republic of Turkey lodged with the Court under Article 34 of the Convention for the Protection of Human Rights and Fundamental Freedoms (“the Convention”) by a Turkish national, Mr Mehmet Aytunç Altay (“the applicant”), on 17 February 2006.
    *   **Event_who:** Mr Mehmet Aytunç Altay (“the applicant”)
    *   **Event_what:** lodged an application
    *   **Event_when:** 17 February 2006
    *   **Event_type:** event_procedure

**Event 2:**

*   **Event:** The applicant was represented by Ms G. Tuncer, a lawyer practising in Istanbul. The Turkish Government (“the Government”) were represented by their Agent.
    *   **Event_who:** The applicant, The Turkish Government (“the Government”)
    *   **Event_what:** was represented
    *   **Event_when:** *Not specified*
    *   **Event_type:** event_procedure

**Event 3:**

*   **Event:** The applicant alleged, in particular, that the restriction of the privacy of his consultations with his lawyer was incompatible with his rights under Article 8 of the Convention and that the domestic proceedings with respect to this measure had not complied with the requirements of Article 6 § 1 of the Convention.
    *   **Event_who:** The applicant
...
    *   **Event_who:** *Not specified*
    *   **Event_what:** was given, was declared
    *   **Event_when:** 17 October 2017
    *   **Event_type:** event_procedure
    
"""
# Build a structurer, convert the sample text against `schema`, and
# pretty-print the resulting JSON.
structurer = JsonStructurer()
result = structurer.structure_text(response_content, schema)
print(json.dumps(result, indent=2))