# Customer Feedback analysis with Laurium
This notebook explores complex data extraction using Laurium and introduces:
1) Extracting multiple fields from a piece of text.
2) Enforcing output categories using Literals.
3) Improving performance with few-shot prompting.

In [None]:
from typing import Literal

import pandas as pd
from langchain_core.output_parsers import PydanticOutputParser

from laurium.decoder_models import extract, llm, prompts, pydantic_models

### Multi-Field Extraction

#### Define Complex Output Schemas
Extract multiple pieces of structured data simultaneously

In [None]:
# Build the LLM handle that is later passed to the extractor:
# Ollama platform, qwen2.5:7b model, temperature 0.0.
feedback_llm = llm.create_llm(
    llm_platform="ollama",
    model_name="qwen2.5:7b",
    temperature=0.0,
)

## Enforcing output categories using Literals
To restrict the output of the LLM to a specific set of categories, declare the
field in the schema as a `typing.Literal` listing the allowed values. If the LLM
outputs a value which is not present in the literal, the output parser will fail.

In [None]:
# Output schema for analyzing customer feedback. Every field is declared as a
# Literal so that only the listed values are valid for that field.
schema = {
    "sentiment": Literal["positive", "negative", "neutral"],
    "urgency": Literal[1, 2, 3, 4, 5],  # 1-5 scale
    "department": Literal["IT", "Support", "Product", "Sales", "Other"],
    "action_required": Literal["yes", "no"],
}

# Human-readable description for each schema field; the keys mirror `schema`.
descriptions = {
    "sentiment": "Customer's emotional tone",
    "urgency": "How quickly this needs attention (1=low, 5=urgent)",
    "department": "Which department should handle this",
    "action_required": "Whether immediate action is needed",
}

# System message for the prompt: a base instruction plus a keyword list,
# both handed to the prompt helper.
system_message = prompts.create_system_message(
    base_message=(
        "Analyze customer feedback and extract structured information."
    ),
    keywords=[
        "urgent",
        "complaint",
        "praise",
        "bug",
        "feature",
    ],
)

#### Improve Accuracy with Examples
Add few-shot examples to guide the model.

In [None]:
# Few-shot examples for better extraction. Each example carries the input
# "text" plus one value for every schema field, so the JSON rendered from it
# matches the schema.
few_shot_examples = [
    # Outage report: negative, most urgent, needs immediate action.
    {
        "text": "System is down, can't access anything!",
        "sentiment": "negative",
        "urgency": 5,
        "department": "IT",
        "action_required": "yes",
    },
    # Pure praise at the lowest urgency: no immediate action is needed, so
    # this example demonstrates the "no" label (the examples previously
    # showed only "yes").
    {
        "text": "Love the new interface design",
        "sentiment": "positive",
        "urgency": 1,
        "department": "Product",
        "action_required": "no",
    },
]

# Build the extraction prompt from the system message, the few-shot examples
# with their human/assistant templates, the final query, and the schema with
# its field descriptions.
extraction_prompt = prompts.create_prompt(
    system_message=system_message,
    schema=schema,  # schema is still supplied alongside the examples
    descriptions=descriptions,
    examples=few_shot_examples,
    example_human_template="Feedback: {text}",
    # Placeholder names below match the keys of each few-shot example.
    # NOTE(review): the doubled braces are presumably escapes for literal
    # JSON braces in the template - confirm against the prompt helper.
    example_assistant_template="""{{
        "sentiment": "{sentiment}",
        "urgency": {urgency},
        "department": "{department}",
        "action_required": "{action_required}"
    }}""",
    final_query="Feedback: {text}",
)


# Model class for the output parser, built from the same schema and
# descriptions that were given to the prompt.
FeedbackModel = pydantic_models.make_dynamic_example_model(
    model_name="CustomerFeedbackAnalysis",
    schema=schema,
    descriptions=descriptions,
)

# Wrap the model in an output parser, then bundle the LLM, the prompt and the
# parser into a batch extractor.
parser = PydanticOutputParser(pydantic_object=FeedbackModel)
extractor = extract.BatchExtractor(
    parser=parser,
    prompt=extraction_prompt,
    llm=feedback_llm,
)

In [None]:
# Inspect the prompt: print the template of its first message.
print("Generated system message:")
system_prompt_msg = extraction_prompt.messages[0]
print(system_prompt_msg.prompt.template)

In [None]:
# Four sample feedback messages held in a single "text" column.
feedback_data = pd.DataFrame(
    [
        "The login system crashed and I lost all my work!",
        "Really appreciate the new dark mode feature",
        "Can we get a mobile app version soon?",
        "Billing charged me twice this month, need help",
    ],
    columns=["text"],
)

# Run the extractor over the sample feedback, reading input from the "text"
# column.
results = extractor.process_chunk(
    feedback_data,
    text_column="text",
)
# Print the results as plain text with the index omitted.
print(results.to_string(index=False))