# Compare Case Groups

Demonstrates use of the Intelligence Toolkit library to compare groups in a dataset.

See the [README](https://github.com/microsoft/intelligence-toolkit/blob/main/app/workflows/compare_case_groups/README.md) for more details.

In [16]:
import sys

sys.path.append("..")
import polars as pl
from toolkit.compare_case_groups.api import CompareCaseGroups

In [None]:
# Create the workflow object
import os
from toolkit.AI.openai_configuration import OpenAIConfiguration


# Instantiate the workflow first, then attach the AI configuration to it.
ccg = CompareCaseGroups()

# The API key is read from the OPENAI_API_KEY environment variable; a missing
# variable raises KeyError here.
openai_settings = {
    "api_type": "OpenAI",
    "api_key": os.environ["OPENAI_API_KEY"],
    "model": "gpt-4o",
}
ai_configuration = OpenAIConfiguration(openai_settings)
ccg.set_ai_configuration(ai_configuration)

# Read the prepared example CSV with polars.
data_path = "../example_outputs/compare_case_groups/customer_complaints/customer_complaints_prepared.csv"
customer_cases = pl.read_csv(data_path)
print("Loaded data")

In [18]:
# Arguments for the data summary; all values are column names of the loaded
# CSV (passed positionally to create_data_summary in the next cell).
temporal = "period"
groups = ["city"]
filters = []  # empty list: no filters supplied

# The product code plus the per-category issue columns.
aggregates = ["product_code"]
aggregates += [
    "delivery_issue",
    "description_issue",
    "price_issue",
    "quality_issue",
    "service_issue",
]

In [19]:
# Build the data summary from the loaded cases and the parameters defined
# in the previous cell (arguments are passed positionally, in this order).
ccg.create_data_summary(customer_cases, filters, groups, aggregates, temporal)

In [None]:
# Preview the first rows of the workflow's model_df (presumably populated by
# the create_data_summary call above — confirm against the library).
ccg.model_df.head()

In [None]:
# Fetch the workflow's summary description; as the last expression in the
# cell, its return value is what the notebook displays.
ccg.get_summary_description()

In [24]:
# Select which groups to include in the report. Two alternatives are shown:
#   1. by explicit group name, or
#   2. by taking the top n groups.
# Only option 2 is actually passed to get_report_data below.
#
# NOTE: the name list is stored in `selected_groups` rather than `groups` so
# it does not overwrite the grouping columns (["city"]) defined earlier.
# Rebinding `groups` here would make a later re-run of the
# create_data_summary cell pass city names where column names are expected.
selected_groups = ["Baytown", "Brookside"]
top_groups = 4

report_data, filter_description = ccg.get_report_data(top_group_ranks=top_groups)

In [None]:
# Generate the group comparison report from the report data and filter
# description selected in the previous cell, then print it.
explanation = ccg.generate_group_report(report_data, filter_description)
print(explanation)