# Attribute Patterns

Steps:
1. Prepare Data
2. Generate Graph Model with Dataset
3. Detect patterns
    1. Prepare Graph
    2. Generate Embedding
    3. Detect Patterns

## 1. Prepare Data

In [3]:
# Uncomment the next line to install the UCI ML repository client if it is missing.
#%pip install ucimlrepo

import sys
from ucimlrepo import fetch_ucirepo

# Add the parent directory to the import path. Later cells import the
# `toolkit` package, which presumably lives there -- confirm the repo layout.
sys.path.append("..")
# Fetch the dataset with id 172 through ucimlrepo (the variable name suggests
# this is the Ozone Level Detection dataset).
ozone_level_detection = fetch_ucirepo(id=172)

In [4]:
import pandas as pd


# Work on an explicit copy of the first 1000 rows. `head()` returns a slice of
# the fetched frame, and assigning a column to such a slice triggers pandas'
# SettingWithCopyWarning; the copy also guarantees the fetched data object is
# never modified by the edits below.
input_dataframe = pd.DataFrame(ozone_level_detection.data.original).head(1000).copy()

# Reduce the date to its year only; "Date" is used as the period column when
# the graph model is generated later in the notebook.
input_dataframe["Date"] = pd.to_datetime(input_dataframe["Date"]).dt.year

# Keep only the columns Dataset, Date, WSR0, WSR1, WSR2, WSR3 and WSR4.
input_dataframe = input_dataframe[
    ["Dataset", "Date", "WSR0", "WSR1", "WSR2", "WSR3", "WSR4"]
]



In [None]:
# Prepare data
#
# Pass the trimmed input dataframe through the toolkit's `prepare_data` step.
# The result is the input of `generate_graph_model` in the next code cell.
# NOTE(review): the exact transformation is defined in
# toolkit.attribute_patterns.model and is not visible from this notebook.

from toolkit.attribute_patterns.model import prepare_data

data_prepared = prepare_data(input_dataframe)

## 2. Generate Graph Model with Dataset

In [None]:
from toolkit.attribute_patterns.model import generate_graph_model

# Column of the prepared data that holds the period of each record.
period_col = "Date"
model = generate_graph_model(data_prepared, period_col)

# Size of the graph model: number of links, and number of distinct values in
# the "Subject ID", "Full Attribute" and "Period" columns.
link_count = len(model)
case_count = len(model["Subject ID"].unique())
attribute_count = len(model["Full Attribute"].unique())
model_period_count = len(model["Period"].unique())

print(
    f"Graph model has **{link_count}** links spanning **{case_count}** cases, "
    f"**{attribute_count}** attributes, and **{model_period_count}** periods."
)

## 3. Detect Pattern Steps

### 1. Prepare Graph

In [None]:
from toolkit.attribute_patterns.model import prepare_graph

# Derive the two structures consumed by the embedding step from the graph model.
graph_df, time_to_graph = prepare_graph(model)

# Display both results, one after the other.
print(graph_df, time_to_graph, sep="\n")

### 2. Generate Embedding

In [None]:
from toolkit.attribute_patterns.embedding import generate_embedding

# Generate the embedding from the outputs of `prepare_graph`.
# `node_to_centroid` and `period_embeddings` are passed on to
# `detect_patterns`; `embedding_df` is not used again in the visible cells.
# NOTE(review): the structure of the three results is defined by the toolkit
# and cannot be confirmed from this notebook.
embedding_df, node_to_centroid, period_embeddings = generate_embedding(
    graph_df, time_to_graph
)

### 3. Detect Patterns

In [None]:
from toolkit.attribute_patterns.model import detect_patterns

# Thresholds handed to `detect_patterns`.
# NOTE(review): presumably the minimum number of occurrences for a pattern to
# be kept and the maximum number of attributes in a pattern -- confirm against
# the `detect_patterns` implementation.
min_pattern_count = 15
max_pattern_length = 10

# Besides the pattern table, two numeric values are returned; the summary cell
# reports them as converging attribute pairs (`close_pairs` out of `all_pairs`).
pattern_df, close_pairs, all_pairs = detect_patterns(
    node_to_centroid, period_embeddings, model, min_pattern_count, max_pattern_length
)
# Show the first ten rows of the detected patterns.
print(pattern_df.head(10))

In [None]:
# Summary statistics over the detected patterns.
period_count = len(pattern_df["period"].unique())
pattern_count = len(pattern_df)
unique_count = len(pattern_df["pattern"].unique())

# Percentage of attribute pairs that converged. Report 0 when there are no
# pairs at all, so the division is never attempted with a zero denominator.
if all_pairs > 0:
    converging_pct = round(close_pairs / all_pairs * 100, 2)
else:
    converging_pct = 0

summary = (
    f"Over **{period_count}** periods, detected **{pattern_count}** attribute patterns "
    f"(**{unique_count}** unique) from **{close_pairs}**/**{all_pairs}** converging "
    f"attribute pairs (**{converging_pct}%**). Patterns ranked by "
    "```overall_score = normalize(length * ln(count) * z_score * detections)```."
)
print(summary)

## Generate AI Report

In [None]:
import os
from toolkit.AI.openai_configuration import OpenAIConfiguration
from toolkit.AI.client import OpenAIClient
from toolkit.attribute_patterns.model import (
    compute_attribute_counts,
    create_time_series_df,
    prepare_for_ai_report,
)

# Choose a pattern: take the first row of the pattern table and read its
# pattern and period.
pattern_row = pattern_df.iloc[0]
pattern, period = pattern_row["pattern"], pattern_row["period"]

# Build the inputs of the report prompt: the time series derived from the
# graph model and the attribute counts for the chosen pattern and period.
time_series = create_time_series_df(model, pattern_df)
att_counts = compute_attribute_counts(input_dataframe, pattern, period_col, period)

messages = prepare_for_ai_report(pattern, period, time_series, att_counts)
print(messages)

# OpenAI settings; the API key is read from the OPENAI_API_KEY environment
# variable (`os.getenv` yields None when the variable is not set).
openai_settings = {
    "api_type": "OpenAI",
    "api_key": os.getenv("OPENAI_API_KEY"),
    "model": "gpt-4o-2024-08-06",
}
ai_configuration = OpenAIConfiguration(openai_settings)

# Request the report as a single, non-streamed chat completion and display it.
ai_client = OpenAIClient(ai_configuration)
report = ai_client.generate_chat(messages, stream=False)
print(report)