# Attribute Patterns

Steps:
1. Prepare Data
2. Generate Graph Model with Dataset
3. Detect Patterns
    1. Prepare Graph
    2. Generate Embedding
    3. Detect Patterns
4. Generate AI Report

## 1. Prepare Data

In [None]:
# Append the parent directory to the module search path. This has to run
# before the `toolkit` imports in the following cells.
# NOTE(review): presumably the notebook lives one level below the repository
# root that contains the `toolkit` package — confirm the expected working dir.
import sys
sys.path.append('..')

In [None]:
# Prepare data
import pandas as pd

from toolkit.attribute_patterns.model import prepare_data

# Read the input CSV. `df` stays in the notebook namespace because the
# AI-report cell at the end passes it to compute_attribute_counts.
df = pd.read_csv("./input/marketing_1.csv")

# Run the toolkit's preparation step on the raw frame; the result is the
# input to graph-model generation in the next section.
data_prepared = prepare_data(df)

## 2. Generate Graph Model with Dataset

In [None]:
from toolkit.attribute_patterns.model import generate_graph_model

# Column name handed to generate_graph_model as its second argument; it is
# reused later when computing attribute counts for the AI report.
period_col = "SignupYear"
model = generate_graph_model(data_prepared, period_col)

# Summarise the model: its row count plus the number of distinct values in
# the "Subject ID", "Full Attribute" and "Period" columns.
n_links = len(model)
n_cases = len(model["Subject ID"].unique())
n_attributes = len(model["Full Attribute"].unique())
n_periods = len(model["Period"].unique())
print(
    f"Graph model has **{n_links}** links spanning **{n_cases}** cases, "
    f"**{n_attributes}** attributes, and **{n_periods}** periods."
)

## 3. Detect Pattern Steps

### 1. Prepare Graph

In [None]:
from toolkit.attribute_patterns.model import prepare_graph

# Convert the graph model into the two objects consumed by the embedding step.
graph_df, time_to_graph = prepare_graph(model)

# Show both results for inspection, each starting on its own line.
print(graph_df, time_to_graph, sep="\n")

### 2. Generate Embedding

In [None]:
from toolkit.attribute_patterns.embedding import generate_embedding

# Embed the prepared graph. The second and third results are the inputs that
# pattern detection takes in the next section.
(
    embedding_df,
    node_to_centroid,
    period_embeddings,
) = generate_embedding(graph_df, time_to_graph)

### 3. Detect Patterns

In [None]:
from toolkit.attribute_patterns.model import detect_patterns

# Thresholds passed straight through to detect_patterns.
# NOTE(review): presumably the minimum number of records a pattern must cover
# and the maximum number of attributes in a pattern — confirm in the toolkit.
min_pattern_count, max_pattern_length = 15, 10

pattern_df, close_pairs, all_pairs = detect_patterns(
    node_to_centroid,
    period_embeddings,
    model,
    min_pattern_count,
    max_pattern_length,
)

# Preview the first ten rows of the detected patterns.
print(pattern_df.iloc[:10])

In [None]:
# Headline numbers for the detection summary.
period_count = len(pattern_df["period"].unique())
pattern_count = pattern_df.shape[0]
unique_count = len(pattern_df["pattern"].unique())

# Share of converging pairs as a percentage; reported as 0 when there are no
# pairs at all, which avoids dividing by zero.
if all_pairs > 0:
    converging_pct = round(close_pairs / all_pairs * 100, 2)
else:
    converging_pct = 0

print(
    f"Over **{period_count}** periods, detected **{pattern_count}** attribute patterns "
    f"(**{unique_count}** unique) from **{close_pairs}**/**{all_pairs}** converging "
    f"attribute pairs (**{converging_pct}%**). Patterns ranked by "
    "```overall_score = normalize(length * ln(count) * z_score * detections)```."
)

## Generate AI Report

In [None]:
from toolkit.AI.classes import LLMCallback
from toolkit.AI.client import OpenAIClient
from toolkit.attribute_patterns.model import (
    compute_attribute_counts,
    create_time_series_df,
    prepare_for_ai_report,
)


def on_stream(text) -> None:
    """Echo *text* to standard output, terminated by a newline.

    Installed further down as the ``on_llm_new_token`` hook of the
    ``LLMCallback`` instance that is passed to the chat call.
    """
    print(text)


# Choose a pattern to report on: the first row of the pattern table.
# Fail early with a readable message when detection produced no rows;
# otherwise `.iloc[0]` raises an opaque out-of-bounds IndexError.
if pattern_df.empty:
    raise ValueError(
        "pattern_df is empty: no attribute patterns were detected, "
        "so there is no pattern to report on."
    )
pattern_row = pattern_df.iloc[0]
pattern = pattern_row["pattern"]
period = pattern_row["period"]

# Supporting inputs for the report prompt: a time-series frame built from the
# model and all detected patterns, and attribute counts computed from the raw
# input frame for the chosen pattern and period.
time_series = create_time_series_df(model, pattern_df)
att_counts = compute_attribute_counts(df, pattern, period_col, period)

# Build the chat messages and show them before sending.
messages = prepare_for_ai_report(pattern, period, time_series, att_counts)
print(messages)

# Replace the callback's new-token hook with on_stream so that whatever the
# client passes to the hook is printed as it arrives.
on_callback = LLMCallback()
on_callback.on_llm_new_token = on_stream

# Request the report from the chat client and print the returned value.
report = OpenAIClient().generate_chat(messages, callbacks=[on_callback])
print(report)