# Building Hyper-Personalized Branding Agents

## Objective

**We want to leverage historical data points from LinkedIn posts to optimize and personalize the next post.**

## Assumptions

1. Stori can track each user's LinkedIn posts (views and comments).
2. There is already a way to do topic clustering, so we know each post's topic, and each user has only a limited number of topics associated with their posts.
3. Stori can collect these data points in near real time.



In [None]:
import pandas as pd
from datetime import datetime, timedelta
import random
from uuid import uuid4

# Post topics used for the "user_tecton" demo user when simulating events
# further down.
topics1 = [
    "Real-time feature engineering",
    "Scalable data processing",
    "Automated model deployment",
    "Feature store integration",
    "Data versioning and lineage",
    "Low-latency predictions",
    "Seamless data ingestion",
    "Advanced analytics capabilities",
    "Custom feature transformations",
    "End-to-end ML pipeline",
]

# Post topics used for the "user_fashon" demo user when simulating events
# further down.
topics2 = [
    "Beginner Fashion Design Workshop",
    "Advanced Sewing Techniques",
    "Sustainable Fashion Practices",
    "Digital Fashion Illustration",
    "Textile Design Basics",
    "Fashion Portfolio Development",
    "Creative Pattern Making",
    "Fashion Trend Forecasting",
    "Couture Embroidery Techniques",
    "Fashion Business Essentials",
]


# Fix the seed of the `random` module so the draws made through it below
# repeat from run to run.
random.seed(42)


def generate_views_events(user, topics, cap):
    """Yield synthetic "post viewed" events for one user.

    For every topic, between 1 and ``cap`` posts are simulated. Each event is
    a dict with the keys ``ts``, ``stori_user_id``, ``post_id``, ``topic`` and
    ``views``.

    Every random value, including the post id, is drawn from the ``random``
    module, so seeding it beforehand makes the whole event stream
    reproducible. (``uuid4`` does not honour ``random.seed``, which is why it
    is not used for the id.)

    Args:
        user: Value stored under ``stori_user_id`` in every event.
        topics: Iterable of topic names; each one gets at least one event.
        cap: Inclusive upper bound on the number of events per topic
            (must be >= 1, otherwise ``random.randint`` raises).

    Yields:
        dict: One event per simulated post.
    """
    start = datetime(2024, 10, 10)
    for topic in topics:
        for _ in range(random.randint(1, cap)):
            # Spread events uniformly over the five days following `start`.
            offset_days = random.uniform(0, 5)
            yield dict(
                ts=start + timedelta(days=offset_days),
                stori_user_id=user,
                # 8 hex chars: same shape as the first group of a UUID string.
                post_id=f"{random.getrandbits(32):08x}",
                topic=topic,
                views=random.randint(10, 50),
            )


def generate_comments(user, topics, cap):
    """Yield synthetic "post commented" events for one user.

    For every topic, between 1 and ``cap`` comments are simulated. Each event
    is a dict with the keys ``ts``, ``stori_user_id``, ``post_id``, ``topic``
    and ``comment``; the comment text is picked from three canned phrases.

    Every random value, including the post id, is drawn from the ``random``
    module, so seeding it beforehand makes the whole event stream
    reproducible. (``uuid4`` does not honour ``random.seed``, which is why it
    is not used for the id.)

    Args:
        user: Value stored under ``stori_user_id`` in every event.
        topics: Iterable of topic names; each one gets at least one event.
        cap: Inclusive upper bound on the number of events per topic
            (must be >= 1, otherwise ``random.randint`` raises).

    Yields:
        dict: One event per simulated comment.
    """
    comments = ["it is bad!", "it is good!", "i am not sure"]
    start = datetime(2024, 10, 10)
    for topic in topics:
        for _ in range(random.randint(1, cap)):
            # Spread events uniformly over the five days following `start`.
            offset_days = random.uniform(0, 5)
            yield dict(
                ts=start + timedelta(days=offset_days),
                stori_user_id=user,
                # 8 hex chars: same shape as the first group of a UUID string.
                post_id=f"{random.getrandbits(32):08x}",
                topic=topic,
                comment=random.choice(comments),
            )

## Post Views Events

In [None]:
# Simulate view events for both demo users (user_tecton first, then
# user_fashon, so the random draws happen in that order).
df1, df2 = (
    pd.DataFrame(generate_views_events(who, what, 10))
    for who, what in (("user_tecton", topics1), ("user_fashon", topics2))
)
df = pd.concat([df1, df2])
df["ts"] = pd.to_datetime(df["ts"])
# Single timeline ordered by timestamp, with a fresh 0..n-1 index.
views = df.sort_values("ts").reset_index(drop=True)

views

Unnamed: 0,ts,stori_user_id,post_id,topic,views
0,2024-10-10 03:00:04.646256,user_tecton,d7a315ea,Real-time feature engineering,27
1,2024-10-10 03:34:32.398797,user_tecton,736d6905,Automated model deployment,23
2,2024-10-10 05:29:56.133739,user_tecton,60ccf091,Seamless data ingestion,24
3,2024-10-10 07:40:59.187213,user_fashon,59139683,Fashion Trend Forecasting,11
4,2024-10-10 07:56:33.477451,user_tecton,32beb38c,End-to-end ML pipeline,46
...,...,...,...,...,...
85,2024-10-14 15:28:10.628977,user_fashon,4023563f,Creative Pattern Making,22
86,2024-10-14 17:37:18.629633,user_fashon,04c60767,Creative Pattern Making,20
87,2024-10-14 18:51:56.047193,user_tecton,066a6e17,Data versioning and lineage,31
88,2024-10-14 22:44:34.087475,user_tecton,2a7b84c0,Custom feature transformations,50


## Post Comment Events

In [None]:
# Simulate comment events for both demo users (user_tecton first, then
# user_fashon, so the random draws happen in that order).
df1, df2 = (
    pd.DataFrame(generate_comments(who, what, 9))
    for who, what in (("user_tecton", topics1), ("user_fashon", topics2))
)
df = pd.concat([df1, df2])
df["ts"] = pd.to_datetime(df["ts"])
# Single timeline ordered by timestamp, with a fresh 0..n-1 index.
comments = df.sort_values("ts").reset_index(drop=True)

comments

Unnamed: 0,ts,stori_user_id,post_id,topic,comment
0,2024-10-10 01:07:04.277374,user_tecton,b0001dbb,Advanced analytics capabilities,i am not sure
1,2024-10-10 01:09:37.310231,user_fashon,4b5251a4,Digital Fashion Illustration,it is bad!
2,2024-10-10 04:19:24.155642,user_fashon,9e5b2e14,Digital Fashion Illustration,it is good!
3,2024-10-10 04:35:27.109257,user_fashon,785373b2,Fashion Business Essentials,i am not sure
4,2024-10-10 04:46:08.398109,user_tecton,cefd3dc5,Seamless data ingestion,it is bad!
...,...,...,...,...,...
95,2024-10-14 20:14:42.445667,user_tecton,f191feb9,Low-latency predictions,i am not sure
96,2024-10-14 20:16:46.383147,user_tecton,595f3483,Feature store integration,it is bad!
97,2024-10-14 22:03:44.630029,user_tecton,ecc27d1a,Custom feature transformations,i am not sure
98,2024-10-14 23:48:52.318449,user_tecton,3ba4dc83,Advanced analytics capabilities,it is bad!


In [None]:
from tecton import Aggregate
from tecton.types import Field as TectonField, Int64, Float64, String
from tecton_gen_ai.testing import (
    make_local_source,
    set_dev_mode,
    make_local_batch_feature_view,
)

# Switch tecton_gen_ai into dev mode before any local sources / feature views
# are built. NOTE(review): presumably this lets everything run locally inside
# the notebook — confirm against the tecton_gen_ai docs.
set_dev_mode()

## User Info as Context

In [None]:
# Static company profile for each demo user, one record per `stori_user_id`.
user_info_data = [
    dict(
        stori_user_id="user_tecton",
        company_name="Tecton",
        description="Tecton is a SaaS company that provides a feature platform designed to streamline the creation, management, and deployment of machine learning features. It enables data teams to build real-time, production-ready features efficiently, enhancing the performance and scalability of AI models.",
    ),
    dict(
        stori_user_id="user_fashon",
        company_name="Fashon",
        description="Fashon is a cutting-edge fashion design studio that specializes in creating innovative and trendsetting apparel. With a focus on creativity and craftsmanship, Fashon delivers unique and stylish designs that cater to diverse tastes, setting new standards in the fashion industry.",
    ),
]

# Wrap the records in a local batch feature view keyed on `stori_user_id`.
user_info = make_local_batch_feature_view(
    "user_info",
    user_info_data,
    description="User company profile",
    entity_keys=["stori_user_id"],
)

## Topic Views as Context

In [None]:
# Feature view over the simulated `views` frame: keyed on `stori_user_id`,
# with `topic` as the aggregation secondary key, exposing a 365-day sum of
# the `views` column as `total_views`.
topic_views = make_local_batch_feature_view(
    "topic_views",
    views,
    description="Total views of posts of each topic",
    entity_keys=["stori_user_id"],
    aggregation_secondary_key="topic",
    timestamp_field="ts",
    aggregation_interval=timedelta(hours=4),
    features=[
        Aggregate(
            name="total_views",
            function="sum",
            input_column=TectonField("views", Int64),
            time_window=timedelta(days=365),
        ),
    ],
)

In [None]:
# A single request row for "user_tecton", stamped with the current local time.
request = pd.DataFrame(
    [dict(stori_user_id="user_tecton", ts=datetime.now())]
)
topic_views.get_features_for_events(request).to_pandas()

## Using an LLM to Extract Comment Sentiment as Context

In [None]:
from pydantic import BaseModel, Field
from tecton import Entity
from tecton_gen_ai.extraction import llm_extraction


# Structured result requested from the LLM for each comment; passed to
# `llm_extraction` below as `output_schema`.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would presumably end up in the pydantic-generated schema; confirm before
# converting these into a docstring.
class Sentiment(BaseModel):
    # Score in [0, 1] per the field description: 0 most negative, 1 most positive.
    sentiment_score: float = Field(
        description="sentiment score of the text, from 0 (most negative) to 1 (most positive)"
    )
    # Keywords pulled out of the comment text.
    keywords: list[str] = Field(description="keywords extracted from the comment")


# Local source over the simulated comment events, using "ts" as the
# timestamp field.
src = make_local_source("src", comments, timestamp_field="ts")

# Entity joined on the string column `stori_user_id`.
user_id = Entity(
    name="stori_user_id",
    description="Stori user id, starting with `user_`",
    join_keys=[TectonField(name="stori_user_id", dtype=String)],
)

# LLM-backed extraction over the comment source: the `comment` column is sent
# to the configured model with `Sentiment` as the output schema, and the
# resulting `sentiment_score` is averaged over a 365-day window with `topic`
# as the aggregation secondary key.
topic_sentiments = llm_extraction(
    src,
    name="topic_sentiments",
    description="Comments average sentiment (0 - 1) on posts with certain topics on linkedin",
    entities=[user_id],
    aggregation_secondary_key="topic",
    aggregation_interval=timedelta(hours=4),
    extraction_config=[
        {
            "model": "openai/gpt-4o-mini",
            "column": "comment",
            "output_schema": Sentiment,
        },
    ],
    features=[
        Aggregate(
            name="average_sentiment",
            function="mean",
            input_column=TectonField("sentiment_score", Float64),
            time_window=timedelta(days=365),
        ),
    ],
)

In [None]:
# Look up the sentiment features for the `request` frame built earlier
# (one row for "user_tecton") and show them as a pandas DataFrame.
topic_sentiments.get_features_for_events(request).to_pandas()

Unnamed: 0,stori_user_id,ts,topic_sentiments_batch__sentiment_score,topic_sentiments_batch__keywords
0,user_tecton,2024-12-03 16:09:34.050764+00:00,0.0,[bad]


## Building A Naive Agent Using tecton_gen_ai

In [None]:
from tecton_gen_ai.api import prompt, Agent

# Baseline agent: a fixed system prompt with no user-specific context and no
# tools attached.
agent1 = Agent(
    "app",
    # Grammar fixed ("You response" -> "Your response") so the instruction
    # sent to the model is well-formed.
    prompt="You are an agent who helps people build brands. Your response should be under 200 words.",
    llm="openai/gpt-4o",
)

In [None]:
from tecton_gen_ai.testing.interactive import qna

# Open the interactive Q&A widget for the baseline agent.
# NOTE(review): `diagram=True` presumably also renders the agent's
# structure — confirm against tecton_gen_ai.testing.interactive.
qna(agent1, diagram=True)

VBox(children=(Text(value='', continuous_update=False, layout=Layout(width='90%'), placeholder='Type something…

## Building A Better Agent With Personal Info Enriched Prompts

In [None]:
# System prompt enriched with the company profile of the user in context.
# `user_info` is indexed by "company_name" and "description"
# (NOTE(review): presumably the matching `user_info` feature view row,
# injected by the `prompt` decorator — confirm against tecton_gen_ai).
# Returns the base instruction followed by one sentence describing the company.
@prompt(sources=[user_info])
def sys_prompt(user_info):
    # The prefix ends with a period so it does not run into the company
    # sentence appended below; "You response" is corrected to "Your response".
    prefix = "You are an agent who helps people build brands. Your response should be under 200 words."
    return (
        prefix
        + f" You are serving a company {user_info['company_name']}: {user_info['description']}"
    )


# Same model as the baseline agent, but the system prompt is now produced by
# `sys_prompt`, which pulls in the user's company profile.
agent2 = Agent("app", prompt=sys_prompt, llm="openai/gpt-4o")

# The context supplies the user id that the prompt's sources are looked up by.
qna(agent2, context={"stori_user_id": "user_tecton"}, diagram=True)

VBox(children=(Text(value='', continuous_update=False, layout=Layout(width='90%'), placeholder='Type something…

## Building An Even Better Agent With Views as Dynamic Context

In [None]:
# Adds the `topic_views` feature view as a tool on top of the personalised
# system prompt.
agent3 = Agent("app", prompt=sys_prompt, tools=[topic_views], llm="openai/gpt-4o")

# Interactive session scoped to "user_tecton".
qna(agent3, context={"stori_user_id": "user_tecton"}, diagram=True)

VBox(children=(Text(value='', continuous_update=False, layout=Layout(width='90%'), placeholder='Type something…

## Build The Best Agent With All Structured Context Data You Have

In [None]:
# Full setup: personalised system prompt plus both structured-context tools
# (per-topic view totals and per-topic comment sentiment).
agent4 = Agent(
    "app", prompt=sys_prompt, tools=[topic_views, topic_sentiments], llm="openai/gpt-4o"
)

# Interactive session scoped to "user_tecton".
qna(agent4, context={"stori_user_id": "user_tecton"}, diagram=True)

VBox(children=(Text(value='', continuous_update=False, layout=Layout(width='90%'), placeholder='Type something…