### Configure Input Data Source

In [None]:
from tecton_gen_ai.testing import set_dev_mode

# Enable tecton_gen_ai's dev mode up front, before any sources, entities, or
# feature views are declared further down.
# NOTE(review): the exact effect of dev mode (e.g. local/mock execution) is not
# visible in this notebook — confirm against the tecton_gen_ai docs.
set_dev_mode()

In [None]:
import pandas as pd
import pathlib

from tecton import Entity
from tecton.types import Field, String
from tecton_gen_ai.testing import make_local_source, set_dev_mode


# Load the call transcripts (JSON Lines: one record per line) from the current
# working directory. `pathlib.Path().parent` evaluates to ".", so the plain
# relative path below points at exactly the same file without the detour.
#
# The frame has its own name on purpose: a later cell binds `df` to the
# extracted features, and sharing that name would let a re-run of this cell
# silently swap what the inspection cells at the bottom are looking at.
transcripts_raw = pd.read_json(pathlib.Path("output.jsonl"), lines=True)

# Wrap the raw frame with make_local_source under the name "call_transcripts".
src = make_local_source(
    "call_transcripts",
    transcripts_raw,
    description="Call transcripts",
)

# Entity named "transcript", joined on the string column `transcript_id`.
transcript_id = Entity(
    name="transcript",
    join_keys=[
        Field(name="transcript_id", dtype=String),
    ],
)

## Create LLM-Extracted Feature Views

### Define Target Schema

In [None]:
import pydantic
from enum import Enum
from typing import Literal
from datetime import datetime


# Closed set of topic labels a transcript can be tagged with. The string values
# (not the member names) are what appear in the `topic` column of the
# extracted features.
# NOTE(review): POLICY_COVERAGE vs COVERAGE and CLAIMS_PROCESS vs CLAIMS_FILING
# look like overlapping categories — confirm the distinction is intentional.
# NOTE(review): documented with `#` comments rather than a docstring because a
# class docstring may be picked up as the schema description — confirm before
# converting.
class TranscriptTopic(Enum):
    POLICY_COVERAGE = "policy-coverage"
    CLAIMS_PROCESS = "claims-process"
    DISCOUNTS = "discounts"
    RENEWAL = "renewal"
    CLAIMS_FILING = "claims-filing"
    PREMIUMS = "premiums"
    POLICY_CHANGES = "policy-changes"
    COVERAGE = "coverage"
    ACCIDENT = "accident"
    OTHER = "other"


# Target schema for the per-transcript extraction: each field corresponds to
# one output column (`summary`, `topic`, `sentiment`).
# NOTE(review): kept as `#` comments because a model docstring may be emitted
# into the generated JSON schema as a description — confirm before converting.
class TranscriptFeatures(pydantic.BaseModel):
    # Free-text summary; the only field that carries an explicit description.
    summary: str = pydantic.Field(description="Summary of the conversation")
    # Restricted to the TranscriptTopic values.
    topic: TranscriptTopic
    # Exactly one of the three listed sentiment labels.
    sentiment: Literal["positive", "neutral", "negative"]

In [None]:
from tecton_gen_ai.extraction import llm_extraction

# One extraction entry: which model to call, which source column to read, and
# which schema the result should conform to.
extraction_config = [
    {
        "model": "openai/gpt-4o",
        "column": "conversation",
        "schema": TranscriptFeatures,
    },
]

# Build the feature view over the local source, keyed by the transcript entity.
fv = llm_extraction(
    src,
    extraction_config,
    entities=[transcript_id],
)

In [None]:
# Request the features for the window 2024-01-01 .. 2024-01-03, then convert
# the result to pandas for inspection in the cells below.
features_in_window = fv.get_features_in_range(
    start_time=datetime(2024, 1, 1),
    end_time=datetime(2024, 1, 3),
)
df = features_in_window.to_pandas()
df

Unnamed: 0,transcript_id,summary,topic,sentiment,_valid_from,_valid_to
0,pabc-d115d4e2,Alex Johnson inquires about customer service h...,other,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
1,pabc-33d5c4e5,John Doe called Insured to inquire about his p...,coverage,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
2,pabc-cfb69c46,"The customer, John Doe, is frustrated with the...",renewal,negative,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
3,pabc-07502e3c,John Doe called to inquire about the increase ...,premiums,negative,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
4,pabc-19fc9565,Emily Carter called to report a minor accident...,claims-filing,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
5,pabc-d5e9249d,A customer named John Doe is seeking assistanc...,claims-filing,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
6,pabc-3a2202de,A customer named Jane is seeking assistance wi...,claims-filing,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
7,pabc-e59c36cb,John Peterson is experiencing issues filing a ...,claims-filing,negative,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
8,pabc-223b170b,Alice Smith calls Insured to inquire about her...,premiums,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00
9,pabc-9d3e0732,"The customer, John, seeks assistance in filing...",claims-filing,positive,2024-01-02 00:00:00+00:00,2024-01-03 00:00:00+00:00


In [None]:
# Full, untruncated summary text of the first row.
df["summary"].iloc[0]

'Alex Johnson inquires about customer service hours and learns that they are available from 8 AM to 8 PM, Monday to Friday, and 9 AM to 5 PM on Saturdays. These hours apply to both phone and in-person visits. For support outside these hours, options include email or online chat.'

In [None]:
# Show every column of the first extracted row as a Series.
df.iloc[0]

transcript_id                                        pabc-d115d4e2
summary          Alex Johnson inquires about customer service h...
topic                                                        other
sentiment                                                 positive
_valid_from                              2024-01-02 00:00:00+00:00
_valid_to                                2024-01-03 00:00:00+00:00
Name: 0, dtype: object

In [None]:
# Inspect the column dtypes of the extracted features. In the recorded output
# the id and extracted fields are `object`, and the `_valid_from` / `_valid_to`
# bounds are UTC-aware datetimes.
df.dtypes

transcript_id                 object
summary                       object
topic                         object
sentiment                     object
_valid_from      datetime64[us, UTC]
_valid_to        datetime64[us, UTC]
dtype: object