In [1]:
#loan env file
import dotenv
dotenv.load_dotenv()

True

In [2]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.pydantic_v1 import BaseModel
from langchain_experimental.tabular_synthetic_data.base import SyntheticDataGenerator
from langchain_experimental.tabular_synthetic_data.openai import (
    create_openai_data_generator,
    OPENAI_TEMPLATE,
)
from langchain_experimental.tabular_synthetic_data.prompts import (
    SYNTHETIC_FEW_SHOT_SUFFIX,
    SYNTHETIC_FEW_SHOT_PREFIX,
)

In [8]:
#data model
class NewInsurance(BaseModel):
    first_name:str
    last_name:str
    date_of_birth:str
    policy_value:int
    address: str
    country:str
    province_state:str

examples = [
    {
        "example": """First Name: John, Last Name: Doe, Date of Birth: 1980-01-05, 
        Policy Value: 70000, Address: Toronto M2J 0C3, Country:CA, Province/State:ON """
    },
    {
        "example": """First Name: San, Last Name: Rob, Date of Birth: 1965-11-05, 
        Policy Value: 80000, Address: 463 Pitt St K6J 3R2, Country:CA, Province/State:ON """
    },
    {
        "example": """First Name: Emily, Last Name: Stone, Date of Birth: 1980-01-05, 
        Policy Value: 90000, Address: 1326 Richford Road J0J 1C0, Country:CA, Province/State:QC """
    },
]



In [4]:
OPENAI_TEMPLATE = PromptTemplate(input_variables=["example"], template="{example}")

prompt_template = FewShotPromptTemplate(
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    examples=examples,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
    example_prompt=OPENAI_TEMPLATE,
)

In [9]:
synthetic_data_generator = create_openai_data_generator(
    output_schema=NewInsurance,
    llm=ChatOpenAI(
        temperature=1
    ), 
    prompt=prompt_template,
)

In [10]:
synthetic_results = synthetic_data_generator.generate(
    subject="New Insurance Policies",
    extra="""the name must be chosen at random. Make it something you wouldn't normally choose. Policy value can be any value between 70000 to 120000 \
    Province/State can be ON,BC or QC
    """,
    runs=2,
)

In [11]:
synthetic_results

[NewInsurance(first_name='Liam', last_name='Johnson', date_of_birth='1990-10-07', policy_value=95000, address='789 Cedar St', country='CA', province_state='ON'),
 NewInsurance(first_name='Sarah', last_name='Smith', date_of_birth='1991-09-12', policy_value=88000, address='987 Maple St', country='CA', province_state='BC')]

In [12]:
from langchain.callbacks import get_openai_callback
costs=None
with get_openai_callback() as cb:
    #result = llm.invoke("Tell me a joke")
    synthetic_results = synthetic_data_generator.generate(
    subject="New Insurance Policies",
    extra="""the name must be chosen at random. Make it something you wouldn't normally choose. Policy value can be any value between 70000 to 120000 \
    Province/State can be ON,BC or QC
    """,
    runs=2,
    )
    print(cb)
    costs=cb

Tokens Used: 768
	Prompt Tokens: 624
	Completion Tokens: 144
Successful Requests: 2
Total Cost (USD): $0.001224


In [13]:
synthetic_results

[NewInsurance(first_name='Liam', last_name='Johnson', date_of_birth='1990-10-07', policy_value=95000, address='789 Cedar St', country='CA', province_state='ON'),
 NewInsurance(first_name='Sarah', last_name='Smith', date_of_birth='1991-09-12', policy_value=88000, address='987 Maple St', country='CA', province_state='BC'),
 NewInsurance(first_name='Samantha', last_name='Jones', date_of_birth='1985-12-05', policy_value=92000, address='123 Oak St', country='CA', province_state='ON'),
 NewInsurance(first_name='Emma', last_name='Wilson', date_of_birth='1988-06-23', policy_value=105000, address='456 Pine St', country='CA', province_state='BC')]