In [8]:
import os

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [9]:
%load_ext autoreload
%autoreload 2

from extract_regulatory_notice import (
    NoticeEmailExtract
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# OpenAI

In [10]:
# Load the OpenAI API key from a per-user file and export it through the
# OPENAI_API_KEY environment variable for the rest of the notebook.
path_to_openai_key: str = os.path.expanduser('~/.openai/api_key')
with open(path_to_openai_key, 'r', encoding='utf-8') as key_file:
    _openai_api_key = key_file.read().strip()

# An empty or whitespace-only file would otherwise be exported as an empty
# key; fail here with a message that points at the file instead.
if not _openai_api_key:
    raise ValueError(f"OpenAI API key file is empty: {path_to_openai_key}")

os.environ["OPENAI_API_KEY"] = _openai_api_key
del _openai_api_key  # do not leave the secret lying around in the notebook namespace

In [11]:
# The prompt consists of a single system message that carries both the
# extraction instructions and the notice text, which is substituted for the
# {message} placeholder when the template is rendered.
# NOTE(review): the notice text is interpolated into the system message
# itself; if notices can come from untrusted senders, consider moving
# {message} into a separate human message -- confirm with the chain's owner.
_notice_system_message = (
    "system",
    """
            Parse the date of notice, sending entity name, sending entity
            phone, sending entity email, project id, site location,
            violation type, required changes, compliance deadline, and
            maximum potential fine from the message. If any of the fields
            aren't present, don't populate them. Try to cast dates into
            the YYYY-mm-dd format. Don't populate fields if they're not
            present in the message.

            Here's the notice message:

            {message}
            """,
)

info_parse_prompt = ChatPromptTemplate.from_messages([_notice_system_message])

# Chat model used for field extraction; temperature=0 is requested to keep
# sampling randomness to a minimum.
notice_parser_model = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# Bind the NoticeEmailExtract schema to the model, then feed the rendered
# prompt into that schema-bound model.
_structured_notice_model = notice_parser_model.with_structured_output(
    NoticeEmailExtract
)
NOTICE_PARSER_CHAIN = info_parse_prompt | _structured_notice_model