References:
- https://medium.com/snowflake/just-the-gist-snowflake-cortex-llm-with-langchain-llm-5a91647f18c8
- https://medium.com/@mattchinnock/controlling-large-language-model-output-with-pydantic-74b2af5e79d1

In [1]:
import snowflake.connector
from snowflake.snowpark import Session
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from typing import Any, List, Mapping, Optional
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser 
from langchain_community.llms.vllm import VLLMOpenAI
from dotenv import load_dotenv
import pandas as pd
import os
import time

load_dotenv()
CSDW_USEC_PASSWORD = os.getenv("CSDW_USEC_PASSWORD")

## 0. Brief demo of Pydantic model parsing

In [74]:
from pydantic import BaseModel
from langchain_core.output_parsers.pydantic import PydanticOutputParser

# by default, the `extra` argument is set to 'ignore', extra fields will not be stored.
class User(BaseModel, extra="allow"):
    name: str
    age: int

class UserStrict(BaseModel, extra="forbid"):
    name: str
    age: int

# JSON input with extra keys
user_json = """
{
    "name": "Alice",
    "age": 30,
    "email": "alice@example.com",
    "department": "Commercial"
}
"""

# Parse the JSON input
user = User.model_validate_json(user_json)
print(user)
print(user.model_dump())
print(user.__pydantic_extra__)

# Repeat the same with the strict model
try:
    user_strict = UserStrict.model_validate_json(user_json) # Extra inputs are not permitted
except ValueError as e:
    print(e)

name='Alice' age=30 email='alice@example.com' department='Commercial'
{'name': 'Alice', 'age': 30, 'email': 'alice@example.com', 'department': 'Commercial'}
{'email': 'alice@example.com', 'department': 'Commercial'}
2 validation errors for UserStrict
email
  Extra inputs are not permitted [type=extra_forbidden, input_value='alice@example.com', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/extra_forbidden
department
  Extra inputs are not permitted [type=extra_forbidden, input_value='Commercial', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/extra_forbidden


In [79]:
user_json = """
{
    "name": "Alice",
    "age": 30,
    "email": "alice@example.com",
    "department": "Commercial"
}
other information
"""

user_parser = PydanticOutputParser(pydantic_object=User)
try:
    print(user_parser.parse(user_json))
    print("Successful")
except ValueError as e:
    print(e)

user_parser_strict = PydanticOutputParser(pydantic_object=UserStrict)
try:
    print(user_parser_strict.parse(user_json))
    print("Successful")
except ValueError as e:
    print(e)

name='Alice' age=30 email='alice@example.com' department='Commercial'
Successful
Failed to parse UserStrict from completion {"name": "Alice", "age": 30, "email": "alice@example.com", "department": "Commercial"}. Got: 2 validation errors for UserStrict
email
  Extra inputs are not permitted [type=extra_forbidden, input_value='alice@example.com', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/extra_forbidden
department
  Extra inputs are not permitted [type=extra_forbidden, input_value='Commercial', input_type=str]
    For further information visit https://errors.pydantic.dev/2.7/v/extra_forbidden


## 0. Setting Up Snowflake Connection and Custom LLM

In [2]:
connection_parameters = {
    "account": "psai-csdw_usec",
    "user": "JOSHUA_USEC",
    "password": CSDW_USEC_PASSWORD,
    "role": "DATA_SCIENTIST", 
    "warehouse": "COMPUTE_VWH",  # optional
    } 
sp_session = Session.builder.configs(connection_parameters).create()  

In [3]:
allowed_models = ['mistral-7b', 'mixtral-8x7b', 'llama2-70b-chat', 'gemma-7b']

In [4]:
# cortex_function = 'complete'
# model = 'mistral-7b'
# prompt_text = 'What is the meaning of life?'

# # sql_statement = f'''select snowflake.cortex.{cortex_function}('{model}','{prompt_text}') as llm_reponse;'''

# sql_statement = f""" 
# SELECT SNOWFLAKE.CORTEX.{cortex_function}
# (
#     {model},
#     [
#         {{
#             'role': 'user',
#             'content': '{prompt_text}'
#         }}
#     ],
#     {{
#         'temperature': 0,
#         'max_tokens': 10
#     }}
# );
# """

# sql_statement

In [5]:
class SnowflakeCortexLLM(LLM):

    sp_session: Session = None
    """Snowpark Session class instance, set before invoking the LLM to authenticate to an appropriate Snowflake account with Cortex LLMs provisioned."""

    model: str = 'mistral-7b'
    """The Snowflake cortex hosted LLM model name, default to `mistral-7b`. Refer to doc for other options."""

    cortex_function: str = 'complete'
    """The cortex function to use, defaulted to complete. for other types refer to doc"""

    llm_type: str = 'snowflake-cortex'
    """The type of LLM, defaulted to snowflake-cortex, for logging purposes only."""

    @property
    def _llm_type(self) -> str:
        return "snowflake_cortex"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Adapt the Snowflake Cortex LLM SQL-based API to this Python interface.
        Modify this accordingly to the available Snowflake Cortex LLM API.
        For example, this implementation is based on the following snowflake SQL command: 
        `SELECT SNOWFLAKE.CORTEX.COMPLETE('<model_name>', '<prompt_text>');`
        """    
        prompt_text = prompt
        # simple version
        # sql_statement = f'''select snowflake.cortex.{self.cortex_function}('{self.model}','{prompt_text}') as llm_reponse;'''
        # version with parameters and returns the token counts
        # use double {{}} to escape the curly braces in the f-string
        sql_statement = f""" 
            SELECT SNOWFLAKE.CORTEX.{self.cortex_function}
            (
                '{self.model}',
                [
                    {{
                        'role': 'user',
                        'content': '{prompt_text}'
                    }}
                ],
                {{
                    'temperature': 0
                }}
            )
            AS LLM_RESPONSE;            
            """
        l_rows = self.sp_session.sql(sql_statement).collect()
        llm_response = l_rows[0]['LLM_RESPONSE'] # only 1 row is expected from the SQL statement as it is applied to 1 prompt
        return llm_response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "model": self.model
            ,"cortex_function" : self.cortex_function
            ,"snowpark_session": self.sp_session.session_id
        }
    @property
    def _llm_type(cls) -> str:
        """Get the type of language model used by this chat model. Used for logging purposes only."""
        return cls.llm_type

llm = SnowflakeCortexLLM(sp_session=sp_session)
print(llm.model)

# set a different model from the default
llm = SnowflakeCortexLLM(sp_session=sp_session, model='mixtral-8x7b')
print(llm.model)

mistral-7b
mixtral-8x7b


In [6]:
sample_text = """
Officials involved in the clear up of the bridge collapse at Baltimore Port in the US have said the vessel that remains lodged among debris will be removed by 10 May.
While some ships have been able to navigate in and out of the port through a limited access channel opened up by the coastguard in the middle of the collapsed bridge, the Dali containership has remained in the place where it collided with the Francis Scott Key Bridge since the incident took place at the major port in Maryland on 26 March.
Ahead of the planned removal of the ship, a 35ft deep Fort McHenry Limited Access Channel that had been open for four days and allowed the first container ship to return to the port closed on 29 April, though the three other temporary channels, which are 20, 14 and 11ft deep, will remain open.
Maryland Governor Wes Moore highlighted some of the difficulties that have been faced by the team attempting to clear the bridge debris and Dali vessel.
"That work is remarkably complicated, we're talking about a massive piece of steel," he said.
"On one end the steel is leaning against a vessel that is the size of the Eiffel Tower and, on the other end, it is leaning against the bottom of the riverbed, so this work is dangerous."
Additionally, the clearance operation is also being run hand in hand with the continuing recovery operations for the two roadworkers still missing after falling with the bridge, with only four bodies recovered from the wreckage so far.
While the Maryland Government and Port of Baltimore provided further details about the removal of the vessel, the authorities would not be drawn on how much longer the cleanup and recovery effort could take.
However, the authorities have set the end of May as a target date for the reopening of the Port of Baltimore's permanent 50ft deep and 700ft wide channel, with an initial 45ft channel expected to open when the ship is removed around 10 May.
"""

## 1. Testing with Basic prompt

In [7]:
instruction_prompt = '''
You are a helpful AI assistant for data science domain. Help the user with the following query: {user_query}.
'''
user_query = '''Can you explain the concept of precision and recall in classification tasks, in about 100 words'''
l_prompt = PromptTemplate.from_template(instruction_prompt)


In [8]:
llm = SnowflakeCortexLLM(sp_session = sp_session, model = 'mistral-7b')
chain = l_prompt |  llm | StrOutputParser() 
tic = time.time()
llm_response  = chain.invoke({'user_query' : user_query})
toc = time.time()
print(f"Time taken for the LLM to respond: {toc-tic} seconds")
print(llm_response, sep='\n')

Time taken for the LLM to respond: 1.915893793106079 seconds
{
  "choices": [
    {
      "messages": " Absolutely! In classification tasks, Precision is the proportion of true positive predictions among all positive predictions made by a model. It measures the model's ability to correctly identify positive instances. Recall, on the other hand, is the proportion of true positive predictions among all actual positive instances in the data. It measures the model's ability to find all positive instances. A high precision model may miss some positive instances (low recall), while a high recall model may incorrectly label some negative instances as positive (low precision). Balancing these two metrics is crucial for effective classification."
    }
  ],
  "created": 1717983736,
  "model": "mistral-7b",
  "usage": {
    "completion_tokens": 118,
    "prompt_tokens": 52,
    "total_tokens": 170
  }
}


In [9]:
eval(llm_response)['usage']['total_tokens']

170

In [10]:
# results = []
# for model_name in allowed_models:
#     print(f"Running LLM {model_name}")
#     llm = SnowflakeCortexLLM(sp_session = sp_session, model = model_name)
#     chain = l_prompt |  llm | StrOutputParser() 
#     response_durations = []
#     response_contents = []
#     tokens = []
#     for _ in range(10):
#         tic = time.time()
#         llm_response  = chain.invoke({'user_query' : user_query})
#         toc = time.time()
#         response_time = toc - tic
#         total_tokens = eval(llm_response)['usage']['total_tokens']
#         response_durations.append(response_time)
#         response_contents.append(llm_response)
#         tokens.append(total_tokens)
#     average_response_time = sum(response_durations)/len(response_durations)
#     tokens_per_second = sum(tokens)/sum(response_durations)
#     result = (model_name, average_response_time, tokens_per_second, response_durations, response_contents)
#     results.append(result)

In [11]:
# df = pd.DataFrame(results, columns=['model_name', 'average_response_time', 'tokens_per_second', 'response_durations', 'response_contents'])
# df.sort_values(by='average_response_time', ascending=True, inplace=True)
# df

## 2. Extract structured output

Models in Snowpark do not have with_structured_output() implemented by LangChain yet, so we need to work around still by using Pydantic

In [12]:
class NewsInfo(BaseModel):
    """Information extracted from the text."""
    title: str = Field(
        description="One sentence summary of the article of maximum 200 characters, prefereably with the event, location and time information."
    )
    summary: str = Field(
        description="A short summary of the text, maximum 200 words"
    )
    impact: str = Field( 
        description="Answer only Yes or No to this question: does this event negatively impact a supply chain network (the movement of people and goods)? Answer this by following the following reasoning steps: \
            If the event can directly impact a supply chain network in a negative way, such as causing facility damage or traffic stopage, etc., then Yes. \
            Else if it can potentially disrupt the normal operations a supply chain network, such as social-political disruptions, extreme weathers, or other disruptions, etc., then asnwer Yes. \
            If not or uncertain, such as general knowledge, good news, individual personnel events, project annoucement etc., answer No",
        enum=["Yes", "No"]
    )
    reasoning: str = Field( 
        description="The reasoning behind your impact assessment based on the impact reasoning step above. Explain why you think the event will (Yes) or will not (No) impact the supply chain network."
    )
    vessel_name: Optional[list[str]] = Field(
        default=[""], 
        description="The names of the marine vessels or container ships mentioned in the text, if any."
    )

news_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm, specialized in news analysis."
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return 'Uncertain' for the attribute's value.",
        ),
        ("human", "{user_input}"),
    ]
)


### Sample input text

In [13]:
sample_text = """
Officials involved in the clear up of the bridge collapse at Baltimore Port in the US have said the vessel that remains lodged among debris will be removed by 10 May.
While some ships have been able to navigate in and out of the port through a limited access channel opened up by the coastguard in the middle of the collapsed bridge, the Dali containership has remained in the place where it collided with the Francis Scott Key Bridge since the incident took place at the major port in Maryland on 26 March.
Ahead of the planned removal of the ship, a 35ft deep Fort McHenry Limited Access Channel that had been open for four days and allowed the first container ship to return to the port closed on 29 April, though the three other temporary channels, which are 20, 14 and 11ft deep, will remain open.
Maryland Governor Wes Moore highlighted some of the difficulties that have been faced by the team attempting to clear the bridge debris and Dali vessel.
"""

In [14]:
# Note the mixtral-8x7b model, is not available for the structured output.
# however, the mistral-small endpoint from MistralAI and AzureML/AI studio is available for structured output
llm = SnowflakeCortexLLM(sp_session = sp_session, model='mixtral-8x7b')
try:
    news_runnable = news_prompt | llm.with_structured_output(schema=NewsInfo)
    body_structured = news_runnable.invoke({"user_input": sample_text})
    print(body_structured.dict())
except Exception as e:
    print(e)





In [15]:
# create a PydanticOutputParser object with the schema, this will be used to generate the format instructions, passed to the LLM prompt
from langchain_core.output_parsers.pydantic import PydanticOutputParser


pydantic_parser = PydanticOutputParser(pydantic_object=NewsInfo)
format_instructions = pydantic_parser.get_format_instructions()

NEWS_PARSING_PROMPT = """
You are an expert extraction algorithm, specialized in news analysis. Your goal is to understand and parse out the news article content based on the user instructions of the output schema. 
Only ouput the result into the schema without generating any other information outside the schema.
The scheme instructions are as followed:
{format_instructions}

news article content:
{news_content}
"""
prompt = PromptTemplate.from_template(NEWS_PARSING_PROMPT)
llm = SnowflakeCortexLLM(sp_session = sp_session, model='mistral-7b')
chain = prompt | llm | StrOutputParser() 
print(llm)

[1mSnowflakeCortexLLM[0m
Params: {'model': 'mistral-7b', 'cortex_function': 'complete', 'snowpark_session': 5516262723694610}


In [16]:
llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': sample_text})
result = eval(llm_response)['choices'][0]['messages']
print(result) # raw string, to validate against the pydantic model

eval(llm_response) # the full response with token counts

 {
"title": "Baltimore Port Bridge Collapse: Dali Containship Removal Delayed, Channel Closed",
"summary": "Officials have announced that the Dali containership, which collided with the Francis Scott Key Bridge at Baltimore Port in March, will be removed by 10 May. However, a deep access channel that had been open for four days, allowing the first container ship to return to the port, was closed on 29 April. Maryland Governor Wes Moore discussed the challenges faced by the team clearing the bridge debris and the vessel.",
"impact": "Yes",
"reasoning": "The bridge collapse at Baltimore Port has directly impacted the supply chain network by preventing the Dali containership from being removed, causing a closure of a deep access channel that had allowed the first container ship to return to the port. The closure of the channel will disrupt the normal operations of the supply chain network by limiting the number of vessels that can access the port.",
"vessel_name": ["Dali"]
}


{'choices': [{'messages': ' {\n"title": "Baltimore Port Bridge Collapse: Dali Containship Removal Delayed, Channel Closed",\n"summary": "Officials have announced that the Dali containership, which collided with the Francis Scott Key Bridge at Baltimore Port in March, will be removed by 10 May. However, a deep access channel that had been open for four days, allowing the first container ship to return to the port, was closed on 29 April. Maryland Governor Wes Moore discussed the challenges faced by the team clearing the bridge debris and the vessel.",\n"impact": "Yes",\n"reasoning": "The bridge collapse at Baltimore Port has directly impacted the supply chain network by preventing the Dali containership from being removed, causing a closure of a deep access channel that had allowed the first container ship to return to the port. The closure of the channel will disrupt the normal operations of the supply chain network by limiting the number of vessels that can access the port.",\n"vessel

In [17]:
# alternatively:
l_prompt = PromptTemplate.from_template(NEWS_PARSING_PROMPT)
prompt = ChatPromptTemplate.from_template(
    template=NEWS_PARSING_PROMPT,
    partial_variables = {
        "format_instructions": format_instructions # passing in the formatting instructions created earlier in place of "format_instructions" placeholder
    }
)

chain = {"news_content": lambda x: x["news_content"]} | prompt | llm
llm_response = chain.invoke({"news_content": sample_text})
result =eval(llm_response)['choices'][0]['messages']
eval(result)

{'title': 'Baltimore Port Bridge Collapse: Dali Containship Removal Delayed, Impacting Supply Chain',
 'summary': 'The Dali containership remains lodged among the debris of the collapsed bridge at Baltimore Port in the US, causing a delay in its removal and impacting the supply chain. The Fort McHenry Limited Access Channel, which allowed the first container ship to return to the port, has been closed, while three other temporary channels remain open.',
 'impact': 'Yes',
 'reasoning': 'The bridge collapse at Baltimore Port has caused the Dali containership to remain lodged among the debris, preventing it from being removed and disrupting the normal operations of the supply chain network. The closure of the Fort McHenry Limited Access Channel, which was the deepest channel allowing larger vessels to navigate in and out of the port, further exacerbates the disruption.',
 'vessel_name': ['Dali']}

In [18]:
# validate the llm response against the schema:
parsed_response = pydantic_parser.parse(eval(llm_response)['choices'][0]['messages']) # the validation works on a string, not a dictionary result from eval()
parsed_response

NewsInfo(title='Baltimore Port Bridge Collapse: Dali Containship Removal Delayed, Impacting Supply Chain', summary='The Dali containership remains lodged among the debris of the collapsed bridge at Baltimore Port in the US, causing a delay in its removal and impacting the supply chain. The Fort McHenry Limited Access Channel, which allowed the first container ship to return to the port, has been closed, while three other temporary channels remain open.', impact='Yes', reasoning='The bridge collapse at Baltimore Port has caused the Dali containership to remain lodged among the debris, preventing it from being removed and disrupting the normal operations of the supply chain network. The closure of the Fort McHenry Limited Access Channel, which was the deepest channel allowing larger vessels to navigate in and out of the port, further exacerbates the disruption.', vessel_name=['Dali'])

In [19]:
# if the response is not valid, the parser will raise an error
try:
    pydantic_parser.parse('{"foo":"bar"}')
except Exception as e:
    print(e)

Failed to parse NewsInfo from completion {"foo": "bar"}. Got: 4 validation errors for NewsInfo
title
  field required (type=value_error.missing)
summary
  field required (type=value_error.missing)
impact
  field required (type=value_error.missing)
reasoning
  field required (type=value_error.missing)


## 3. Experiment to test the function calling ability of all available models

Preparation steps
- Prepare the data, escape the single-quote character
- Define the Pydantic schema
- define the prompts: NEWS_PARSING_PROMPT with placeholders for the instructions and the actual text 

For each model in allowed model list:
- Initiate the custom LLM and build a chain
- invoke the chain
- eval the response
- valdiate with the Pydantic schema


In [21]:
from langchain_core.output_parsers.pydantic import PydanticOutputParser

pydantic_parser = PydanticOutputParser(pydantic_object=NewsInfo)
format_instructions = pydantic_parser.get_format_instructions()

NEWS_PARSING_PROMPT = """
You are an expert extraction algorithm, specialized in news analysis. Your goal is to understand and parse out the news article content based on the user instructions of the output schema. 
Only ouput the result into the schema without generating any other information outside the schema.
The scheme instructions are as followed:
{format_instructions}

news article content:
{news_content}
"""

prompt = PromptTemplate.from_template(NEWS_PARSING_PROMPT)

### a. Getting 100 articles, 10 are from WK Webster

In [20]:
df = pd.read_csv('./inputs/sample_news_api_ml_100rows.csv')
df_wkwebster = pd.read_csv('./inputs/sample_news_api_ml_wkwebster_2.csv')
body_cleaned = df['BODY_CLEANED'].to_list()[:90] + df_wkwebster['BODY_CLEANED'].to_list()[:10]
body_cleaned = [body.replace("'", "\\'") for body in body_cleaned]

### b. Helper evaluation function

In [22]:
def evaluate_llm(model:str, data:list, pydantic_parser):
    evaluation = {"model":[], "raw_response":[], "success":[], "response_time":[], "prompt_tokens":[], "completion_tokens":[], "total_tokens":[]}
    llm = SnowflakeCortexLLM(sp_session = sp_session, model=model)
    chain = prompt | llm | StrOutputParser() 
    for i in range(len(data)):
        if i+1 == 1 or (i+1)%5==0:
            print("Processing text: ", i+1, "/", len(data))
        text = data[i]
        evaluation["model"].append(model)
        try:
            tic = time.time()
            llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': text})
            toc = time.time()
            response_time = round(toc - tic, 2)
            result = eval(llm_response)['choices'][0]['messages']
            prompt_tokens = eval(llm_response)['usage']['prompt_tokens']
            completion_tokens = eval(llm_response)['usage']['completion_tokens']
            total_tokens = eval(llm_response)['usage']['total_tokens']
            evaluation["raw_response"].append(result)
            evaluation["response_time"].append(response_time)
            evaluation["prompt_tokens"].append(prompt_tokens)
            evaluation["completion_tokens"].append(completion_tokens)
            evaluation["total_tokens"].append(total_tokens)
            try:
                pydantic_parser.parse(result)
                evaluation["success"].append(1)
            except Exception as e:
                print(f"Error: {e}")
                evaluation["success"].append(0)
        except Exception as e:
            print(f"Error: {e}")
            evaluation["raw_response"].append(e)
            evaluation["response_time"].append("Error")
            evaluation["prompt_tokens"].append("Error")
            evaluation["completion_tokens"].append("Error")
            evaluation["total_tokens"].append("Error")
            evaluation["success"].append(0)

    return evaluation

### c. Testing with a model on a list of texts

In [None]:
sample_data = body_cleaned[91:93]
evaluation = evaluate_llm('gemma-7b', sample_data, pydantic_parser)
# for gemma-7b, the response starts with ```json and ends with ```, they are ok to be parsed
pydantic_parser.parse(evaluation['raw_response'][0])

In [None]:
parsed_result = pydantic_parser.parse(evaluation['raw_response'][0])
parsed_result.dict()

### d. Runnning over selected models on the full list of texts

In [None]:
models = ['llama2-70b-chat', 'gemma-7b']
evaluations = []
for model_name in models:
    print(f"Running LLM {model_name}")
    evaluations.append(evaluate_llm(model_name, body_cleaned, pydantic_parser))
    print("="*50)

In [None]:
df_eval = pd.DataFrame(evaluations[1])
df_eval[df_eval['total_tokens'] == max(df_eval['total_tokens'])]

In [None]:
# df_eval = pd.DataFrame(evaluations[0])
# df_eval.to_csv('./outputs/llm_evaluation_llama2-70b-chat.csv')

## 4. Testing with GPT3.5 without the built-in `with_structured_output`

### a. Setting up

In [23]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

load_dotenv()
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
prompt = PromptTemplate.from_template(NEWS_PARSING_PROMPT)
chain = prompt | llm # removed StrOutputParser() to see other information such as tokens used

### b. Testing with sample text

In [24]:
llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': sample_text})

# to see the content:
print(llm_response.content)

# to see the tokens used:
print(llm_response.response_metadata['token_usage'])

{
  "title": "Bridge collapse at Baltimore Port",
  "summary": "Officials are planning to remove the vessel lodged among debris by 10 May following the bridge collapse at Baltimore Port in the US. Some ships have been able to navigate through a limited access channel opened by the coastguard, but the Dali containership remains in place since the incident on 26 March.",
  "impact": "Yes",
  "reasoning": "The bridge collapse and the vessel remaining lodged among debris can directly impact the supply chain network by obstructing the movement of goods and vessels in and out of the port.",
  "vessel_name": ["Dali"]
}
{'completion_tokens': 134, 'prompt_tokens': 762, 'total_tokens': 896}


In [25]:
# validate the content
pydantic_parser.parse(llm_response.content)

NewsInfo(title='Bridge collapse at Baltimore Port', summary='Officials are planning to remove the vessel lodged among debris by 10 May following the bridge collapse at Baltimore Port in the US. Some ships have been able to navigate through a limited access channel opened by the coastguard, but the Dali containership remains in place since the incident on 26 March.', impact='Yes', reasoning='The bridge collapse and the vessel remaining lodged among debris can directly impact the supply chain network by obstructing the movement of goods and vessels in and out of the port.', vessel_name=['Dali'])

### c. Run on whole dataset

In [None]:
model="gpt-3.5-turbo-0125"
evaluation = {"model":[], "raw_response":[], "success":[], "response_time":[], "prompt_tokens":[], "completion_tokens":[], "total_tokens":[]}
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
chain = prompt | llm

for i in range(len(body_cleaned)):
    if i+1 == 1 or (i+1)%5==0:
        print("Processing text: ", i+1, "/", len(body_cleaned))
    text = body_cleaned[i]
    evaluation["model"].append(model)
    try:
        tic = time.time()
        llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': text})
        toc = time.time()

        response_time = round(toc - tic, 2)
        result = llm_response.content
        prompt_tokens = llm_response.response_metadata['token_usage']['prompt_tokens']
        completion_tokens = llm_response.response_metadata['token_usage']['completion_tokens']
        total_tokens = llm_response.response_metadata['token_usage']['total_tokens']

        evaluation["raw_response"].append(result)
        evaluation["response_time"].append(response_time)
        evaluation["prompt_tokens"].append(prompt_tokens)
        evaluation["completion_tokens"].append(completion_tokens)
        evaluation["total_tokens"].append(total_tokens)
        try:
            pydantic_parser.parse(result)
            evaluation["success"].append(1)
        except Exception as e:
            print(f"Error: {e}")
            evaluation["success"].append(0)
    except Exception as e:
        print(f"Error: {e}")
        evaluation["raw_response"].append(e)
        evaluation["response_time"].append("Error")
        evaluation["prompt_tokens"].append("Error")
        evaluation["completion_tokens"].append("Error")
        evaluation["total_tokens"].append("Error")
        evaluation["success"].append(0)

In [None]:
# df_eval = pd.DataFrame(evaluation)
# df_eval.to_csv('./outputs/llm_evaluation_gpt-3.5-turbo-0125.csv')

## 6. Testing Prompt Stuffing without Function Calling

### a. Setting up

In [58]:
INSTRUCTION = """ 
I need you to extract out a few things. 
I want to extract a few things: 
Firstly, One sentence summary of the article of maximum 200 characters, prefereably with the event, location and time information. The key should be title. 
Secondly, A short summary of the text, maximum 200 words. The key should be sumamry. 
Thirdly, Answer only Yes or No to this question: does this event negatively impact a supply chain network (the movement of people and goods)? Answer this by following the following reasoning steps:
            If the event can directly impact a supply chain network in a negative way, such as causing facility damage or traffic stopage, etc., then Yes.
            Else if it can potentially disrupt the normal operations a supply chain network, such as social-political disruptions, extreme weathers, or other disruptions, etc., then asnwer Yes.
            If not or uncertain, such as general knowledge, good news, individual personnel events, project annoucement etc., answer No.
            The key should be Impact
Fourthly, The reasoning behind your impact assessment based on the impact reasoning step above. Explain why you think the event will (Yes) or will not (No) impact the supply chain network. The key should be reasoning. 
Fifthly, The names of the marine vessels or container ships mentioned in the text in a list of strings, if any. The key should be vessel_name.
Output in JSON format match:
"""


def generate_sql_statement(cortex_function:str, model:str, instruction:str, user_input:str):
    return f""" 
    SELECT SNOWFLAKE.CORTEX.{cortex_function}
    (
        '{model}',
        [
            {{
                'role': 'user',
                'content': CONCAT('{instruction}', '{user_input}')
            }}
        ],
        {{
            'temperature': 0
        }}
    )
    AS LLM_RESPONSE;            
    """

def evaluate_llm_sql(model:str, data:list, pydantic_parser, instruction:str):
    evaluation = {"model":[], "raw_response":[], "success":[], "response_time":[], "prompt_tokens":[], "completion_tokens":[], "total_tokens":[]}
    
    for i in range(len(data)):
        if i+1 == 1 or (i+1)%5==0:
            print("Processing text: ", i+1, "/", len(data))
        text = data[i]
        sql_statement = generate_sql_statement('complete', model, instruction, text)
        evaluation["model"].append(model)
        try:
            tic = time.time()
            l_rows = sp_session.sql(sql_statement).collect()
            llm_response = l_rows[0]['LLM_RESPONSE'] # this will be a string
            toc = time.time()
            response_time = round(toc - tic, 2)
            result = eval(llm_response)['choices'][0]['messages']
            prompt_tokens = eval(llm_response)['usage']['prompt_tokens']
            completion_tokens = eval(llm_response)['usage']['completion_tokens']
            total_tokens = eval(llm_response)['usage']['total_tokens']
            evaluation["raw_response"].append(result)
            evaluation["response_time"].append(response_time)
            evaluation["prompt_tokens"].append(prompt_tokens)
            evaluation["completion_tokens"].append(completion_tokens)
            evaluation["total_tokens"].append(total_tokens)
            try:
                pydantic_parser.parse(result)
                evaluation["success"].append(1)
            except Exception as e:
                print(f"Error: {e}")
                evaluation["success"].append(0)
        except Exception as e:
            print(f"Error: {e}")
            evaluation["raw_response"].append(e)
            evaluation["response_time"].append("Error")
            evaluation["prompt_tokens"].append("Error")
            evaluation["completion_tokens"].append("Error")
            evaluation["total_tokens"].append("Error")
            evaluation["success"].append(0)
            
    return evaluation

### b. Testing with a sample text

In [27]:
user_input = sample_text.replace("'", "\\'") # pre-requisite: escape the single quotes
cortex_function = 'complete'
model = 'mistral-7b'
sql_statement = generate_sql_statement(cortex_function, model, INSTRUCTION, user_input)
l_rows = sp_session.sql(sql_statement).collect()
llm_response = l_rows[0]['LLM_RESPONSE']

In [28]:
result = (eval(llm_response)['choices'][0]['messages'])

# we can validate the result against the schema
pydantic_parser = PydanticOutputParser(pydantic_object=NewsInfo)
parsed_result = pydantic_parser.parse(result)

In [29]:
# token counting:
llm_response_dict = eval(llm_response)
total_tokens = llm_response_dict['usage']['total_tokens']
prompt_tokens = llm_response_dict['usage']['prompt_tokens']
completion_tokens = llm_response_dict['usage']['completion_tokens']

In [44]:
# how the pydantic parser works: Any keys in the input data that do not match fields in the model will be ignored by default.
set(parsed_result.dict().keys()) == set(NewsInfo.__fields__.keys())

True

In [47]:
result

' {\n"title": "Baltimore Port Bridge Collapse: Dali Containership to be Removed by 10 May, Channel Closed for Deep-Draft Vessels",\n"one_sentence_summary": "Baltimore Port\'s Dali containership remains lodged under the collapsed Francis Scott Key Bridge, causing the closure of a deep-draft channel until its removal by 10 May.",\n"summary": "The Dali containership, which collided with the Francis Scott Key Bridge at Baltimore Port on 26 March, remains lodged under the bridge debris and has caused the closure of a 35ft deep channel since then. The incident has resulted in the closure of the channel for deep-draft vessels, though three temporary channels with shallower depths remain open. Maryland Governor Wes Moore has highlighted the challenges faced by the team attempting to clear the bridge and the vessel. The Dali containership is scheduled to be removed by 10 May.",\n"impact": "Yes",\n"reasoning": "The bridge collapse directly impacted the supply chain network by causing the closure

### Testing with a short list of articles

In [57]:
sample_data = body_cleaned[90:95]
evaluation = evaluate_llm_sql('gemma-7b', sample_data, pydantic_parser, instruction=INSTRUCTION)

Processing text:  1 / 5
Processing text:  5 / 5


In [59]:
evaluation['raw_response'][2]

'```json\n{\n  "title": "Mobile Crane Collapses on Cargo Vessel",\n  "summary": "A mobile crane collapsed onto the main deck of general cargo vessel, MEDKON RIZE (IMO: 9114347) during cargo operations at Marport, Turkey on 02 June 2024.",\n  "impact": "Yes",\n  "reasoning": "The incident caused the fall of 28 containers overboard and has the potential to disrupt the normal operations of the supply chain network.",\n  "vessel_name": ["MEDKON RIZE"]\n}\n```'

### Running through selected models

In [60]:
models = ['gemma-7b', 'mixtral-8x7b', 'llama2-70b-chat']
evaluations = []
for model_name in models:
    print(f"Running LLM {model_name}")
    evaluations.append(evaluate_llm_sql(model_name, body_cleaned, pydantic_parser, instruction=INSTRUCTION))
    print("="*50)

Running LLM gemma-7b
Processing text:  1 / 100
Processing text:  5 / 100
Processing text:  10 / 100
Processing text:  15 / 100
Processing text:  20 / 100
Processing text:  25 / 100
Error: Invalid json output: ```json
{
  "title": "Protesters take action against arms trade in Canada",
  "summary": "Protesters took action in Canada in the last week of May against the arming of and profiteering from the Israeli genocide against the Palestinians. The actions included blockades, pickets, and disruption of critical infrastructure.",
  "impact": "No",
  "reasoning": "While the event can potentially disrupt the normal operations of a supply chain network, it is not likely to directly impact a supply chain network.",
  "vessel_name": null,
  "reasoning_steps": {
    "If the event can directly impact a supply chain network in a negative way, such as causing facility damage or traffic stopage, etc., then Yes.",
    "Else if it can potentially disrupt the normal operations a supply chain network, 

In [68]:
evaluations[2]

{'model': ['llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'llama2-70b-chat',
  'l

In [69]:
df_eval = pd.DataFrame(evaluations[2])
df_eval.to_csv('./outputs/snowpark_llm/pure_prompting/llm_evaluation_llama2-70b-chat.csv')

We can iterate through the available models and measure the average response time over 10 trials

## 4. Examining the issue caused by special characters

SQL Compilation error will be raised if there is single quote in the text. This is because the LLM _call is based on the SQL syntax below. Hence if there is a single quotation inside the prompt text, the SQL command will be escaped prematurely.<br>
`SELECT SNOWFLAKE.CORTEX.COMPLETE('<model_name>', '<prompt_text>');`

In [None]:
sample_text_adversary_1 = """
Officials involved in the clear up of the bridge collapse at Baltimore Port in the US have said the vessel that remains lodged among debris will be removed by 10 May.
While some ships have been able to navigate in and out of the port through a limited access channel opened up by the coastguard in the middle of the collapsed bridge, the Dali containership has remained in the place where it collided with the Francis Scott Key Bridge since the incident took place at the major port in Maryland on 26 March.
Ahead of the planned removal of the ship, a 35ft deep Fort McHenry Limited Access Channel that had been open for four days and allowed the first container ship to return to the port closed on 29 April, though the three other temporary channels, which are 20, 14 and 11ft deep, will remain open.
Maryland Governor Wes Moore highlighted some of the difficulties that have been faced by the team attempting to clear the bridge debris and Dali vessel.

"That work is remarkably complicated, we're talking about a massive piece of steel," he said.

"""

sample_text_adversary_2 = """
Officials involved in the clear up of the bridge collapse at Baltimore Port in the US have said the vessel that remains lodged among debris will be removed by 10 May.
While some ships have been able to navigate in and out of the port through a limited access channel opened up by the coastguard in the middle of the collapsed bridge, the Dali containership has remained in the place where it collided with the Francis Scott Key Bridge since the incident took place at the major port in Maryland on 26 March.
Ahead of the planned removal of the ship, a 35ft deep Fort McHenry Limited Access Channel that had been open for four days and allowed the first container ship to return to the port closed on 29 April, though the three other temporary channels, which are 20, 14 and 11ft deep, will remain open.
Maryland Governor Wes Moore highlighted some of the difficulties that have been faced by the team attempting to clear the bridge debris and Dali vessel.

"I'm proud of all the people involved!" he said.

"""

adversary_text = """ 
Officials involved in the clear up of the bridge collapse at Baltimore Port in the US have said the vessel that remains lodged among debris will be removed by 10 May.
While some ships have been able to navigate in and out of the port through a limited access channel opened up by the coastguard in the middle of the collapsed bridge, the Dali containership has remained in the place where it collided with the Francis Scott Key Bridge since the incident took place at the major port in Maryland on 26 March.
Ahead of the planned removal of the ship, a 35ft deep Fort McHenry Limited Access Channel that had been open for four days and allowed the first container ship to return to the port closed on 29 April, though the three other temporary channels, which are 20, 14 and 11ft deep, will remain open.
Maryland Governor Wes Moore highlighted some of the difficulties that have been faced by the team attempting to clear the bridge debris and Dali vessel.

"I{} proud of all the people involved!" he said.
"""

In [None]:
# deliberately using an adversarial text with special characters
try:
    llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': sample_text_adversary_2})
    result = eval(eval(llm_response)['choices'][0]['messages'])
    print(result)
except Exception as e:
    print(e)


In [None]:
# escape the single quotes in the text to resolve this issue:
sample_text_adversary_2_cleaned = sample_text_adversary_2.replace("'", "\\'")
try:
    llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': sample_text_adversary_2_cleaned})
    result = eval(eval(llm_response)['choices'][0]['messages'])
    print(result)
except Exception as e:
    print(e)

In [None]:
# we can examine other special characters that may cause issues with interfering with the SQL statement
# result: nothing else, just the single quotation

# create a formatted string:
special_chars = ['-','--','"', "'", "!", "?", ".", ",", ":", ";", "(", ")", "{", "}", "[", "]", "<", ">", "&", "*", "#", "@", "$", "%", "^", "&", "*", "+", "-", "_", "=", "~", "`", "|", "\\"]
error_chars = []
good_chars = []
# adversary_texts = [adversary_text.format(char) for char in special_chars]
for char in special_chars:
    print(f"Adversary text with special character: {char}")
    try:
        llm_response  = chain.invoke({'format_instructions' : format_instructions, 'news_content': adversary_text.format(char)})
        good_chars.append(char)
    except Exception as e:
        print(f"Error: {e}")
        error_chars.append(char)
        print("="*50)


In [None]:
# list of special characters that will cause errors in snowflake cortex
error_chars