In [1]:
import os
import pandas as pd
from langchain_core.output_parsers.pydantic import PydanticOutputParser
from typing import Any, List, Mapping, Optional
from langchain_core.pydantic_v1 import BaseModel, Field

# Lenient variant of the news-extraction schema: the model is declared with
# extra="allow" (per pydantic's `extra` setting, undeclared keys are presumably
# kept rather than rejected — verify against the pinned pydantic version).
# NOTE(review): the class docstring and the Field descriptions look like they end
# up in the schema / format instructions shown to the LLM, so they are left
# byte-for-byte untouched (typos included) — confirm before editing them.
class NewsInfoLenient(BaseModel, extra="allow"):
    """Information extracted from the text."""
    # Required: headline-style one-sentence summary.
    title: str = Field(
        description="One sentence summary of the article of maximum 200 characters, prefereably with the event, location and time information."
    )
    # Required: longer free-text summary.
    summary: str = Field(
        description="A short summary of the text, maximum 200 words"
    )
    # Required: the Yes/No supply-chain impact verdict.
    # NOTE(review): the annotation is plain `str`; `enum=[...]` is passed as an
    # extra Field keyword, so it may only annotate the schema rather than
    # validate the value — TODO confirm.
    impact: str = Field( 
        description="Answer only Yes or No to this question: does this event negatively impact a supply chain network (the movement of people and goods)? Answer this by following the following reasoning steps: \
            If the event can directly impact a supply chain network in a negative way, such as causing facility damage or traffic stopage, etc., then Yes. \
            Else if it can potentially disrupt the normal operations a supply chain network, such as social-political disruptions, extreme weathers, or other disruptions, etc., then asnwer Yes. \
            If not or uncertain, such as general knowledge, good news, individual personnel events, project annoucement etc., answer No",
        enum=["Yes", "No"]
    )
    # Required: justification for the `impact` verdict.
    reasoning: str = Field( 
        description="The reasoning behind your impact assessment based on the impact reasoning step above. Explain why you think the event will (Yes) or will not (No) impact the supply chain network."
    )
    # Optional: the default is a list holding one empty string, not an empty list.
    vessel_name: Optional[list[str]] = Field(
        default=[""], 
        description="The names of the marine vessels or container ships mentioned in the text, if any."
    )

# Strict variant of the news-extraction schema: the field definitions are
# identical to NewsInfoLenient; only the `extra` setting differs ("forbid" here,
# which per pydantic's `extra` setting presumably rejects undeclared keys —
# verify against the pinned pydantic version).
# NOTE(review): the class docstring and the Field descriptions look like they end
# up in the schema / format instructions shown to the LLM, so they are left
# byte-for-byte untouched (typos included) — confirm before editing them.
class NewsInfoStrict(BaseModel, extra="forbid"):
    """Information extracted from the text."""
    # Required: headline-style one-sentence summary.
    title: str = Field(
        description="One sentence summary of the article of maximum 200 characters, prefereably with the event, location and time information."
    )
    # Required: longer free-text summary.
    summary: str = Field(
        description="A short summary of the text, maximum 200 words"
    )
    # Required: the Yes/No supply-chain impact verdict.
    # NOTE(review): the annotation is plain `str`; `enum=[...]` is passed as an
    # extra Field keyword, so it may only annotate the schema rather than
    # validate the value — TODO confirm.
    impact: str = Field( 
        description="Answer only Yes or No to this question: does this event negatively impact a supply chain network (the movement of people and goods)? Answer this by following the following reasoning steps: \
            If the event can directly impact a supply chain network in a negative way, such as causing facility damage or traffic stopage, etc., then Yes. \
            Else if it can potentially disrupt the normal operations a supply chain network, such as social-political disruptions, extreme weathers, or other disruptions, etc., then asnwer Yes. \
            If not or uncertain, such as general knowledge, good news, individual personnel events, project annoucement etc., answer No",
        enum=["Yes", "No"]
    )
    # Required: justification for the `impact` verdict.
    reasoning: str = Field( 
        description="The reasoning behind your impact assessment based on the impact reasoning step above. Explain why you think the event will (Yes) or will not (No) impact the supply chain network."
    )
    # Optional: the default is a list holding one empty string, not an empty list.
    vessel_name: Optional[list[str]] = Field(
        default=[""], 
        description="The names of the marine vessels or container ships mentioned in the text, if any."
    )

In [33]:
def validate_json(content: str):
    """Check if the json is valid (free from out-of-schema hallucination).

    The whole string must be a single JSON document: any text before or after
    the JSON value (for example commentary appended after the closing brace)
    makes the content invalid.

    Args:
        content: Raw text to check. Values that are not str/bytes (e.g. a NaN
            produced by an empty CSV cell) are reported as invalid.

    Returns:
        1 when ``content`` parses as JSON, 0 otherwise.
    """
    import json  # local import so this cell does not depend on the import cell

    try:
        # json.loads instead of eval(): eval would execute arbitrary Python found
        # in the model output, reject JSON literals such as true/false/null, and
        # accept Python-only syntax (single-quoted keys, bare expressions).
        json.loads(content)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; TypeError covers non-text input.
        return 0
    return 1

def validate_schema(content: str, pydantic_model):
    """Report whether ``content`` parses into exactly the model's field set.

    The text is parsed with a ``PydanticOutputParser`` built for
    ``pydantic_model``; the keys of the parsed object's ``dict()`` are then
    compared with the model's declared field names. Because parsing has to
    succeed first, content that the parser cannot read also fails this check.

    Returns:
        1 if parsing succeeds and the key sets are equal, 0 in every other
        case (including any exception raised along the way).
    """
    try:
        parsed = PydanticOutputParser(pydantic_object=pydantic_model).parse(content)
        returned_keys = set(parsed.dict().keys())
        declared_keys = set(pydantic_model.__fields__.keys())
    except Exception:
        # Any failure (parser construction, parsing, validation) counts as a miss.
        return 0
    return 1 if declared_keys == returned_keys else 0

In [3]:
# Fixture: a JSON object followed by free-text commentary after the closing
# brace, so the string as a whole is not a single valid JSON document.
content_invalid_json = """
{
"title": "Encephalitis Cases on the Rise in Lo Cai, Vietnam",
"summary": "In Lo Cai, Vietnam, there has been an increase in encephalitis cases this summer, with 16 children treated at the Emergency Intensive Care Unit under Lo Cai Obstetrics and Paediatric Hospital since April. The article advises parents to be vigilant and detect early signs of encephalitis in their children, and provides information on the causes, symptoms, and prevention of the disease.",
"impact": "Yes",
"reasoning": "The article mentions that encephalitis can negatively impact supply chain networks by disrupting the normal operations of a supply chain network, such as social-political disruptions, extreme weathers, or other disruptions. The increase in encephalitis cases in Lo Cai could potentially disrupt the supply chain network in the area, especially if it spreads to other regions.",
"vessel_name": []
}

The reasoning behind the impact assessment is that encephalitis can cause disruptions in the supply chain network by affecting the normal operations of the network, such as social-political disruptions, extreme weathers, or other disruptions. The increase in encephalitis cases in Lo Cai could potentially disrupt the supply chain network in the area, especially if it spreads to other regions. However, since the article does not mention any specific impact on the supply chain network, the impact assessment is based on the potential for disruption.

There are no marine vessels or container ships mentioned in the text, so the list of vessel names is empty.
"""

# Fixture: syntactically valid JSON that carries one key ("one_line_summary")
# which is not a declared field of the NewsInfo* models.
content_valid_json = """
{
"title": "Encephalitis Cases on the Rise in Lo Cai, Vietnam",
"one_line_summary": "summary",
"summary": "In Lo Cai, Vietnam, there has been an increase in encephalitis cases this summer, with 16 children treated at the Emergency Intensive Care Unit under Lo Cai Obstetrics and Paediatric Hospital since April. The article advises parents to be vigilant and detect early signs of encephalitis in their children, and provides information on the causes, symptoms, and prevention of the disease.",
"impact": "Yes",
"reasoning": "The article mentions that encephalitis can negatively impact supply chain networks by disrupting the normal operations of a supply chain network, such as social-political disruptions, extreme weathers, or other disruptions. The increase in encephalitis cases in Lo Cai could potentially disrupt the supply chain network in the area, especially if it spreads to other regions.",
"vessel_name": []
}
"""

In [4]:
# Smoke test of both validators against the fixtures: the first line is the
# schema check on the JSON with an extra key, the second is the JSON check on
# the text with trailing commentary.
print(
    validate_schema(content_valid_json, NewsInfoLenient),
    validate_json(content_invalid_json),
    sep="\n",
)

0
0


## Import LLM responses

In [45]:
# Directory holding one CSV of raw LLM responses per evaluated model.
parent_dir = './outputs/snowpark_llm/function_calling/'
# os.listdir returns entries in arbitrary order; sort so the listing (and the
# order in which the files are processed later) is the same on every run.
files = sorted(f for f in os.listdir(parent_dir) if f.endswith('.csv'))
files

['llm_evaluation_gemma-7b.csv',
 'llm_evaluation_gpt-3.5-turbo-0125.csv',
 'llm_evaluation_llama2-70b-chat.csv',
 'llm_evaluation_mistral-7b.csv',
 'llm_evaluation_mixtral-8x7b.csv']

In [46]:
# Score every response file and write an annotated copy to <parent_dir>/eval/.
eval_dir = os.path.join(parent_dir, 'eval')
# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the first write.
os.makedirs(eval_dir, exist_ok=True)
for file in files:
    df = pd.read_csv(os.path.join(parent_dir, file))
    # 1/0 flag: is the raw response parseable as JSON at all?
    df['valid_json'] = df['raw_response'].apply(validate_json)
    # 1/0 flag: does the parsed response carry exactly the model's fields?
    df['valid_schema'] = df['raw_response'].apply(lambda x: validate_schema(x, NewsInfoLenient))
    df.to_csv(os.path.join(eval_dir, file), index=False)