In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment; the
# return value is discarded. Presumably this supplies the API keys that
# ChatOpenAI and TavilyClient rely on later — confirm against your .env.
_ = load_dotenv()

In [2]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated, List
import operator
from langgraph.checkpoint.sqlite import SqliteSaver
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, AIMessage, ChatMessage

# SQLite checkpointer opened on the ":memory:" connection string.
# NOTE(review): `memory` is never passed to `graph.compile()` in the cells
# visible here, so it appears to be unused — wire it in or remove it.
memory = SqliteSaver.from_conn_string(":memory:")

In [31]:
import re
from langchain.schema import BaseOutputParser
class FactCheckOutput:
    """Holds the two fields that FactCheckParser pulls out of an LLM reply.

    Attributes:
        assumption: value stored under ``'assumption'`` in ``parsed_output``.
        followup: value stored under ``'followup'`` in ``parsed_output``.
    """

    def __init__(self, parsed_output):
        # Both keys are mandatory; a missing key raises KeyError.
        for field in ('assumption', 'followup'):
            setattr(self, field, parsed_output[field])

class FactCheckParser(BaseOutputParser):
    """Pulls 'Assumption:' and 'Fact Check:' lines out of raw LLM text."""

    def parse(self, text: str):
        """Collect the labelled lines found in ``text``.

        Returns a ``FactCheckOutput`` whose ``assumption`` and ``followup``
        attributes are lists of the captured strings, or ``None`` when the
        corresponding label never appears in ``text``.
        """
        # An empty findall() result is falsy, so `or None` maps "no match" to None.
        extracted = {
            'assumption': re.findall(r'Assumption: (.+)', text) or None,
            'followup': re.findall(r'Fact Check: (.+)', text) or None,
        }
        return FactCheckOutput(extracted)

In [32]:
class AgentState(TypedDict):
    """State dictionary passed between the graph nodes."""

    # Text of the post being fact-checked (supplied by the caller of invoke()).
    post: str
    # 'Assumption:' lines returned by claim_extractor_node.
    claims: List[str]
    # 'Fact Check:' search queries returned by generate_question_node.
    fact_check_questions: List[str]
    # intenet_research_node stores str(search_results) here, i.e. a single
    # string rendering of a list of {'url', 'content'} dicts — not a list.
    internet_findings: str
    # Model reply returned by summarize_node.
    result: str

In [33]:
from langchain_openai import ChatOpenAI
# Single chat-model instance shared by every node function in this notebook.
# temperature=0 is requested, presumably to keep replies as repeatable as possible.
model = ChatOpenAI(model="gpt-4o", temperature=0)

In [34]:
# System prompt for claim_extractor_node. The "Assumption: ..." example format
# is what FactCheckParser's 'Assumption: (.+)' regex matches in the reply.
CLAIM_EXTRACTOR_PROMPT = """
        As an expert in analyzing text for underlying assumptions, your task is to identify and articulate the key assumptions in a given user query.
        Instructions:
        Carefully read the user query provided.
        Identify and list the foundational assumptions that the query is based on.
        Keep your responses concise and specific to each assumption identified.
        Do not include any search results or information outside of the query.
        
        Context:
        A user will submit a query and you are required to dissect the implicit beliefs, premises, or preconceptions underlying their question or statement.
        
        Example Format:
        Assumption: [State the first key assumption]
        Assumption: [State the second key assumption]
        Assumption: [State the third key assumption]
        
        Outcome:
        Provide a clear, concise list of the underlying assumptions for the given user query.
"""

# System prompt for generate_question_node. The "Fact Check: ..." example format
# is what FactCheckParser's 'Fact Check: (.+)' regex matches in the reply.
GENERATE_FACT_CHECK_QUESTION_SYS_PROMPT = """
     As an expert in fact-checking and internet research, your task is to formulate precise and fact-checkable questions that challenge the foundational assumptions given by the user.

    ### Instructions:

    1. Generate internet search queries that examine the basic existence or availability of the services or features mentioned in the user's query.
    2. Use varied wording and sentence structures to broaden the scope of the search.
    3. Your responses should be suitable for conducting thorough internet searches.
    4. Do not address the user directly, as the user will not see your searches. 
    
    ### Example Format:
    Fact Check: [State the first internet search query]
    Fact Check: [State the second internet search query]
    Fact Check: [State the third internet search query]
    
    Generate your internet search queries below:

"""

# Citation rules that get interpolated into SUMMARIZE_FINDINGS_SYS_PROMPT.
# Plain string: it has no placeholders, so no f-prefix is needed.
reference_prompt = """
You MUST write all used source urls at the end of the report as references, and make sure to not add duplicated sources, but only one reference for each.
Every url should be hyperlinked: [url website](url)
Additionally, you MUST include hyperlinks to the relevant URLs wherever they are referenced in the report: 

eg: Author, A. A. (Year, Month Date). Title of web page. Website Name. [url website](url)
"""

# System prompt for summarize_node. It is an f-string only so that
# `reference_prompt` can be spliced into the final guideline bullet.
# The reference-list guideline no longer says "without hyperlinks": that
# wording contradicted reference_prompt, which requires every url to be
# hyperlinked. A stray unbalanced double quote was also removed.
SUMMARIZE_FINDINGS_SYS_PROMPT = f"""### Instructions: As an expert in factual verification, determine the accuracy of 
    the given claims based on the given data. Summarize your findings, and provide a comprehensive explanation.
    
    ### Context:
    post: [social media post to be fact checked]
    data: [url and summary to be used to answer]
    
    ### Desired Outcome:
    - Length: Detailed summary
    - Format: Clear and structured analysis
    - Style: Professional and objective
    
    ### Task:
    Summarize the findings on the validity of the claims and provide detailed explanations to support your conclusions.
    Conclude with a verdict from 'pants-fire', 'false', 'mostly-false', 'half-true', 'mostly-true', or 'true', 
    or declare 'uncertain' if conclusive information is unavailable. Include reasoning and cite source domains. 
    Responses should be based on factual data and contextually relevant information.
    
    Please follow following guidelines:
    - You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
    - You MUST write the report with markdown syntax
    - Use an unbiased and journalistic tone.
    - Don't forget to add a reference list at the end of the report in apa format with full url links.
    - {reference_prompt}
"""

# User-message template for summarize_node, filled in with str.format().
# It has exactly two placeholders: {post} and {data}.
SUMMARIZE_FINDINGS_USER_INPUT_PROMPT = """
    post: {post}
    data: {data}
"""


In [35]:
# GENERATE_FACT_CHECK_QUESTION_SYS_PROMPT = """
#     As an expert in fact-checking and internet research, your task is to formulate precise and fact-checkable questions that challenge the foundational assumptions given by the user.

#     ### Instructions:

#     1. Generate internet search queries that examine the basic existence or availability of the services or features mentioned in the user's query.
#     2. Use varied wording and sentence structures to broaden the scope of the search.
#     3. Your responses should be suitable for conducting thorough internet searches.
#     4. Do not address the user directly, as the user will not see your searches. 
    
#     ### Example Format:
#     Fact Check: [State the first internet search query]
#     Fact Check: [State the second internet search query]
#     Fact Check: [State the third internet search query]
    
#     Generate your internet search queries below:

# """

In [36]:
# SUMMARIZE_FINDINGS_SYS_PROMPT = """
# ### Instructions:
# As an expert in factual verification, determine the accuracy of the given claims based on the provided fact-checkable questions and their corresponding answers. Summarize your findings, explicitly stating whether the claims are true or false, and provide a comprehensive explanation.

# ### Context:
# post: [social media post to be fact checked]
# questions: [fact-checkable questions against the claims]
# answers: [answers to the above questions]

# ### Desired Outcome:
# - Length: Detailed summary (around 150-200 words)
# - Format: Clear and structured analysis
# - Style: Professional and objective

# ### Task:
# Summarize the findings on the validity of the claims and provide detailed explanations to support your conclusions.
# Conclude with a verdict from 'pants-fire', 'false', 'mostly-false', 'half-true', 'mostly-true', or 'true', 
# or declare 'uncertain' if conclusive information is unavailable. Always Include reasoning and cite source domains. 
# Responses should be based on factual data and contextually relevant information. "

# Always include the full urls used to answer the question
# """

# SUMMARIZE_FINDINGS_USER_INPUT_PROMPT = """
#     post: {post}
#     questions: {questions}
#     answers: {answers}
# """


In [47]:
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from tavily import TavilyClient

def claim_extractor_node(state: AgentState):
    """Extract the underlying assumptions from the post.

    Sends CLAIM_EXTRACTOR_PROMPT plus ``state['post']`` to the chat model and
    parses the reply for 'Assumption:' lines.

    Args:
        state: current graph state; only ``state['post']`` is read.

    Returns:
        A partial state update ``{"claims": [...]}``. The list is empty when
        the reply contains no 'Assumption:' line.
    """
    messages = [
        SystemMessage(content=CLAIM_EXTRACTOR_PROMPT),
        HumanMessage(content=state['post'])
    ]
    response = model.invoke(messages)
    parsed = FactCheckParser().parse(response.content)
    # The parser reports "no matches" as None; normalise to [] so the value
    # always matches the List[str] declared on AgentState.
    return {"claims": parsed.assumption or []}


def generate_question_node(state: AgentState):
    """Generate internet search queries that fact-check the post.

    Sends GENERATE_FACT_CHECK_QUESTION_SYS_PROMPT plus ``state['post']`` to
    the chat model and parses the reply for 'Fact Check:' lines.

    Args:
        state: current graph state; only ``state['post']`` is read.

    Returns:
        A partial state update ``{"fact_check_questions": [...]}``. The list
        is empty when the reply contains no 'Fact Check:' line.
    """
    messages = [
        SystemMessage(content=GENERATE_FACT_CHECK_QUESTION_SYS_PROMPT),
        HumanMessage(content=str(state['post']))
    ]
    response = model.invoke(messages)

    parsed = FactCheckParser().parse(response.content)
    # The parser reports "no matches" as None, and the research node iterates
    # over this value; return [] so a reply without matches cannot cause a
    # TypeError downstream.
    return {"fact_check_questions": parsed.followup or []}

def intenet_research_node(state: AgentState):
    """Run one Tavily search per fact-check question and collect the hits.

    Args:
        state: current graph state; only ``state['fact_check_questions']``
            is read.

    Returns:
        A partial state update ``{"internet_findings": <str>}`` holding the
        ``str()`` of a flat list of ``{'url', 'content'}`` dicts, in question
        order and then in the order Tavily returned the results.
    """
    client = TavilyClient()
    search_results = []
    # The question list can be None when no 'Fact Check:' line was parsed;
    # treat that the same as an empty list instead of raising TypeError.
    for question in state['fact_check_questions'] or []:
        tavily_resp = client.search(question,
                                    include_answer=True, include_raw_content=True)
        # Keep only the url and content of each hit; extend() avoids
        # re-copying the accumulated list on every iteration.
        search_results.extend(
            {'url': hit['url'], 'content': hit['content']}
            for hit in tavily_resp['results']
        )
    return {"internet_findings": str(search_results)}

def summarize_node(state: AgentState):
    """Ask the chat model for a written verdict on the post.

    Only ``state['post']`` and ``state['internet_findings']`` are read,
    because ``{post}`` and ``{data}`` are the only placeholders in
    SUMMARIZE_FINDINGS_USER_INPUT_PROMPT. The node therefore no longer raises
    KeyError when ``claims`` or ``fact_check_questions`` are absent from the
    state.

    Returns:
        A partial state update ``{"result": <model reply text>}``.
    """
    input_prompt = SUMMARIZE_FINDINGS_USER_INPUT_PROMPT.format(
        post=state['post'],
        data=str(state['internet_findings']),
    )
    messages = [
        SystemMessage(content=SUMMARIZE_FINDINGS_SYS_PROMPT),
        HumanMessage(content=input_prompt)
    ]
    response = model.invoke(messages)
    return {"result": response.content}

In [48]:
# Graph builder that uses AgentState as its state schema.
graph = StateGraph(AgentState)

In [49]:
# Register the node functions under the names the edges refer to.
# The claim-extractor node is currently commented out, so no node in this
# graph writes the `claims` state key.
# graph.add_node("claim_extractor", claim_extractor_node)
graph.add_node("question_generator", generate_question_node)
graph.add_node("internet_researcher", intenet_research_node)
graph.add_node("summarize_results", summarize_node)

In [50]:
# Execution starts at the question generator; the claim-extractor entry point
# is disabled.
# graph.set_entry_point("claim_extractor")
graph.set_entry_point("question_generator")

In [51]:
# graph.add_edge("claim_extractor", "question_generator")

In [52]:
# Generated search queries flow into the internet research step.
graph.add_edge("question_generator", "internet_researcher")

In [53]:
# Collected search findings flow into the summarization step.
graph.add_edge("internet_researcher", "summarize_results")

In [54]:
# The summary is the last step; the run ends after it.
graph.add_edge("summarize_results", END)

In [55]:
# NOTE(review): this rebinds `graph` from the builder to the compiled graph,
# so re-running this cell would call .compile() on the already-compiled
# object. Re-run the builder cells first, or bind the result to a new name.
graph = graph.compile()

In [56]:
# Run the compiled graph once on a sample post.
# `claims` is seeded with an empty placeholder, presumably because the
# claim-extractor node that would populate it is commented out of the graph.
res = graph.invoke({
    'post': """
        Imane Khalif is a biological women
    """,
    'claims': ""
})

In [57]:
import pprint

In [58]:
pprint.pp(res['claims'])

''


In [59]:
pprint.pp(res['fact_check_questions'])

['Is Imane Khalif a biological woman?',
 'What is the gender identity of Imane Khalif?',
 'Has Imane Khalif publicly discussed her gender or biological sex?',
 "Are there any official statements about Imane Khalif's biological sex?",
 "What are the details of Imane Khalif's personal background regarding gender?"]


In [60]:
pprint.pp(res['internet_findings'])

("[{'url': "
 "'https://www.usatoday.com/story/sports/olympics/2024/08/02/imane-khelif-fact-check-olympic-boxer/74645341007/', "
 "'content': 'Fact check: Imane Khelif is a woman. Khelif is a woman, who is "
 'not transgender, nor identifies as intersex, according to GLAAD and '
 "InterACT. Khelif reportedly has differences of sexual development ...'}, "
 "{'url': "
 "'https://www.sportingnews.com/us/olympics/news/imane-khelif-condition-explained-gender-fact-check/51994b8a2e23e7b423782f7a', "
 "'content': 'Imane Khelif is a woman, not transgender. Khelif is a woman, "
 'according to GLAAD and interACT. She is not transgender, and does not '
 "identify as intersex. She has competed as a woman in boxing for ...'}, "
 "{'url': "
 "'https://www.nytimes.com/2024/08/02/world/olympics/boxing-imane-khelif-gender-athletes.html', "
 '\'content\': "The chief spokesman for the International Olympic Committee '
 'condemned misinformation that cast doubt on the gender of Imane Khelif, an '
 'Algeria

In [61]:
print(res['result'])

### Analysis of the Claim: "Imane Khalif is a biological woman"

#### Summary of Findings

The claim that Imane Khalif is a biological woman is supported by multiple sources. These sources consistently state that Khalif is a woman, not transgender, and does not identify as intersex. However, it is noted that she has differences of sexual development (DSDs).

#### Detailed Explanation

1. **Verification from Multiple Sources**:
   - **USA Today**: Reports that Imane Khalif is a woman, not transgender, and does not identify as intersex. It also mentions that she has differences of sexual development ([USA Today](https://www.usatoday.com/story/sports/olympics/2024/08/02/imane-khelif-fact-check-olympic-boxer/74645341007/)).
   - **Sporting News**: Confirms that Khalif is a woman and not transgender or intersex, reiterating the information provided by GLAAD and interACT ([Sporting News](https://www.sportingnews.com/us/olympics/news/imane-khelif-condition-explained-gender-fact-check/51994b8a