<a href="https://colab.research.google.com/github/raheelam98/LangChain_Fundamentals/blob/main/langchain_course_jupyter/section_3_data_processing/part6_output_parser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Output Parser**

[Pydantic parser](https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/types/pydantic/)

In [28]:
# Install the required packages:
%%capture --no-stderr
%pip install -U  langsmith # check
%pip install --quiet -U  langchain_google_genai langchain_core langchain langgraph   python-dotenv langsmith

In [29]:
%%capture --no-stderr
%pip install ChatGoogleGenerativeAI

In [30]:
# packages
%%capture --no-stderr
%pip install -U tavily-python langchain_community

In [31]:
import os
from google.colab import userdata

os.environ["LANGCHAIN_API_KEY"] = userdata.get('LANGCHAIN_API_KEY')

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "ice_breaker"

In [44]:
# API Keys
# Get the GEMINI API key from user data
from google.colab import userdata
gemini_api_key = userdata.get('GEMINI_API_KEY')

In [45]:
# llm
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    max_retries=2,
    api_key=gemini_api_key
)

In [34]:
llm.invoke("hi")

AIMessage(content='Hi there! How can I help you today?\n', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-61029d36-b3c9-41f1-8481-34d08118e012-0', usage_metadata={'input_tokens': 2, 'output_tokens': 11, 'total_tokens': 13, 'input_token_details': {'cache_read': 0}})

In [35]:
result = llm.invoke("hello")
print(result)

content='Hello there! How can I help you today?\n' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []} id='run-cbf7369c-2557-4fe2-8ee4-7756c5af04f2-0' usage_metadata={'input_tokens': 2, 'output_tokens': 11, 'total_tokens': 13, 'input_token_details': {'cache_read': 0}}


[Tavily Search](https://python.langchain.com/docs/integrations/tools/tavily_search/)

[TAVILY_API_KEY](https://tavily.com/)


In [46]:
# API keys set up
os.environ["TAVILY_API_KEY"] = userdata.get("TAVILY_API_KEY")

[hwchase17/react](https://smith.langchain.com/hub/hwchase17/react)



**Create React Agent**

In [60]:
# app/tools/tools.py
from langchain_community.tools.tavily_search import TavilySearchResults


def get_profile_url_tavily(name: str):
    """Searches for Linkedin or Twitter Profile Page."""
    search = TavilySearchResults()
    res = search.run(f"{name}")
    return res

In [61]:
# app/agents/linkedin_lookup_agent.py

from langchain_core.prompts import PromptTemplate
from langchain_core.tools import Tool

from langchain.agents import (create_react_agent, AgentExecutor)
from langchain import hub


def lookup_linkedin(name: str):

    template = """give the full name {name_of_person} I want you to get it me a link to their Linkedin profile page.
                  Your answer should contain only a URL"""

    prompt_template = PromptTemplate(
        template=template, input_variables=["name_of_person"]
    )

    tools_for_agent = [
        Tool(
            name="Crawl Google 4 linkedin profile page",
            func=get_profile_url_tavily,
            description="useful for when you need get the Linkedin Page URL"
        )
    ]

    react_prompt = hub.pull("hwchase17/react")
    agent = create_react_agent(
        llm=llm,
        tools=tools_for_agent,
        prompt=react_prompt
    )

    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools_for_agent,
        verbose=True
    )

    result = agent_executor.invoke(
        input={"input": prompt_template.format_prompt(name_of_person=name)}
    )

    linkedin_profile_url = result["output"]
    return linkedin_profile_url


# if __name__ == '__main__':
#     print("hello LangChain linkedin lookup agent")
#     linkedin_profile_url = lookup_linkedin(name="Eden Marco Udemy")
#     print(linkedin_profile_url)



In [62]:
# # Example function call
print("Linkedin Lookup\n\n")

linkedin_profile_url = lookup_linkedin(name="Elon Musk")
print(linkedin_profile_url)

Linkedin Lookup




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find Elon Musk's LinkedIn profile URL.  I can use the provided tool to do this.

Action: Crawl Google 4 linkedin profile page

Action Input: name = 'Elon Musk'
[0m[36;1m[1;3m[{'url': 'https://www.bbc.com/news/articles/cy53vz1qpx1o', 'content': 'Elon Musk changes his name to Kekius Maximus on X Elon Musk changes his name to Kekius Maximus on X The world\'s richest man, Elon Musk, has sparked speculation after changing his name on his social media platform X to "Kekius Maximus". Elon Musk\'s \'social experiment on humanity\': How X evolved in 2024 ------------------------------------------------------------------ Elon Musk\'s \'social experiment on humanity\': How X evolved in 2024 ------------------------------------------------------------------ 5 days ago BBC InDepth 2 hrs ago US & Canada 3 hrs ago  What is a state funeral and who will attend Jimmy Carter\'s? 3 hrs ago US & Canada 3 hr

In [72]:
# app/agents/twitter_lookup_agent.py

from dotenv import load_dotenv

load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts.prompt import PromptTemplate
from langchain_core.tools import Tool
from langchain.agents import (
    create_react_agent,
    AgentExecutor,
)
from langchain import hub
# from tools.tools import get_profile_url_tavily


def lookup_twitter(name: str) -> str:

    template = """
       given the name {name_of_person} I want you to find a link to their Twitter profile page, and extract from it their username
       In Your Final answer only the person's username"""
    prompt_template = PromptTemplate(
        template=template, input_variables=["name_of_person"]
    )
    tools_for_agent = [
        Tool(
            name="Crawl Google 4 Twitter profile page",
            func=get_profile_url_tavily,
            description="useful for when you need get the Twitter Page URL",
        )
    ]

    react_prompt = hub.pull("hwchase17/react")
    agent = create_react_agent(llm=llm, tools=tools_for_agent, prompt=react_prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools_for_agent, verbose=True)

    result = agent_executor.invoke(
        input={"input": prompt_template.format_prompt(name_of_person=name)}
    )

    twitter_username = result["output"]
    return twitter_username


# if __name__ == "__main__":
#     print(lookup(name="Elon Musk"))

In [66]:
# # Example function call
print("Twitter Lookup\n\n")
twitter_lookup = lookup_twitter(name="Elon Musk")
print("Twitter Lookup:", twitter_lookup)

Twitter Lookup




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find Elon Musk's Twitter profile URL to extract his username.  I can use the "Crawl Google 4 Twitter profile page" action for this.

Action: Crawl Google 4 Twitter profile page
Action Input: Elon Musk
[0m[36;1m[1;3m[{'url': 'https://www.tesla.com/elon-musk', 'content': "As the co-founder and CEO of Tesla, Elon leads all product design, engineering and global manufacturing of the company's electric vehicles, battery products"}, {'url': 'https://www.amazon.com/Elon-Musk-SpaceX-Fantastic-Future/dp/0062301233', 'content': "Elon Musk is both an illuminating and authorized look at the extraordinary life of one of Silicon Valley's most exciting, unpredictable, and ambitious"}, {'url': 'https://apnews.com/hub/elon-musk', 'content': 'What is he worth to Tesla?\nAP Top Stories January 31 A\nElon Musk cannot keep Tesla pay package worth more than $55 billion, judge rules\nElon Musk says the first h

In [73]:
# # Example function call
print("Twitter Lookup\n\n")
twitter_lookup = lookup_twitter(name="Harrison Chase")
print("Twitter Lookup:", twitter_lookup)

Twitter Lookup




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find Harrison Chase's Twitter profile URL to extract his username.  I can use the "Crawl Google 4 Twitter profile page" action for this.

Action: Crawl Google 4 Twitter profile page
Action Input: Harrison Chase
[0m[36;1m[1;3m[{'url': 'https://soap-operas.fandom.com/wiki/Harrison_Chase', 'content': 'Harrison Chase is a fictional police officer and a character on the ABC soap opera General Hospital. He is the son of Gregory Chase and Jacqueline Templeton, and the half-brother of Hamilton Finn.'}, {'url': 'https://soaps.sheknows.com/general-hospital/characters/harrison-chase/', 'content': 'Harrison Chase is a detective on the Port Charles police force and the half-brother of Dr. Hamilton Finn. He was hired by Jordan Ashford to replace Nathan West and has a history with Nelle Benson, who framed him for corruption.'}, {'url': 'https://abc.com/cast/2fa9a106-2f76-42b0-9100-a8aa3186a1e4', 'conte

In [77]:
# app/third_parties/twitter.py

# import os
# from dotenv import load_dotenv
# import tweepy
# import requests

# load_dotenv()

# twitter_client = tweepy.Client(
#     bearer_token=os.environ["TWITTER_BEARER_TOKEN"],
#     consumer_key=os.environ["TWITTER_API_KEY"],
#     consumer_secret=os.environ["TWITTER_API_KEY_SECRET"],
#     access_token=os.environ["TWITTER_ACCESS_TOKEN"],
#     access_token_secret=os.environ["TWITTER_ACCESS_TOKEN_SECRET"],
# )


# def scrape_user_tweets(username, num_tweets=5, mock: bool = False):
#     """
#     Scrapes a Twitter user's original tweets (i.e., not retweets or replies) and returns them as a list of dictionaries.
#     Each dictionary has three fields: "time_posted" (relative to now), "text", and "url".
#     """
#     tweet_list = []

#     if mock:
#         EDEN_TWITTER_GIST = "https://gist.githubusercontent.com/emarco177/827323bb599553d0f0e662da07b9ff68/raw/57bf38cf8acce0c87e060f9bb51f6ab72098fbd6/eden-marco-twitter.json"
#         tweets = requests.get(EDEN_TWITTER_GIST, timeout=5).json()

#     else:
#         user_id = twitter_client.get_user(username=username).data.id
#         tweets = twitter_client.get_users_tweets(
#             id=user_id, max_results=num_tweets, exclude=["retweets", "replies"]
#         )
#         tweets = tweets.data

#     for tweet in tweets:
#         tweet_dict = {}
#         tweet_dict["text"] = tweet["text"]
#         tweet_dict["url"] = f"https://twitter.com/{username}/status/{tweet['id']}"
#         tweet_list.append(tweet_dict)

#     return tweet_list


In [None]:
# # # Example function call
# print("Twitter Lookup\n\n")
# scrab_twitter = scrape_user_tweets(username="EdenEmarco177", mock=True)
# print(scrab_twitter)

### output_parsers

In [67]:
# app/output_parsers.py

from typing import List, Dict, Any

from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

class Summary(BaseModel):
    summary: str = Field(description="summary")
    facts: List[str] = Field(description="interesting facts about them")

    def to_dict(self) -> Dict[str, Any]:
        return {"summary": self.summary, "facts": self.facts}

summary_parser = PydanticOutputParser(pydantic_object=Summary)

In [78]:
# app/ice_breaker.py

from langchain.prompts.prompt import PromptTemplate

def ice_break_with(name: str) -> str:

    # linkedin_username = linkedin_profile_url(name=name)
    # linkedin_data = scrape_linkedin_profile(linkedin_profile_url=linkedin_username)

    twitter_username = lookup_twitter(name=name)
    #tweets = scrape_user_tweets(username=twitter_username)

    summary_template = """
    given the Linkedin information {information}, and their lates twiter {twitter_post} I want you to create:
    1. A short summary
    2. two interesting facts about them

    Use both information from twitter and Linkedin
    \n{format_instructions}
    """

    summary_prompt_template = PromptTemplate(
        input_variables=["information", "twitter_posts"],
        template=summary_template,
        partial_variables={
            "format_instructions": summary_parser.get_format_instructions()
        }
    )

    chain = summary_prompt_template | llm | summary_parser

    # res = chain.invoke(input={"information": linkedin_data, "twitter_posts": tweets})
    res = chain.invoke(input={"twitter_posts": twitter_username})

    print(res)


In [79]:
# # Example function call
print("ice breaker\n\n ")
ice_breaker  = ice_break_with(name="Elon Musk")
print(ice_breaker)

ice breaker

 


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find Elon Musk's Twitter profile URL using a search engine, then extract the username from the URL.  I will use Google to find the Twitter profile.

Action: Crawl Google 4 Twitter profile page
Action Input: Elon Musk
[0m[36;1m[1;3m[{'url': 'https://www.tesla.com/elon-musk', 'content': "As the co-founder and CEO of Tesla, Elon leads all product design, engineering and global manufacturing of the company's electric vehicles, battery products"}, {'url': 'https://apnews.com/hub/elon-musk', 'content': 'What is he worth to Tesla?\nAP Top Stories January 31 A\nElon Musk cannot keep Tesla pay package worth more than $55 billion, judge rules\nElon Musk says the first human has received an implant from Neuralink\nElon Musk says the first human has received an implant from Neuralink, but other details are scant\nCourt takes new look at whether Musk post illegally threatened workers with loss of stock

KeyError: "Input to PromptTemplate is missing variables {'twitter_post', 'information'}.  Expected: ['information', 'twitter_post'] Received: ['twitter_posts']\nNote: if you intended {twitter_post} to be part of the string and not a variable, please escape it with double curly braces like: '{{twitter_post}}'.\nFor troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_PROMPT_INPUT "