<a href="https://colab.research.google.com/github/tarunku/open_llm/blob/main/How_to_Build_LangChain_Agents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU langchain langgraph langchain-openai langchain_community  transformers torch accelerate bitsandbytes wikipedia youtube_search

In [None]:
from langchain_community.tools import WikipediaQueryRun  # pip install wikipedia
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import YouTubeSearchTool  # pip install youtube_search
from langchain_openai import ChatOpenAI
from langchain_community.tools.openai_dalle_image_generation import (
   OpenAIDALLEImageGenerationTool
)
from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper

import torch
from huggingface_hub import login
from langchain import HuggingFaceHub
from langchain.llms import HuggingFacePipeline

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextStreamer,
    BitsAndBytesConfig,
    AutoModelForSpeechSeq2Seq,
    pipeline
)

import os

from IPython.display import Image, display, YouTubeVideo
from google.colab import userdata

# Copy each API key fetched through google.colab's userdata into the process
# environment, under the same name it is stored with.
for _secret_name in ('OPENAI_API_KEY', 'LANGSMITH_API_KEY'):
    os.environ[_secret_name] = userdata.get(_secret_name)


In [None]:

# Log in to the Hugging Face Hub with the HF_TOKEN value read from Colab userdata.
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

In [None]:
def load_pipeline(READER_MODEL_NAME):
  """Load a 4-bit quantized causal LM and wrap it in a text-generation pipeline.

  Args:
    READER_MODEL_NAME: Model identifier passed to ``from_pretrained`` for both
      the model and its tokenizer.

  Returns:
    A ``(text_gen_pipeline, tokenizer)`` tuple.
  """
  # Imported locally under an alias: a later cell rebinds the module-level name
  # `pipeline` to the object returned by this function, so relying on the
  # global would make a second call to load_pipeline() invoke that object
  # instead of the transformers factory.
  from transformers import pipeline as hf_pipeline

  # 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
  bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
  )

  # Informational only: actual placement is delegated to device_map="auto" below.
  device = "cuda" if torch.cuda.is_available() else "cpu"
  print(f"Loading model on: {device}")

  model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, device_map="auto", quantization_config=bnb_config)

  tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
  # Reuse the end-of-sequence token as the padding token.
  tokenizer.pad_token = tokenizer.eos_token

  # TextStreamer has no `timeout` parameter (that belongs to
  # TextIteratorStreamer); extra keyword arguments are forwarded to
  # tokenizer.decode, so the former `timeout=600` is dropped.
  streamer = TextStreamer(tokenizer)

  text_gen_pipeline = hf_pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        do_sample=True,
        temperature=0.2,
        repetition_penalty=1.1,
        return_full_text=False,
        max_new_tokens=250,
        streamer=streamer
    )

  return text_gen_pipeline, tokenizer


In [None]:
# Set up the OpenAI chat model.
openai_llm = ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=userdata.get('OPENAI_API_KEY'))

# Set up the open-source model.
# NOTE: this assignment rebinds the module-level name `pipeline`, shadowing the
# `pipeline` factory imported from transformers at the top of the notebook. Any
# code that still expects the global `pipeline` to be that factory will see
# this object instead after the cell has run.

pipeline, tokenizer = load_pipeline("HuggingFaceH4/zephyr-7b-beta")
#pipeline, tokenizer = load_pipeline("meta-llama/Meta-Llama-3.1-8B-Instruct")

# `device` is read later by tokenize_on_cuda(); it is not passed to the LLM wrapper.
device = "cuda" if torch.cuda.is_available() else "cpu"
# LangChain LLM wrapper around the local text-generation pipeline.
os_llm = HuggingFacePipeline(
    pipeline=pipeline
)


In [None]:
# Report the device of the model held by the text-generation pipeline.
print(f"Model is on: {pipeline.model.device}")

In [None]:
# Tokenize a sample string and report which device the resulting tensor is on.
tokens = tokenizer("Test input", return_tensors="pt")  # Convert text to tensor
print(f"Tokenizer outputs are on: {tokens['input_ids'].device}")

In [None]:
def tokenize_on_cuda(text):
    """Tokenize ``text`` and move every returned tensor to the global ``device``.

    Despite the name, the tensors go to whatever ``device`` currently holds,
    which is "cpu" when CUDA is unavailable.

    Returns:
        A plain dict mapping each tokenizer output key to its moved tensor.
    """
    encoded = tokenizer(text, return_tensors="pt")
    moved = {}
    for name, tensor in encoded.items():
        moved[name] = tensor.to(device)
    return moved

# Smoke-test the helper on a sample string.
tokens = tokenize_on_cuda("Example input")
print(tokens['input_ids'].device)  # Should be cuda:0 when a GPU is available; cpu otherwise

When a user queries our agent, it decides how best to explain the topic: as text drawn from a Wikipedia article, as an image generated with DALL-E for visual understanding, or through suggested YouTube videos for deeper comprehension.



In [None]:
#Tools

import ast
import re

def wikipedia(toic:str):
  """Look up a topic with the Wikipedia query tool and return its result.

  Args:
    toic: The topic to search for. The misspelled parameter name is kept so
      that existing keyword callers keep working.

  Returns:
    Whatever ``WikipediaQueryRun.invoke`` returns for the topic. The API
    wrapper is configured with ``top_k_results=1`` and
    ``doc_content_chars_max=250``.
  """
  wiki_api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
  # Named `wiki_tool` so the local does not shadow this function's own name.
  wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api_wrapper)
  return wiki_tool.invoke(toic)


def dalle(topic:str):
  """Generate an image for ``topic`` with the DALL-E image generation tool.

  Args:
    topic: Text handed to the tool as its input.

  Returns:
    The value returned by the tool's ``invoke`` call (presumably the URL of
    the generated image; confirm against the tool's documentation).
  """
  image_tool = OpenAIDALLEImageGenerationTool(
    api_wrapper=DallEAPIWrapper(model="dall-e-3", size="1024x1024")
  )
  return image_tool.invoke(topic)

def youtube(topic:str):
  """Search YouTube for ``topic`` and return the results as a Python list.

  Args:
    topic: Free-text search query. The "<query>,<count>" form understood by
      the underlying tool is passed through unchanged.

  Returns:
    The tool's string output parsed into a Python object with
    ``ast.literal_eval``.
  """
  # YouTubeSearchTool treats its input as "<query>,<num_results>" and parses the
  # part after the first comma as an integer. A free-text topic that merely
  # contains commas (e.g. "cats, dogs") would fail that parse, so commas are
  # flattened to spaces unless the input is exactly "<query>,<count>".
  query = topic
  if "," in topic:
    parts = topic.split(",")
    has_count_suffix = len(parts) == 2 and parts[1].strip().isdecimal()
    if not has_count_suffix:
      query = " ".join(part.strip() for part in parts if part.strip())

  search_tool = YouTubeSearchTool()
  raw_results = search_tool.run(query)
  # The tool returns its results as a string; literal_eval parses it without
  # executing arbitrary code.
  return ast.literal_eval(raw_results)

#results = youtube("Winmore Academy Whitefiele Bangalore")


In [None]:
#Agent
from langchain_openai import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain_core.tools import Tool
from langchain.agents import (
    create_react_agent,
    AgentExecutor
)
from langchain import hub

# Select the model that drives the agent; agent_007() reads this module-level `llm`.
llm = openai_llm
#llm = os_llm


def agent_007(topic: str) -> str:
    """Run a ReAct agent that explains ``topic`` via text, image, or video.

    The agent is built from the module-level ``llm`` and three tools wrapping
    the ``wikipedia``, ``dalle`` and ``youtube`` helpers, using the
    ``hwchase17/react`` prompt pulled from the LangChain hub on every call.

    Args:
        topic: The user's topic or question.

    Returns:
        The ``"output"`` entry of the agent executor's result.
    """

    template = """
   You are a helpful bot named Chandler. Your task is to explain topic {topic_name}
   asked by the user via three mediums: text, image or video.

   If the asked topic is best explained in text format, use the Wikipedia tool.
   If the topic is best explained by showing a picture of it, generate an image
   of the topic using Dall-E image generator and print the image URL.
   Finally, if video is the best medium to explain the topic, conduct a YouTube search on it
   and return found video links.
   """

    prompt_template = PromptTemplate(
        template=template, input_variables=["topic_name"]
    )

    tools_for_agent = [
        Tool(
            name="Wikipedia tool",
            func=wikipedia,
            description="A tool to explain things in text format. Use this tool if you think the user’s asked concept is best explained through text.",
        ),
        Tool(
            name="Dall-E image generator",
            func=dalle,
            description="A tool to generate images. Use this tool if you think the user’s asked concept is best explained through an image.",
        ),
        Tool(
            name="Youtube Search tool",
            func=youtube,
            description="A tool to search YouTube videos. Use this tool if you think the user’s asked concept can be best explained by watching a video.",
        )
    ]

    react_prompt = hub.pull("hwchase17/react")

    agent = create_react_agent(llm=llm, tools=tools_for_agent, prompt=react_prompt)
    agent_executor = AgentExecutor(agent=agent, tools=tools_for_agent, verbose=True)

    # format() yields a plain string. The previous format_prompt() call
    # returned a PromptValue object, so its string form (not the bare prompt
    # text) was interpolated into the ReAct prompt's input slot.
    agent_input = prompt_template.format(topic_name=topic)

    result = agent_executor.invoke(
        input={"input": agent_input}
    )

    return result["output"]


In [None]:
# Example query; the wording suggests the YouTube tool is the intended route (the agent decides at run time).
agent_007('what is the most famous michael jackson song on youtube?')

In [None]:
# Example open-ended query; which tool the agent picks is decided at run time.
agent_007('Hows the bangalore city traffic looks like in a normal working day?')

In [None]:
# Example query naming a person; presumably meant to exercise the Wikipedia tool (the agent decides at run time).
agent_007('Narendra Modi')

In [None]:
# Example query asking for a visual; presumably meant to exercise the Dall-E tool (the agent decides at run time).
agent_007('a visual representation of the earth')

In [None]:
# Render an image from a hard-coded URL.
# NOTE(review): this is a signed URL whose query string carries an expiry
# (`se=2025-03-30T09%3A20%3A58Z`), so it presumably stops resolving after that
# time; it also embeds org/user identifiers. Prefer displaying the URL returned
# by a fresh dalle(...) call.
display(Image(url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-uZ8XGmBrCDbMMojRLEFKSmvM/user-jJY7mx87k1jPs2adt0yL2vAG/img-QMc6i0dCQoA4A8xKqLXF8Xxs.png?st=2025-03-30T07%3A20%3A58Z&se=2025-03-30T09%3A20%3A58Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-03-30T07%3A07%3A17Z&ske=2025-03-31T07%3A07%3A17Z&sks=b&skv=2024-08-04&sig=vNOcwCjJ0eHU5reVy0itPnoUih%2Bzz5uy13XkuOyb0fM%3D'))