In [None]:
#@title Setup Environment { display-mode: "form" }

!pip install langgraph
!pip install -U langchain langchain_openai langchainhub tavily-python
!pip3 install google-api-python-client
# Youtube API Client
!pip3 install youtube_transcript_api

In [None]:
#@title Define the YoutubeSearchTool { display-mode: "form" }

from langchain.tools.base import BaseTool
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi
import json
from pydantic import Field


class YoutubeSearchTool(BaseTool):
    """Tool that fetches search results from YouTube.

    For a search term the tool:

    1. queries the YouTube Data API v3 for videos, newest first;
    2. looks up view/like statistics for the hits;
    3. keeps only videos with at least 1000 views;
    4. takes the 5 most recently published of those;
    5. attaches the first 280 characters of each video's transcript
       (English or Japanese) when one can be fetched.

    The result is returned as a JSON string (a list of objects).
    """

    name: str = "YoutubeSearchTool"
    youtube_api_key: str = Field(...,
                                 description="API key for accessing Youtube data.")
    # The argument list below mirrors the real `_run` signature; the API key is
    # configured on the tool instance and is never supplied per call.
    description: str = (
        "A tool that fetches search results from YouTube based on a query.\n"
        "Arguments:\n"
        "- q: The search term to look for on YouTube.\n"
        "- max_results: Optional number of search hits to scan (at most 50).\n\n"
        "Output Format:\n"
        "- Title: Displayed after translation to Japanese.\n"
        "- first_280_chars_of_transcript:This field contains the first 280 characters of the video's transcript.\n"
        "- viewCount: Number of times the video has been viewed.\n"
        "- likeCount: Number of likes the video has received.\n"
        "- Description: Displayed after translation to Japanese.\n"
        "- Published Date: Displayed as 'publishedAt'.\n"
        "- Video Link: Formatted as https://www.youtube.com/watch?v={video_id}."
    )

    def __init__(self, youtube_api_key: str, *args, **kwargs):
        """Create the tool.

        Args:
            youtube_api_key: YouTube Data API developer key.
            *args, **kwargs: Forwarded to ``BaseTool`` (e.g. ``name=``).

        Raises:
            ValueError: If ``youtube_api_key`` is empty or ``None``.
        """
        if not youtube_api_key:
            raise ValueError("A valid Youtube developer key must be provided.")
        # Pass the key on as a regular field so the base class validates/stores it.
        kwargs["youtube_api_key"] = youtube_api_key
        super().__init__(*args, **kwargs)

    def _fetch_statistics(self, youtube, video_ids):
        """Return ``{video_id: statistics_dict}`` for ``video_ids`` in one API call."""
        if not video_ids:
            return {}
        # videos.list accepts a comma-separated id list, so a single request
        # replaces one request per search hit.
        response = youtube.videos().list(
            part="statistics",
            id=",".join(video_ids)
        ).execute()
        return {
            item["id"]: item.get("statistics", {})
            for item in response.get("items", [])
        }

    def _transcript_preview(self, video_id: str, limit: int = 280) -> str:
        """Return the first ``limit`` transcript characters, or a fallback message."""
        try:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id, languages=['en', 'ja'])
            transcript_string = ' '.join(entry['text'] for entry in transcript)
            return transcript_string[:limit]
        except Exception:
            # Best effort: a missing/disabled transcript (or any fetch failure)
            # must not fail the whole search. Unlike a bare ``except:`` this
            # still lets KeyboardInterrupt/SystemExit propagate.
            return "Transcript not available"

    def _run(self, q: str, max_results: int = 100) -> str:
        """Search YouTube for ``q`` and return the curated hits as JSON.

        Args:
            q: Search term.
            max_results: Number of search hits to scan. Values above 50 are
                reduced to 50 (the maximum the search endpoint accepts) and
                negative values are treated as 0.

        Returns:
            JSON string: a list of up to 5 objects with the keys ``video_id``,
            ``title``, ``publishedAt``, ``description``, ``viewCount``,
            ``likeCount`` and ``first_280_chars_of_transcript``.
        """
        YOUTUBE_API_SERVICE_NAME = "youtube"
        YOUTUBE_API_VERSION = "v3"
        SEARCH_MAX_RESULTS_LIMIT = 50  # upper bound accepted by search.list
        MIN_VIEW_COUNT = 1000
        TOP_N = 5

        youtube = build(YOUTUBE_API_SERVICE_NAME,
                        YOUTUBE_API_VERSION, developerKey=self.youtube_api_key)

        # The default (100) is outside the range the API accepts, so clamp it
        # instead of sending a request that is rejected.
        page_size = max(0, min(int(max_results), SEARCH_MAX_RESULTS_LIMIT))

        search_response = youtube.search().list(
            q=q,
            part="id,snippet",
            order='date',  # Sort by published date
            type='video',
            maxResults=page_size
        ).execute()

        # Keep only hits that actually carry a video id.
        hits = [
            item for item in search_response.get('items', [])
            if item.get('id', {}).get('videoId')
        ]
        statistics_by_id = self._fetch_statistics(
            youtube, [item['id']['videoId'] for item in hits])

        video_list = []
        for item in hits:
            video_id = item['id']['videoId']
            snippet = item['snippet']
            # A hit with no statistics entry counts as 0 views and is therefore
            # filtered out below, rather than raising IndexError.
            statistics = statistics_by_id.get(video_id, {})
            video_data = {
                'video_id': video_id,
                'title': snippet['title'],
                'publishedAt': snippet['publishedAt'],
                'description': snippet['description'],
                'viewCount': statistics.get("viewCount", "0"),
                'likeCount': statistics.get("likeCount", "0"),
            }

            # Only keep videos with at least 1000 views
            if int(video_data['viewCount']) >= MIN_VIEW_COUNT:
                video_list.append(video_data)

        # Sort by 'publishedAt' in descending order and take the first 5
        latest_5_videos = sorted(
            video_list, key=lambda x: x['publishedAt'], reverse=True)[:TOP_N]

        # Attach the first 280 characters of the transcript to each video
        for video in latest_5_videos:
            video['first_280_chars_of_transcript'] = self._transcript_preview(
                video['video_id'])

        # Convert to JSON format
        return json.dumps(latest_5_videos)

    async def _arun(self, q: str, max_results: int = 100) -> str:
        """Use the YoutubeSearchTool asynchronously.

        ``_run`` performs blocking HTTP requests, so it is executed in the
        default thread-pool executor instead of directly on the event loop.
        """
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, q, max_results)

In [None]:
#@title Set API keys as Environment Variables
import os
from google.colab import userdata

# Each form field may hold either the real value or its untouched placeholder
# (the variable's own name). A placeholder or blank field is resolved from the
# Colab secret of the same name, then from an already-exported environment
# variable, so real keys never have to be typed into (and saved with) the notebook.

# OpenAI and Tavily API
openai_api_key = 'OPENAI_API_KEY'  # @param {type: "string"}
tavily_api_key = 'TAVILY_API_KEY'  # @param {type: "string"}
# LangSmith
langchain_tracing_v2 = 'true'  # @param {type: "string"}
langchain_api_key = 'LANGCHAIN_API_KEY'  # @param {type: "string"}
# YouTube Data API
youtube_api_key= 'YOUTUBE_API_KEY'   # @param {type: "string"}


def _resolve_setting(env_name, form_value):
    """Return the effective value for ``env_name``, or "" if none is available.

    Args:
        env_name: Name of the environment variable / Colab secret.
        form_value: Text taken from the notebook form field.

    Returns:
        The typed value when it is a real value; otherwise the Colab secret or
        the existing environment variable; otherwise an empty string.
    """
    typed = (form_value or "").strip()
    if typed and typed != env_name:
        return typed  # the user entered a real value in the form
    try:
        secret = userdata.get(env_name)
    except Exception:
        # Secret missing, notebook access not granted, or Colab secrets are
        # unavailable - fall through to the environment.
        secret = None
    if not secret:
        secret = os.environ.get(env_name, "")
    # A leftover placeholder exported by an earlier run is not a real value.
    return "" if secret == env_name else secret


_form_values = {
    'OPENAI_API_KEY': openai_api_key,
    'TAVILY_API_KEY': tavily_api_key,
    'LANGCHAIN_TRACING_V2': langchain_tracing_v2,
    'LANGCHAIN_API_KEY': langchain_api_key,
    'YOUTUBE_API_KEY': youtube_api_key,
}

# Export what could be resolved and report everything that is still missing.
for _env_name, _form_value in _form_values.items():
    _resolved = _resolve_setting(_env_name, _form_value)
    if _resolved:
        os.environ[_env_name] = _resolved
    else:
        print(f"{_env_name} is not set.")



In [None]:
#@title Integrating AI Tools into a Unified Workflow with LangChain

# Import the libraries needed to build the agent and its workflow graph
from langchain import hub
from langchain.agents import create_openai_functions_agent
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.runnables import RunnablePassthrough
from langchain_core.agents import AgentFinish
from langgraph.graph import END, Graph
import os

# Define the tools to be used: a Tavily web search limited to one result, and
# the YouTube search tool registered under the name "YoutubeSearch" with its
# API key read from the YOUTUBE_API_KEY environment variable.
tools = [
    TavilySearchResults(max_results=1),
    YoutubeSearchTool(name="YoutubeSearch", youtube_api_key=os.environ.get('YOUTUBE_API_KEY'))
]

# Retrieve the prompt to be used (pulled from the hub by its identifier)
prompt = hub.pull("hwchase17/openai-functions-agent")

# Define the model to be used (editable through the Colab form field)
model = 'gpt-3.5-turbo-1106'  # @param {type: "string"}

# Choose the Large Language Model (LLM)
llm = ChatOpenAI(model=model)

# Construct the OpenAI Functions agent from the LLM, the tools and the prompt
agent_runnable = create_openai_functions_agent(llm, tools, prompt)

# Define the agent: the agent's result is assigned to the 'agent_outcome' key,
# which `should_continue` and `execute_tools` read from the data they receive.
agent = RunnablePassthrough.assign(agent_outcome=agent_runnable)

# Define the function to execute tools
def execute_tools(data):
    """Run the tool requested by the agent and record the observation.

    Args:
        data: Workflow state dict. Must contain 'agent_outcome' (an object with
            ``tool`` and ``tool_input`` attributes); 'intermediate_steps' is
            created if absent.

    Returns:
        The same dict, with 'agent_outcome' removed and the pair
        ``(agent_action, observation)`` appended to 'intermediate_steps'.
    """
    agent_action = data.pop('agent_outcome')
    tools_by_name = {t.name: t for t in tools}
    tool_to_use = tools_by_name.get(agent_action.tool)
    if tool_to_use is None:
        # The model asked for a tool that does not exist. Report that back as
        # the observation so the agent can correct itself, instead of aborting
        # the whole run with a KeyError.
        observation = (
            f"{agent_action.tool} is not a valid tool, "
            f"try one of [{', '.join(tools_by_name)}]."
        )
    else:
        observation = tool_to_use.invoke(agent_action.tool_input)
    data.setdefault('intermediate_steps', []).append((agent_action, observation))
    return data

# Define the logic to decide whether to continue
def should_continue(data):
    """Pick the next edge: "exit" once the agent has finished, else "continue"."""
    finished = isinstance(data['agent_outcome'], AgentFinish)
    return "exit" if finished else "continue"

# Build the workflow: two nodes ("agent" and "tools") forming a loop.
workflow = Graph()
workflow.add_node("agent", agent)
workflow.add_node("tools", execute_tools)
# Execution starts at the agent node.
workflow.set_entry_point("agent")
# After the agent runs, `should_continue` returns "continue" (go to the tools
# node) or "exit" (end the run).
workflow.add_conditional_edges("agent", should_continue, {"continue": "tools", "exit": END})
# After a tool has run, control always returns to the agent.
workflow.add_edge('tools', 'agent')

# Compile into a LangChain Runnable
chain = workflow.compile()


In [None]:
#@title Run Workflow With Text
# Request sent to the agent (editable through the Colab form field).
text = "Search and collect articles and  videos about LangChain"  # @param {type: "string"}
# Start with an empty step history; `execute_tools` appends to it on each tool call.
result = chain.invoke({"input": text, "intermediate_steps": []})
# Last expression of the cell: display the agent's final answer text.
result['agent_outcome'].return_values['output']

"Here are some articles and videos about LangChain:\n\n### Articles\n1. [LangChain - Chains](https://blog.paperspace.com/langchain/)\n   - LangChain is a versatile interface for various Language Model Models (LLMs), offering a centralized development. The article discusses the possibilities of chatGPT and other LLMs in our lives and how LangChain can be used to build powerful AI models.\n\n### Videos\n1. [What Is Retrieval Augmented Generation (RAG)](https://www.youtube.com/watch?v=yj4-3SALiIk)\n   - Published Date: February 9, 2024\n   - Description: Explains Retrieval Augmented Generation and how it can be used to build powerful AI apps with large language models like chatGPT.\n   - Views: 1362, Likes: 117\n\n2. [Gemini + Google Retrieval Agent from a LangChain Template](https://www.youtube.com/watch?v=rZus0JtRqXE)\n   - Published Date: February 9, 2024\n   - Description: Demonstrates the use of langchain-cli to quickly bootstrap a LangChain agent using Google's `gemini-pro` model.\n