In [10]:
from crewai import Agent, Task, Crew, Process, LLM
from crewai.tools import tool
from pydantic import BaseModel, Field
from typing import List
from tavily import TavilyClient
import os

In [11]:
# Secrets are read from the environment instead of being embedded in the notebook:
# anything saved in a notebook (source or outputs) travels with the file.
# SECURITY: the keys that were previously hard-coded in this cell must be treated as
# leaked and revoked/rotated with their providers.
missing_keys = [name for name in ("GEMINI_API_KEY", "TAVILY_API_KEY") if not os.environ.get(name)]
if missing_keys:
    # Fail here with a clear message rather than deep inside the first agent call.
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(missing_keys)
        + ". Set them before starting the notebook kernel."
    )

# The Gemini key is not passed explicitly; as before, it is supplied through the
# GEMINI_API_KEY environment variable.
basic_llm = LLM(model="gemini/gemini-1.5-flash", temperature=0)
search_client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

In [12]:
# Cap on the number of hits requested from each individual search call.
results_num = 5

# Folder that receives the files written by the tasks; created up front so that
# writing into it cannot fail on a missing directory.
output_dir = "./ai-agent-output"
os.makedirs(output_dir, exist_ok=True)

## Setup Agents

### Agent A: Search Queries Recommendation Agent

In [13]:
# Structured output of the query-recommendation task: a list of 1 to 10 search queries.
class SuggestedSearchQueries(BaseModel):
    # `min_length` / `max_length` are the Pydantic v2 names for list-size constraints;
    # the former `min_items` / `max_items` spellings are deprecated there.
    queries: List[str] = Field(
        ...,
        title="Suggested search queries to help the user self-learn a topic",
        min_length=1,
        max_length=10,
    )

# Agent that turns a topic and skill level into platform-agnostic search queries.
search_queries_recommendation_agent = Agent(
    role="Search Queries Recommendation Agent",
    # One instruction per line; the lines are separated by newlines in the final prompt.
    goal=(
        "Generate a list of well-structured and general-purpose search queries based on a given learning topic and user level.\n"
        "Queries should be informative, specific, and useful for self-learning.\n"
        "Do not include the name of any platform, website, or content type.\n"
        "The goal is to create queries that can be used in general-purpose search engines."
    ),
    backstory=" ".join([
        "This agent helps learners by generating highly relevant search queries for any topic and skill level.",
        "It avoids mentioning specific sources, allowing downstream agents to handle targeted search in different content formats.",
    ]),
    llm=basic_llm,
    verbose=True,
)

# Task for the query agent. `{topic_name}` and `{learning_level}` are left as literal
# placeholders here (these are plain strings, not f-strings); the values come from the
# `inputs` dict given to `kickoff`.
search_queries_recommendation_task = Task(
    description=(
        "The user is interested in learning about: {topic_name}\n"
        "Their current skill level is: {learning_level} (beginner, intermediate, or advanced).\n"
        "Generate up to 10 clear, focused, and diverse search queries.\n"
        "These queries should be general and not include any source/platform names (e.g., avoid YouTube, Coursera, GitHub, etc.).\n"
        "Incorporate the learning level implicitly into the query wording (e.g., 'beginner guide to ...', 'advanced concepts in ...').\n"
        "Avoid vague or overly broad phrases; be precise while keeping queries adaptable for different formats.\n"
        "Keep the phrasing natural, as a human would search in a general search engine."
    ),
    expected_output="A JSON object containing a list of suggested general-purpose search queries.",
    agent=search_queries_recommendation_agent,
    # The answer is shaped by the SuggestedSearchQueries model and written to disk.
    output_json=SuggestedSearchQueries,
    output_file=os.path.join(output_dir, "step_1_suggested_search_queries.json"),
)

### Agent B: Visual Learning Resources Agent

In [14]:
# One search hit: the page title plus its URL.
class SingleSearchResult(BaseModel):
    title: str
    url: str = Field(..., title="The page URL")

# Container model handed to the search tasks as `output_json`: a flat list of hits.
class AllSearchResults(BaseModel):
    results: List[SingleSearchResult]

# Tool used by the visual agent: one Tavily search per video/course platform.
@tool
def visual_search_tool(query: str) -> List[dict]:
    """Searches only within youtube.com, udemy.com and coursera.org for visual resources.

    Args:
        query: A general-purpose search query without any site or platform name.

    Returns:
        The three raw search responses, in the order YouTube, Udemy, Coursera.
    """
    # (domain, query prefix): the course platforms are steered toward free content.
    site_prefixes = (
        ("youtube.com", ""),
        ("udemy.com", "free "),
        ("coursera.org", "free "),
    )
    # Terms must be space-separated. The previous plain concatenation produced strings
    # like "free<query>site:udemy.com", gluing "free" and the site: filter onto the
    # first and last words of the query.
    return [
        search_client.search(query=f"{prefix}{query} site:{domain}", max_results=results_num)
        for domain, prefix in site_prefixes
    ]

# Agent that looks up video/course resources through `visual_search_tool`.
visual_search_agent = Agent(
    role="Visual Learning Resources Agent",
    goal="To find visual educational resources (videos, playlists, courses) based on given search queries.",
    # The backstory no longer pins the audience to "advanced learners" (the level is a
    # run-time input handled by the task prompt) and no longer names Khan Academy,
    # which the search tool never queries.
    backstory="You specialize in finding helpful video-based learning materials for learners of any level from YouTube, Coursera, and Udemy.",
    llm=basic_llm,
    verbose=True,
    tools=[visual_search_tool]
)

# Task for the visual agent. Only the "links per website" line is an f-string, so that
# it follows `results_num` (the cap used by the search tool) instead of a hard-coded 5;
# `{learning_level}` stays a literal placeholder filled from the `kickoff` inputs.
visual_search_task = Task(
    description="\n".join([
        "Use the provided general search queries to find ONLY visual educational resources.",
        "Search exclusively within YouTube, Coursera, and Udemy. Ignore other sources.",
        f"Make sure to retrieve {results_num} links from each website [youtube.com, udemy.com, coursera.org]",
        "Exclude any non-visual resources such as blog posts, PDFs, GitHub repos, or written research papers.",
        "Return only video links, playlists, or course landing pages with high educational value.",
        "Ensure all results are relevant to the query and tailored to learners at the '{learning_level}' level.",
        "Results should be clear, informative, and suitable for visual learning formats.",
    ]),
    expected_output="A JSON file containing valid visual search results.",
    output_json=AllSearchResults,
    output_file=os.path.join(output_dir, "step_2_visual_results.json"),
    agent=visual_search_agent
)


### Agent C: Textual Learning Resources Agent

In [15]:
# SingleSearchResult and AllSearchResults are defined once, in the visual-search cell
# earlier in this notebook, and are reused here. The identical copies that used to be
# declared in this cell only shadowed those definitions.

# Tool used by the text agent: one Tavily search per trusted text domain.
@tool
def text_search_tool(query: str) -> List[dict]:
    """Searches only within medium.com, arxiv.org, github.com, paperswithcode.com, distill.pub for text resources.

    Args:
        query: A general-purpose search query without any site or platform name.

    Returns:
        One raw search response per domain, in the order listed above.
    """
    # distill.pub is included so the tool covers every domain its description promises;
    # previously it was advertised but never searched.
    domains = ("medium.com", "arxiv.org", "github.com", "paperswithcode.com", "distill.pub")
    # The space before "site:" matters: without it the filter is glued onto the last
    # word of the query instead of acting as a separate search operator.
    return [
        search_client.search(query=f"{query} site:{domain}", max_results=results_num)
        for domain in domains
    ]



# Agent that looks up written resources through `text_search_tool`.
text_search_agent = Agent(
    role="Textual Learning Resources Agent",
    goal="To find rich, insightful, and trustworthy educational resources from top-tier domains.",
    backstory=(
        "You're a research assistant who specializes in discovering valuable content like "
        "articles, GitHub repos, research papers, and technical blogs "
        "from well-known educational platforms."
    ),
    tools=[text_search_tool],
    llm=basic_llm,
    verbose=True,
)

# Task for the text agent. The per-website count follows `results_num` (the cap used by
# the search tool) instead of a hard-coded 5, and the learning-level instruction is now
# a grammatical sentence. `{learning_level}` stays a literal placeholder filled from the
# `kickoff` inputs, so only the `results_num` line is an f-string.
text_search_task = Task(
    description="\n".join([
        "Use the available search tools to find **educational resources** related to a given query.",
        "Each tool searches **only within a trusted domain** (e.g., Medium, arXiv, GitHub, PapersWithCode, Distill).",
        f"Make sure to retrieve {results_num} links from each website [medium.com, arxiv.org, github.com, paperswithcode.com, distill.pub]",
        "Focus on text-based, high-quality results such as: blog posts, technical articles, GitHub repositories, and academic papers.",
        "Avoid video-based content entirely (e.g., YouTube, Coursera, Udemy).",
        "Ensure that the results are relevant and valuable for a user at the '{learning_level}' level.",
        "Return **at least 5 relevant results** per query if possible.",
        "Each result must include a title and a working URL.",
    ]),
    expected_output="A JSON file with the best educational search results.",
    output_json=AllSearchResults,
    output_file=os.path.join(output_dir, "step_3_textual_results.json"),
    agent=text_search_agent
)


### Agent D: Markdown Learning Report Designer

In [16]:
# Agent that assembles the final Markdown report; it is given no tools.
summary_markdown_agent = Agent(
    role="Markdown Learning Report Designer",
    goal="Create a clean, structured Markdown (.md) report from visual and textual sources.",
    backstory=" ".join([
        "This agent builds a clean Markdown summary from visual and textual learning sources.",
        "The format is simple, readable, and structured with bullet points for easy navigation.",
    ]),
    verbose=True,
    llm=basic_llm,
)

# Task for the report agent: describes the layout of the Markdown report and writes the
# result to summary_report.md inside the output folder.
summary_markdown_task = Task(
    description=(
        "Create a fully structured Markdown (.md) report file that contains all visual and textual learning resources.\n"
        "Structure should be:\n"
        "1. Centered Title: 'Summary Report' (use markdown heading)\n"
        "2. Visual Resources Section:\n"
        "- List each visual result from the agent (Visual Learning Resources Agent) as a bullet point like this: [Title](URL)\n"
        "3. Textual Resources Section:\n"
        "- List each textual result from the agent (Textual Learning Resources Agent) as a bullet point like this: [Title](URL)\n"
        "Important Notes:\n"
        "- Do NOT include any HTML tags.\n"
        "- Do NOT include any code blocks like ```.\n"
        "- The file should be clean Markdown only, without any extra formatting."
    ),
    expected_output="A structured Markdown file with all links.",
    agent=summary_markdown_agent,
    output_file=os.path.join(output_dir, "summary_report.md"),
)


## Run the AI Crew

In [17]:
# Sequential pipeline: queries -> visual search -> queries (again) -> text search -> report.
# NOTE(review): the query agent and its task are each listed twice, so the query step is
# scheduled a second time right before the text search. Presumably this is meant to give
# the text search fresh queries as its preceding output; confirm it is intended, since it
# repeats the same step and rewrites the same output file.
flow = Crew(
    agents=[
        search_queries_recommendation_agent, visual_search_agent,
        search_queries_recommendation_agent, text_search_agent,
        summary_markdown_agent,
    ],
    tasks=[
        search_queries_recommendation_task, visual_search_task,
        search_queries_recommendation_task, text_search_task,
        summary_markdown_task,
    ],
    process=Process.sequential,
)

In [18]:
# Run the crew. The keys match the {topic_name} / {learning_level} placeholders used in
# the task descriptions. The call is the cell's last expression, so its result is shown
# as the cell output.
flow.kickoff(inputs=dict(topic_name="Data Science", learning_level="Intermediate"))

[1m[95m# Agent:[00m [1m[92mSearch Queries Recommendation Agent[00m
[95m## Task:[00m [92mThe user is interested in learning about: Data Science
Their current skill level is: Intermediate (beginner, intermediate, or advanced).
Generate up to 10 clear, focused, and diverse search queries.
These queries should be general and not include any source/platform names (e.g., avoid YouTube, Coursera, GitHub, etc.).
Incorporate the learning level implicitly into the query wording (e.g., 'beginner guide to ...', 'advanced concepts in ...').
Avoid vague or overly broad phrases; be precise while keeping queries adaptable for different formats.
Keep the phrasing natural, as a human would search in a general search engine.[00m


[1m[95m# Agent:[00m [1m[92mSearch Queries Recommendation Agent[00m
[95m## Final Answer:[00m [92m
{
  "queries": [
    "intermediate data science projects with python",
    "practical applications of machine learning for data science",
    "data visualization 

CrewOutput(raw='# Summary Report\n\n## Visual Learning Resources\n\n- [5 Unique Python Projects (beginner to intermediate) - YouTube](https://www.youtube.com/watch?v=_xf1TMs0ysk)\n- [Python in Data Science for Intermediate - YouTube](https://www.youtube.com/playlist?list=PLXovS_5EZGh4_ThQVgO2boGf31Dqs5vzm)\n- [End-to-End Data Science Projects with Python - YouTube](https://www.youtube.com/playlist?list=PLTsu3dft3CWg69zbIVUQtFSRx_UV80OOg)\n- [5 Awesome Data Science Projects Using Python - YouTube](https://www.youtube.com/watch?v=9QNRhSAxPjY)\n- [Mastering Python Through Projects: 20 Projects For Intermediates](https://www.youtube.com/watch?v=5OnI5PGSUKE)\n- [Data Science Projects with Python - Udemy](https://www.udemy.com/course/data-science-projects-with-python/)\n- [Python Projects: Python & Data Science with Python Projects - Udemy](https://www.udemy.com/course/python-projects-python-data-science-with-python-projects/)\n- [Data Science & Data Analytics Real World Projects | Udemy](ht