# Knowledge Sources in CrewAI

## String knowledge source

In [205]:
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Define the knowledge.
# Adjacent string literals are used instead of an indented triple-quoted
# string so that the source-code indentation of the continuation lines does
# not become part of the knowledge text. One sentence per line, as before.
policy_text = (
    "Our return policy allows customers to return any product within 30 days of purchase.\n"
    "Refunds will be issued only if the item is unused and in original packaging.\n"
    "Customers must provide proof of purchase when requesting a return."
)

# Create a StringKnowledgeSource object
return_policy_knowledge = StringKnowledgeSource(content=policy_text)


In [206]:
import os

from dotenv import load_dotenv

# Copy the variables from a local .env file into the process environment.
load_dotenv()

# The Azure settings this notebook relies on. Assigning
# os.environ[name] = os.getenv(name) changes nothing when the variable is set
# and fails with an opaque TypeError when it is missing, so the variables are
# checked explicitly and reported by name instead.
_REQUIRED_AZURE_VARS = ("AZURE_API_KEY", "AZURE_API_BASE", "AZURE_API_VERSION")
_missing_vars = [name for name in _REQUIRED_AZURE_VARS if not os.getenv(name)]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_vars)
        + ". Define them in your shell or in the .env file."
    )

In [207]:
from crewai import LLM
from dotenv import load_dotenv
# load_dotenv() is repeated here; it already ran in the previous cell.
load_dotenv()

# Shared LLM handle that the agents below receive via `llm=llm`.
# NOTE(review): "azure/gpt-4o" presumably refers to an Azure OpenAI deployment
# named "gpt-4o" — confirm it matches the deployment name of your resource.
llm = LLM(model="azure/gpt-4o")


In [208]:
from crewai import Agent

# Customer-service persona that answers the return-policy questions.
returns_agent = Agent(
    llm=llm,
    role="Product Returns Assistant",
    backstory="You work in customer service and specialize in returns, refunds, and policies.",
    goal="Answer customer questions about return policy accurately.",
    verbose=True,
    allow_delegation=False,
)


In [209]:
from crewai import Task

# {question} is a placeholder; it is filled from the `inputs` dict passed to
# crew.kickoff() further down.
returns_task = Task(
    agent=returns_agent,
    expected_output="A concise and accurate answer.",
    description="Answer the following customer question about returns: {question}",
)


In [210]:
from crewai import Crew, Process

# One agent, one task, run sequentially. The return-policy knowledge is
# attached at crew level here rather than to an individual agent.
crew = Crew(
    knowledge_sources=[return_policy_knowledge],
    process=Process.sequential,
    tasks=[returns_task],
    agents=[returns_agent],
    verbose=True,
)


[93m 


In [211]:
from pprint import pprint

# Run the crew; the "question" entry fills the {question} placeholder of the
# task description.
result = crew.kickoff(
    inputs={"question": "Can I get a refund if I used the item once?"},
)

# Pretty-print the raw answer text so the long string is wrapped.
pprint(result.raw)

[1m[95m# Agent:[00m [1m[92mProduct Returns Assistant[00m
[95m## Task:[00m [92mAnswer the following customer question about returns: Can I get a refund if I used the item once?[00m


[1m[95m# Agent:[00m [1m[92mProduct Returns Assistant[00m
[95m## Final Answer:[00m [92m
Refund eligibility for items that have been used once typically depends on the store’s return policy and the condition of the item. Many stores only offer refunds for unused, unopened, and resalable items unless the product is defective or did not meet its intended purpose. It is recommended to check the specific return policy of the store or brand from which the item was purchased for accurate guidance.[00m




('Refund eligibility for items that have been used once typically depends on '
 'the store’s return policy and the condition of the item. Many stores only '
 'offer refunds for unused, unopened, and resalable items unless the product '
 'is defective or did not meet its intended purpose. It is recommended to '
 'check the specific return policy of the store or brand from which the item '
 'was purchased for accurate guidance.')


## Text file knowledge source

In [212]:
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource

# Knowledge backed by a plain-text file.
# NOTE(review): CrewAI's docs describe knowledge file paths as relative to a
# `knowledge/` directory at the project root — confirm where hr_policy.txt lives.
text_source = TextFileKnowledgeSource(file_paths=["hr_policy.txt"])

In [213]:
from crewai import Agent, Task, Crew, Process

# Agent-level knowledge: the text-file source is attached to this agent
# directly instead of to the crew.
hr_agent = Agent(
    llm=llm,
    knowledge_sources=[text_source],
    role="HR Policy Assistant",
    backstory="You're a reliable HR knowledge assistant.",
    goal="Answer employee questions about HR policies.",
)

# Fixed question, so kickoff() needs no inputs.
task = Task(
    agent=hr_agent,
    expected_output="A clear summary of the leave policy.",
    description="What is the leave policy for new employees?",
)


In [214]:
# Sequential crew wrapped around the HR agent and its single task.
crew = Crew(
    verbose=True,
    process=Process.sequential,
    tasks=[task],
    agents=[hr_agent],
)

# Run the task and pretty-print the raw answer text
# (pprint was imported in an earlier cell).
result = crew.kickoff()
pprint(result.raw)


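ValueError: Invalid Knowledge Configuration: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys

Note: CrewAI embeds knowledge sources with OpenAI embeddings by default, independently of the LLM the agent uses. Only Azure credentials are configured in this notebook, so the run above fails; pass an explicit `embedder` configuration (see "Custom embedding model" below) or set `OPENAI_API_KEY`.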

## PDF source

In [None]:
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource

# Knowledge backed by a PDF document.
pdf_source = PDFKnowledgeSource(file_paths=["meeting_notes.pdf"])



In [None]:
# Agent that answers from the PDF knowledge attached to it.
meeting_summarizer = Agent(
    llm=llm,
    knowledge_sources=[pdf_source],
    role="Meeting Note Summarizer",
    backstory="You help the team stay updated on discussions.",
    goal="Provide concise summaries of weekly meetings.",
)

# Fixed request, so kickoff() needs no inputs.
task = Task(
    agent=meeting_summarizer,
    expected_output="A bullet-point list of action items.",
    description="Summarize the key action items from last week's meeting.",
)


In [None]:
# Sequential crew wrapped around the meeting summarizer.
crew = Crew(
    verbose=True,
    process=Process.sequential,
    tasks=[task],
    agents=[meeting_summarizer],
)

# Run the task and pretty-print the raw answer text
# (pprint was imported in an earlier cell).
result = crew.kickoff()
pprint(result.raw)


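ValueError: Invalid Knowledge Configuration: Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys

Note: this is the default OpenAI knowledge embedder failing because no `OPENAI_API_KEY` is set. Configure an explicit `embedder` (see "Custom embedding model" below) to use a different embedding provider.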

## CSV source

In [None]:
from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource

# Knowledge backed by a CSV file.
csv_source = CSVKnowledgeSource(file_paths=["feedback.csv"])

In [None]:
# Agent that answers from the CSV knowledge attached to it.
feedback_analyst = Agent(
    llm=llm,
    knowledge_sources=[csv_source],
    role="User Feedback Analyst",
    backstory="You specialize in converting raw feedback into insights.",
    goal="Identify common themes in user feedback.",
)

# Fixed question, so kickoff() needs no inputs.
task = Task(
    agent=feedback_analyst,
    expected_output="A short list of recurring issues.",
    description="What are the three most common complaints users had last month?",
)

In [None]:
# Sequential crew wrapped around the feedback analyst.
crew = Crew(
    verbose=True,
    process=Process.sequential,
    tasks=[task],
    agents=[feedback_analyst],
)

# Run the task and pretty-print the raw answer text
# (pprint was imported in an earlier cell).
result = crew.kickoff()
pprint(result.raw)


## JSON source

In [None]:
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource

# Knowledge backed by a JSON file.
json_source = JSONKnowledgeSource(file_paths=["company_info.json"])

In [None]:
# Agent for organisation-level questions.
company_expert = Agent(
    role="Company Info Specialist",
    goal="Answer questions about company structure and data.",
    backstory="You are an internal data assistant for org-level queries.",
    # No agent-level knowledge here: in this example the JSON source is
    # attached to the Crew instead (see the Crew(...) call in the next cell).
    llm=llm
)

# Fixed question, so kickoff() needs no inputs.
task = Task(
    description="How many teams are working on the product and what are their names?",
    expected_output="A list of team names and their sizes.",
    agent=company_expert
)


In [None]:
# Crew-level knowledge: the JSON source is attached to the crew rather than
# to the agent.
crew = Crew(
    knowledge_sources=[json_source],
    verbose=True,
    process=Process.sequential,
    tasks=[task],
    agents=[company_expert],
)

# Run the task and print the crew output.
result = crew.kickoff()
print(result)


## Custom embedding model

In [None]:
# Embedder configuration that sends knowledge embeddings to a local Ollama
# server instead of the default embedding provider.
ollama_embedder = {
    "provider": "ollama",
    "config": {
        # Must match or be compatible with Ollama's supported embedding models.
        "model": "nomic-embed-text",
        # CrewAI's Ollama embedder settings use the key "url" and take the
        # full embeddings endpoint; "api_url" is not one of the documented
        # keys, so the address given under it would not be the one used.
        "url": "http://localhost:11434/api/embeddings",
    },
}

In [None]:
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Internal onboarding FAQ
faq_content = """
- You can access your email via portal.company.com using your employee credentials.
- The standard work hours are from 9am to 6pm, Monday to Friday.
- All reimbursement requests must be submitted by the 5th of the following month.
- For any IT-related issues, contact support@company.com.
"""

# Create a string knowledge source.
# The embedder is not configured on the source itself: `embedder` is not a
# documented StringKnowledgeSource field, so passing it here had no effect.
# The Ollama embedder is supplied where the knowledge is attached (the
# Crew(...) call below passes embedder=ollama_embedder).
faq_knowledge = StringKnowledgeSource(content=faq_content)


In [None]:
from crewai import Agent

# Onboarding assistant. It is given the shared `llm` defined earlier in this
# notebook, like every other agent here; without an explicit `llm` the agent
# would fall back to CrewAI's default model provider, for which this notebook
# configures no credentials.
hr_faq_agent = Agent(
    role="HR Assistant",
    goal="Answer onboarding-related questions for new hires.",
    backstory="You are a helpful assistant who knows everything about internal policies and onboarding processes.",
    allow_delegation=False,
    verbose=True,
    llm=llm,
    embedder=ollama_embedder
)


In [None]:
from crewai import Task

# {question} is filled from the `inputs` dict passed to crew.kickoff().
# No `embedder` argument here: it is not a documented Task attribute.
# Embedding is configured on the agent and on the crew instead.
task = Task(
    description="Answer this onboarding question: {question}",
    expected_output="A short, accurate answer based on internal HR documentation.",
    agent=hr_faq_agent
)


In [None]:
from pprint import pprint

from crewai import Crew, Process

# Crew-level knowledge plus the custom embedder used to index and query it.
crew = Crew(
    embedder=ollama_embedder,
    knowledge_sources=[faq_knowledge],
    verbose=True,
    process=Process.sequential,
    tasks=[task],
    agents=[hr_faq_agent],
)

# The "question" entry fills the {question} placeholder of the task.
result = crew.kickoff(
    inputs={"question": "What are the working hours and how do I get reimbursed?"},
)

# Pretty-print the raw answer text so the long string is wrapped.
pprint(result.raw)


## Custom knowledge source

In [None]:
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from typing import Dict, Any
from pydantic import Field
import requests

class WeatherKnowledgeSource(BaseKnowledgeSource):
    """Knowledge source that fetches current weather from the Open-Meteo API.

    ``city`` is a display label: it appears in the log line, in the formatted
    text and as the key of the mapping returned by ``load_content``. The
    location that is actually queried is given by ``latitude`` and
    ``longitude``. Both default to San Francisco, the coordinates this class
    originally hard-coded, so ``WeatherKnowledgeSource(city="San Francisco")``
    behaves as before. Pass matching coordinates for any other city, otherwise
    San Francisco data would be reported under that city's name.
    """

    city: str = Field(description="City for which weather should be fetched")
    latitude: float = Field(
        default=37.77,
        description="Latitude of the location to query (default: San Francisco)",
    )
    longitude: float = Field(
        default=-122.42,
        description="Longitude of the location to query (default: San Francisco)",
    )
    request_timeout: float = Field(
        default=10.0,
        description="Seconds to wait for the weather API before giving up",
    )

    def load_content(self) -> Dict[Any, str]:
        """Fetch the current weather and return ``{city: formatted_text}``.

        Raises:
            ValueError: If the request or the processing of its response
                fails for any reason. The original exception is chained as
                the cause so the underlying failure stays visible.
        """
        print(f"Fetching weather for {self.city}...")

        # Open-Meteo API (no key needed for basic data)
        endpoint = "https://api.open-meteo.com/v1/forecast"
        params = {
            "latitude": self.latitude,
            "longitude": self.longitude,
            "current_weather": True
        }

        try:
            # Without a timeout, requests waits indefinitely on a stalled
            # connection.
            response = requests.get(
                endpoint, params=params, timeout=self.request_timeout
            )
            response.raise_for_status()

            weather_data = response.json().get("current_weather", {})
            formatted = self.validate_content(weather_data)
        except Exception as e:
            # Deliberately broad: every failure of this step is reported to
            # callers as ValueError, as before.
            raise ValueError(f"Failed to fetch weather data: {str(e)}") from e

        return {self.city: formatted}

    def validate_content(self, data: dict) -> str:
        """Format the ``current_weather`` payload as human-readable text.

        Returns a fixed placeholder sentence when ``data`` is empty. Keys
        missing from ``data`` are rendered as ``None``.
        """
        if not data:
            return "No weather data available."

        return (
            f"Current weather in {self.city}:\n"
            f"- Temperature: {data.get('temperature')}°C\n"
            f"- Wind Speed: {data.get('windspeed')} km/h\n"
            f"- Weather Code: {data.get('weathercode')}\n"
            f"- Time: {data.get('time')}"
        )

    def add(self) -> None:
        """Load the content, chunk it, and save the chunks."""
        content = self.load_content()
        # Only the texts are needed; the keys are just labels.
        for text in content.values():
            chunks = self._chunk_text(text)
            self.chunks.extend(chunks)
        self._save_documents()


In [None]:
from crewai import Agent, LLM

# Knowledge source defined in the previous cell; San Francisco is its default
# location.
weather_knowledge = WeatherKnowledgeSource(city="San Francisco")

weather_agent = Agent(
    role="Weather Reporter",
    goal="Answer questions about the current weather forecast.",
    backstory="You are a friendly meteorologist who provides real-time weather updates.",
    knowledge_sources=[weather_knowledge],
    # Same Azure-hosted model as the rest of this notebook ("azure/gpt-4o"):
    # only AZURE_* credentials are configured here, and a bare "gpt-4o" would
    # need OpenAI credentials. temperature=0.0 is kept as in the original cell.
    llm=LLM(model="azure/gpt-4o", temperature=0.0),
    verbose=True
)


In [None]:
from crewai import Task, Crew, Process

# Fixed question, so kickoff() needs no inputs.
task = Task(
    agent=weather_agent,
    expected_output="A concise weather summary for San Francisco.",
    description="What is the current temperature and wind speed in San Francisco?",
)

# Sequential crew wrapped around the weather agent.
crew = Crew(
    verbose=True,
    process=Process.sequential,
    tasks=[task],
    agents=[weather_agent],
)


In [None]:
# Run the weather crew and print the crew output.
result = crew.kickoff()
print(result)
