In [1]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

os.environ['NUMEXPR_MAX_THREADS'] = '4'
os.environ['NUMEXPR_NUM_THREADS'] = '2'
# import numexpr as ne

In [2]:
import os
os.environ["OPENAI_API_KEY"] = "sk-xxx"
import openai

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)
openai.api_key = api_key=os.getenv('OPENAI_API_KEY')


In [3]:
from pathlib import Path
from llama_index import download_loader, VectorStoreIndex, load_index_from_storage
from llama_index.storage.storage_context import StorageContext

PDFReader = download_loader("PDFReader")

loader = PDFReader()

class PodcastTitle:
    def __init__(self, name, about, file, key):
        self.name = name
        self.about = about   
        self.file = file
        self.key = key

# title: name, about, file, key 
podcast_titles = [
    PodcastTitle("10 Tools for Managing Stress and Anxiety with Huberman", "manage stress and anxiety", "behaviour/10_Tools_for_Managing_Stress_&_Anxiety_Huberman_Lab_Podcast_10.pdf", "tools_for_stress"),
    PodcastTitle("The Science of Setting and Achieving Goals with Huberman", "set and achieve goals", "behaviour/55_The_Science_of_Setting_&_Achieving_Goals_Huberman_Lab_Podcast_55.pdf", "setting_goals"),
    PodcastTitle("Dr Chris Palmer Diet and Nutrition for Mental Health with Huberman", "have healthy diet for mental health", "food/99_Dr_Chris_Palmer_Diet_&_Nutrition_for_Mental_Health_Huberman_Lab_Podcast_99.pdf", "diet_nutrition"),
]

podcast_vector_index = {}
for podcast in podcast_titles:
    # try/except to read documents only the first time
    try:
        storage_context = StorageContext.from_defaults(persist_dir=f"./storage/cache/{podcast.key}_vector")
        podcast_vector_index[podcast.key] = load_index_from_storage(storage_context)
    except:
        documents = loader.load_data(file=Path(f"..\\assets\\AndrewHuberman\\{podcast.file}"))
        vector_index = VectorStoreIndex.from_documents(documents)
        podcast_vector_index[podcast.key] = vector_index
        vector_index.storage_context.persist(persist_dir=f".\\storage\\cache\\{podcast.key}_vector")




INFO:llama_index.indices.loading:Loading all indices.
Loading all indices.
INFO:llama_index.indices.loading:Loading all indices.
Loading all indices.
INFO:llama_index.indices.loading:Loading all indices.
Loading all indices.


In [4]:
podcast_vector_index

{'tools_for_stress': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1bc7cbdaca0>,
 'setting_goals': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1bc006cd670>,
 'diet_nutrition': <llama_index.indices.vector_store.base.VectorStoreIndex at 0x1bc014c2f40>}

In [5]:
from llama_index.indices.postprocessor import KeywordNodePostprocessor  # to exclude keywords
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata


# exclude keywords
node_processor = KeywordNodePostprocessor(
    exclude_keywords=["supplements", "LMNT", "InsideTracker", "Helix", "ROKA", "Athletic Greens", "Thesis", "Eight Sleep"]
)

query_engine_tools = []
podcats_vector_engines = {}

# retirever > engines > tools
# VectorIndexRetriever > RetrieverQueryEngine > QueryEngineTool
for podcast in podcast_titles:
    retriever = VectorIndexRetriever(
        index = podcast_vector_index[podcast.key],
        similarity_top_k=3,  # number of similar documents to return to the LLM
    )

    podcats_vector_engines[podcast.key] = RetrieverQueryEngine(
        retriever = retriever,
        node_postprocessors=[node_processor]  # used to filter the reurned nodes
    )

    new_tool = QueryEngineTool(
        query_engine=podcats_vector_engines[podcast.key],
        metadata = ToolMetadata(
            name=f"{podcast.key}_vector_tool",
            description=f"Useful for retrieving specific context from a podcast {podcast.name}. "
            f"Use when you need information related to {podcast.about}.",
        )

    )
    query_engine_tools.append(new_tool)

In [6]:
query_engine_tools

[<llama_index.tools.query_engine.QueryEngineTool at 0x1bc0295a190>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x1bc0295a3d0>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x1bc0295a610>]

In [7]:
from llama_index.agent import OpenAIAgent
from llama_index.llms import OpenAI

agent = OpenAIAgent.from_tools(query_engine_tools, 
                               llm=OpenAI(temperature=0, 
                                          model="gpt-4"),  # gpt-3.5-turbo-0613
                               verbose=True)

In [8]:
openai.log = "debug"

In [9]:
response = agent.chat("Tell me about Ketogenic diet")

STARTING TURN 1
---------------

INFO:openai._base_client:Retrying request to /chat/completions in 0.935613 seconds
Retrying request to /chat/completions in 0.935613 seconds
INFO:openai._base_client:Retrying request to /chat/completions in 1.558817 seconds
Retrying request to /chat/completions in 1.558817 seconds
INFO:openai._base_client:Retrying request to /chat/completions in 3.625344 seconds
Retrying request to /chat/completions in 3.625344 seconds


APIConnectionError: Connection error.

In [None]:
response.sources

NameError: name 'response' is not defined

In [None]:
pip show certifi

Name: certifiNote: you may need to restart the kernel to use updated packages.

Version: 2023.11.17
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Location: c:\users\hc138yy\appdata\local\miniconda3\envs\llamaagents\lib\site-packages
Requires: 
Required-by: httpcore, httpx, requests
