In [5]:
import os
import openai
import dotenv

# Pin the HuggingFace tokenizers threading mode up front. Without it the
# library prints a "process just got forked" warning (and disables parallelism
# itself) each time the kernel forks, as the outputs further down show.
# setdefault keeps any value that is already exported in the environment.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# Load variables from a local .env file into the process environment
# (presumably including the OpenAI API key used later — confirm).
dotenv.load_dotenv()

True

In [6]:
# One-time setup: uncomment and run these lines to install the notebook's
# dependencies into a fresh environment.
# ! pip install llama-index
# ! pip install pypdf

In [7]:
from llama_index import SimpleDirectoryReader

# Load the career-guide PDF from ./data; the inspection cell below shows the
# result is a list of llama_index Document objects.
documents = (
    SimpleDirectoryReader(input_files=["./data/eBook-How-to-Build-a-Career-in-AI.pdf"])
    .load_data()
)

In [8]:
# Quick look at what the reader returned: container type, number of entries,
# then the type and printed form of the first entry.
print(f"{type(documents)} \n")
print(f"{len(documents)} \n")
print(type(documents[0]), documents[0], sep="\n")

<class 'list'> 

41 

<class 'llama_index.schema.Document'>
Doc ID: 74762706-9539-42cd-8069-01241f2c3b96
Text: PAGE 1Founder, DeepLearning.AICollected Insights from Andrew Ng
How to  Build Your Career in AIA Simple Guide


In [9]:
from llama_index import Document

# Merge all loaded entries into a single Document, joining their texts with a
# blank line between consecutive entries.
document = Document(
    text="\n\n".join(page.text for page in documents)
)

In [13]:
# Sanity check: length (in characters) of the merged document text.
len(document.text)

48464

In [14]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# Chat model handed to the service context; a low temperature is requested.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Pair the LLM with the BGE-small embedding model. The "local:" prefix makes
# llama_index fetch the model files, as the download log below shows.
service_context = ServiceContext.from_defaults(
    embed_model="local:BAAI/bge-small-en-v1.5",
    llm=llm,
)

# Build a vector index over the single merged document.
index = VectorStoreIndex.from_documents(
    [document],
    service_context=service_context,
)

  from .autonotebook import tqdm as notebook_tqdm
Downloading config.json: 100%|██████████| 743/743 [00:00<00:00, 3.44MB/s]
Downloading pytorch_model.bin: 100%|██████████| 134M/134M [00:07<00:00, 18.3MB/s] 
Downloading tokenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 2.01MB/s]
Downloading vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 1.55MB/s]
Downloading tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 15.5MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 699kB/s]
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/mykhailokilianovskyi/Library/Caches/llama_index
[nltk_data]     ...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [15]:
# Query engine over the index, created with default settings (no arguments passed).
query_engine = index.as_query_engine()

In [16]:
# Smoke-test the pipeline with one question before running the evaluation set.
response = query_engine.query(
    "What are steps to take when finding projects to build your experience?"
)
print(response)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


When finding projects to build your experience, there are several steps you can take. First, you can join existing projects by asking to join someone else's project if they have an idea. Additionally, you can continue reading, taking courses, and talking to domain experts to come up with new ideas. Another approach is to focus on a specific application area where machine learning has not yet been applied. This can give you the opportunity to explore unique and creative applications that no one else has done yet. Finally, you can develop a side hustle or personal project that may or may not develop into something bigger, which can help stir your creative juices and strengthen bonds with collaborators.


In [17]:
# Evaluation prompts; the recorded run further down loops over this list.
eval_questions = [
    "What are the keys to building a career in AI?",
    "How can teamwork contribute to success in AI?",
    "What is the importance of networking in AI?",
    "What are some good habits to develop for a successful career?",
    "How can altruism be beneficial in building a career?",
    "What is imposter syndrome and how does it relate to AI?",
    "Who are some accomplished individuals who have experienced imposter syndrome?",
    "What is the first step to becoming good at AI?",
    "What are some common challenges in AI?",
    "Is it normal to find parts of AI challenging?",
    "What is the right AI job for me?",
]

In [19]:
! pip install trulens-eval

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting trulens-eval
  Downloading trulens_eval-0.18.3-py3-none-any.whl (627 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m627.0/627.0 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting cohere>=4.4.1 (from trulens-eval)
  Downloading cohere-4.37-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.9/48.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Collecting kaggle>=1.5.13 (from trulens-eval)
  Downloading kaggle-1.5.16.tar.gz (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.6/83.6 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting langchain>=0.0.335 (from trulens-eval)
  Downloading langchain-0.0.348-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting merkle-json>=1.0.0 (from trulen

In [20]:
from trulens_eval import Tru
# Ask TruLens to redact secrets before persisting anything: on start-up it
# warns that secret keys may otherwise be written to its database
# (sqlite:///default.sqlite by default, per the message below).
tru = Tru(database_redact_keys=True)

# Reset the TruLens database so results from earlier runs do not mix into this
# evaluation. NOTE(review): destructive — confirm before pointing Tru at a
# shared database.
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [24]:
import numpy as np
from trulens_eval import (
    Feedback,
    TruLlama,
    OpenAI as fOpenAI,
)
from trulens_eval.feedback import Groundedness

# TruLens feedback provider. It is imported under an alias and bound to
# `provider` so that this cell no longer rebinds `OpenAI` (the llama_index LLM
# class used to build the index) or `openai` (the SDK module imported at the
# top of the notebook). Re-running earlier cells therefore keeps working.
provider = fOpenAI()

# Answer relevance: evaluated on the record's main input and main output.
qa_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input_output()
)

# Context relevance: evaluated on the main input against the text of each
# retrieved source node; the per-node scores are averaged.
qs_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Context Relevance")
    .on_input()
    .on(TruLlama.select_source_nodes().node.text)
    .aggregate(np.mean)
)

# Groundedness: the retrieved source text is the "source" and the main output
# is the "statement" being checked.
grounded = Groundedness(groundedness_provider=provider)

groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(TruLlama.select_source_nodes().node.text)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Read as a module-level name by get_prebuilt_trulens_recorder below.
feedbacks = [qa_relevance, qs_relevance, groundedness]

def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Wrap ``query_engine`` in a ``TruLlama`` recorder.

    Args:
        query_engine: The engine to wrap; passed to ``TruLlama`` positionally.
        app_id: Identifier forwarded to ``TruLlama`` as ``app_id``.

    Returns:
        The ``TruLlama`` instance, configured with the module-level
        ``feedbacks`` list.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [26]:
# Recorder for the baseline engine, registered under this app id.
tru_recorder = get_prebuilt_trulens_recorder(
    query_engine,
    app_id="Direct Query Engine",
)

In [27]:
# Run every evaluation question while the recorder context is active; the
# records table further down contains "Direct Query Engine" rows for them.
with tru_recorder as recording:
    for question in eval_questions:
        # `response` is overwritten on each iteration, so only the last answer
        # stays bound after the loop.
        response = query_engine.query(question)

boto3,botocore is/are required for using BedrockEndpoint. You should be able to install it/them with
	pip install boto3 botocore


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [28]:
# Fetch the logged records together with the feedback information from TruLens.
# NOTE(review): app_ids=[] presumably means "no filter, all apps" — confirm.
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [29]:
# Preview the first rows: inputs/outputs, the three feedback scores, latency,
# token counts and cost per record.
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_d379af8da1703010a4e5691ca040bc3f,"""What are the keys to building a career in AI?""","""The keys to building a career in AI are learn...",-,"{""record_id"": ""record_hash_d379af8da1703010a4e...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-08T08:17:02.566579"", ""...",2023-12-08T08:17:07.059485,1.0,0.95,1.0,[{'args': {'prompt': 'What are the keys to bui...,[{'args': {'prompt': 'What are the keys to bui...,"[{'args': {'source': 'PAGE 1Founder, DeepLearn...",4,2126,0.003243
1,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_efa5d9cde52ad861e987e55e4582002d,"""How can teamwork contribute to success in AI?""","""Collaborating and working in teams is crucial...",-,"{""record_id"": ""record_hash_efa5d9cde52ad861e98...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-08T08:17:07.271974"", ""...",2023-12-08T08:17:11.492718,1.0,0.0,1.0,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'source': 'Hopefully the previous c...,4,1725,0.002638
2,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_bdb03b8c7575896b1cc8a3e19da52f17,"""What is the importance of networking in AI?""","""Networking is important in AI because it allo...",-,"{""record_id"": ""record_hash_bdb03b8c7575896b1cc...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-08T08:17:11.645573"", ""...",2023-12-08T08:17:14.642035,1.0,0.0,0.25,[{'args': {'prompt': 'What is the importance o...,[{'args': {'prompt': 'What is the importance o...,[{'args': {'source': 'Hopefully the previous c...,2,1702,0.002592
3,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_5f648a34742aa4a183d5ba471692ce34,"""What are some good habits to develop for a su...","""Developing good habits is crucial for a succe...",-,"{""record_id"": ""record_hash_5f648a34742aa4a183d...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-08T08:17:14.792669"", ""...",2023-12-08T08:17:18.211844,1.0,0.5,1.0,[{'args': {'prompt': 'What are some good habit...,[{'args': {'prompt': 'What are some good habit...,[{'args': {'source': 'Hopefully the previous c...,3,1677,0.002557
4,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_34f2ea18d35b409a28365e6a052438a3,"""How can altruism be beneficial in building a ...","""Altruism can be beneficial in building a care...",-,"{""record_id"": ""record_hash_34f2ea18d35b409a283...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-08T08:17:18.375542"", ""...",2023-12-08T08:17:21.178126,1.0,0.0,0.5,[{'args': {'prompt': 'How can altruism be bene...,[{'args': {'prompt': 'How can altruism be bene...,[{'args': {'source': 'Hopefully the previous c...,2,1668,0.002539
