In [None]:
# concept
# Fine tune the LLM on our dataset, and also fine tune the embedding model
# 1. Splitting -> Chunk 1 -> Embeddings 1 -> Build Semantic Index (vector store)
#         \_-> Chunk 2 -> Embedding 2 ->-/    
# 2. User Query -> Compute Embedding -> Semantic Search on our Knowledge base -> Retrieve Chunks relevant to question -> Context -> LLM  
# 3. Source: Talk to Your Documents, Powered by Llama-Index by Prompt Engineering  

# !pip install --upgrade llama-index==0.10.1
# !pip install openai==1.3.5
# !pip install --upgrade llama-index[huggingface]
# !pip install sentence-transformers
# !pip install transformers
#!pip install -q accelerate
#import sys
#print(sys.executable)
# !pip list  # a leading "!" tells Jupyter to run the line as a shell command

In [1]:
import os

from dotenv import load_dotenv
from IPython.display import Markdown, display
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Load the .env file so its variables become visible through os.getenv.
load_dotenv()

# Get the API key and fail fast with a readable message when it is absent,
# rather than letting a later llama-index call surface a less obvious error.
# NOTE(review): this assumes the default OpenAI-backed models are used below.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )

# Import all the user data found in the "data" directory.
documents = SimpleDirectoryReader("data").load_data()

# Build the vector store; with an OpenAI key available llama-index uses
# OpenAI embeddings by default.
index = VectorStoreIndex.from_documents(documents)

# Ask a question against the freshly built index and render the answer in bold.
query_engine = index.as_query_engine()
response = query_engine.query("Does the author recommend a full time job?")
display(Markdown(f"<b>{response}</b>"))

# Write the index to JSON files for future use. The directory is spelled out
# so it visibly matches the path the reload cell reads from.
index.storage_context.persist(persist_dir="./storage")

<b>The author does not recommend a full-time job, especially for those who are young and considering starting a startup.</b>

In [3]:
# Rebuild the index from the files previously persisted under ./storage,
# then run a new question against it.
from llama_index.core import StorageContext, load_index_from_storage

storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context=storage_context)

question = "Would the author recommend a startup in this day and age of competition?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)

The author would recommend starting a startup despite the competition, as they highlight the benefits and positive experiences associated with founding a startup.


In [None]:
# Customise the LLM (and the chunking) used to answer a query.
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI

# define LLM: https://gpt-index.readthedocs.io/en/latest/core_modules/model_modules/llms/usage_custom.html
llm = OpenAI(model="gpt-3.5-turbo", temperature=0, max_tokens=256)  # temp=0 more deterministic

# Recreate the vector store with the default OpenAI embedding model.
# In llama-index 0.10 `from_documents` silently drops unknown keyword arguments,
# so `llm=`, `chunk_size=` and `chunk_overlap=` passed there had no effect.
# Chunking is configured through an explicit splitter transformation instead.
# Relies on `documents` loaded by the earlier data-loading cell.
splitter = SentenceSplitter(chunk_size=800, chunk_overlap=20)
index = VectorStoreIndex.from_documents(documents, transformations=[splitter])

# The LLM is supplied where the answer is synthesised: the query engine.
query_engine = index.as_query_engine(llm=llm)

response = query_engine.query("what does the author recommend if you like predictability?")
display(Markdown(f"<b>{response}</b>"))

<b>The author suggests that if you like predictability, starting a startup may not be the best option as it involves a high level of uncertainty. Instead, the author recommends considering working for established companies like Microsoft, where the future trajectory is more predictable.</b>

In [None]:
# Compute a standalone embedding for a single sentence with the OpenAI
# embedding model; nothing is added to a vector store in this cell.
from llama_index.embeddings.openai import OpenAIEmbedding

sentence = "The best way to live is to be your own boss"
embed_model_openai = OpenAIEmbedding()
text_embedding = embed_model_openai.get_text_embedding(sentence)

[0.009341972880065441, -0.009304252453148365, -0.007085062563419342, -0.04096385836601257, -0.010813049972057343, 0.008304674178361893, -0.027963051572442055, 0.01422041840851307, -0.003721700981259346, -0.014195271767675877, -0.01657162792980671, 0.023021738976240158, 0.01482393778860569, 0.0012424006126821041, -0.01687338761985302, -0.008958486840128899, 0.047778595238924026, -0.01093249674886465, 0.016395602375268936, 0.005950320977717638, -0.011466862633824348, 0.011240542866289616, 0.0070284828543663025, -0.011209109798073769, -0.0009602868813090026, -0.003592824563384056, -0.009216239675879478, -0.013365433551371098, 0.018570786342024803, -0.02238049916923046, 0.018067853525280952, -0.00200544367544353, 0.008254380896687508, -0.016621921211481094, -0.022933725267648697, -0.01328999362885952, -0.017124854028224945, -0.006500403396785259, 0.024693990126252174, -0.009373405948281288, 0.015113123692572117, 0.003919730894267559, 0.005466248374432325, 0.008707020431756973, -0.030452568

In [None]:
# Alternative: free, open-source embeddings served through Hugging Face
#!pip install llama-index-embeddings-huggingface
from IPython.display import Markdown, display
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model_huggingface = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Index the documents from the "data" directory with the Hugging Face embedder.
mainData = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(mainData, embed_model=embed_model_huggingface)

# Add one extra, hand-written document to the existing index.
more_info = "The best way to live is to be your own boss"
new_document = Document(text=more_info)
index.insert(new_document)

# Ask each question in turn and render the answer in bold.
query_engine = index.as_query_engine()
for question in (
    "Does the author think a good life is a 9 to 5 job or being author of our own destiny?",
    "Is there something this essay tells us, NOT to do with our time?",
):
    response = query_engine.query(question)
    display(Markdown(f"<b>{response}</b>"))


  from .autonotebook import tqdm as notebook_tqdm


<b>The author believes that a good life is not defined by a 9 to 5 job but rather by being the author of our own destiny.</b>

<b>To not stick to the old model and default ways of living, but instead consider taking risks and exploring new opportunities, such as starting a startup or pursuing non-traditional career paths.</b>