# Embeddings

In [2]:
from dotenv import load_dotenv
import openai
import os

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")

In [3]:
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding()
embed = embed_model.get_text_embedding("Hellow World")

print(len(embed))
print(embed[:10])


1536
[-0.0048995171673595905, 0.00447823153808713, 0.003778300480917096, -0.0277584008872509, -0.012598758563399315, -0.0012124397326260805, -0.0015847963513806462, -0.007112095132470131, -0.01742197945713997, -0.021336285397410393]


### Custom Embeddings

We are gonna use the intstructor embeddings from huggingface

In [7]:
# Instal dependencies
# !pip install InstructorEmbedding torch transformers sentence_transformers

You should consider upgrading via the '/Users/raoofmac/Documents/coding/learning/genai/venv/bin/python3 -m pip install --upgrade pip' command.[0m


In [10]:
from InstructorEmbedding import INSTRUCTOR
model = INSTRUCTOR('hkunlp/instructor-large')
sentence = "3D ActionSLAM: wearable person tracking in multi-floor environments"
instruction = "Represent the Science title:"
embeddings = model.encode([[instruction,sentence]])
print(embeddings)

TypeError: _load_sbert_model() got an unexpected keyword argument 'token'

In [11]:
from typing import Any, List
from InstructorEmbedding import INSTRUCTOR
from llama_index.embeddings.base import BaseEmbedding

class InstructorEmbeddings(BaseEmbedding):
    def __init__(
        self, 
        instructor_model_name: str = "hkunlp/instructor-large",
        instruction: str = "Represent the Computer Science text for retrieval:",
        **kwargs: Any,
    ) -> None:
        self._model = INSTRUCTOR(instructor_model_name)
        self._instruction = instruction
        super().__init__(**kwargs)

    def _get_query_embedding(self, query: str) -> List[float]:
        embeddings = model.encode([[self._instruction, query]])
        return embeddings[0].tolist()
    
    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_query_embedding(query)

    def _get_text_embedding(self, text: str) -> List[float]:
        embeddings = model.encode([[self._instruction, text]])
        return embeddings[0].tolist() 
    
    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        embeddings = model.encode([[self._instruction, text] for text in texts])
        return embeddings.tolist()

ModuleNotFoundError: No module named 'llama_index.embeddings.base'

In [None]:
# set the batch size to 1 to avoid memory issues
# if you have a large GPU, you can increase this
instructor_embeddings = InstructorEmbeddings(embed_batch_size=1)

In [None]:
embed = instructor_embeddings.get_text_embedding("How do I create a vector index?")
print(len(embed))
print(embed[:10])

### Custom Embeddings With Llama Index

In [None]:
from llama_index import ServiceContext, set_global_service_context
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
service_context = ServiceContext.from_defaults(llm=llm, embed_model=instructor_embeddings, chunk_size=512)
set_global_service_context(service_context)

In [None]:
import os
import sys
sys.path.append(os.path.join(os.getcwd(), '..'))

from llama_docs_bot.indexing import create_query_engine

# remove any existing indices
# !rm -rf ./*_index

query_engine = create_query_engine()

In [None]:
response = query_engine.query('What is the Sub Question query engine?')
response.print_response_stream()

In [None]:
print(response.get_formatted_sources(length=256))

In [None]:
from llama_index.embeddings.openai import OpenAIEmbedding

service_context = ServiceContext.from_defaults(llm=llm, embed_model=OpenAIEmbedding(), chunk_size=512)
set_global_service_context(service_context)

# delete old vector index so we can re-create it
!rm -rf ./*_index

In [None]:
query_engine = create_query_engine()

response = query_engine.query('What is the Sub Question query engine?')
response.print_response_stream()

In [None]:
print(response.get_formatted_sources(length=256))