<a href="https://colab.research.google.com/github/sroy-10/genai/blob/main/LangChain/Langchain_v03_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -r requirements.txt -q

In [None]:
# Print the installed LangChain version so the output records which release
# this notebook was run against.
import langchain
print(langchain.__version__)

In [None]:
import os

from dotenv import load_dotenv

# Pull the settings stored in the local `var.env` file into the process
# environment; later cells read their AZ_* configuration via `os.environ`.
load_dotenv(dotenv_path="var.env")

## Azure OpenAI

In [None]:
from langchain_openai import AzureChatOpenAI

# The notebook stores its Azure settings under AZ_* names, while this cell
# exports them under the AZURE_OPENAI_* names for the client. Validate the two
# mandatory settings first: writing a missing value (None) into os.environ
# would otherwise fail with an opaque "str expected, not NoneType" TypeError.
missing_settings = [name for name in ("AZ_API_KEY", "AZ_BASE") if not os.getenv(name)]
if missing_settings:
    raise KeyError(
        f"Missing required environment variable(s): {', '.join(missing_settings)}. "
        "Define them in var.env (or the environment) before running this cell."
    )

os.environ["AZURE_OPENAI_API_KEY"] = os.environ["AZ_API_KEY"]
os.environ["AZURE_OPENAI_ENDPOINT"] = os.environ["AZ_BASE"]  # https://YOUR-ENDPOINT.openai.azure.com/

llm = AzureChatOpenAI(
    azure_deployment=os.getenv("AZ_MODEL"),
    api_version=os.getenv("AZ_API_VERSION"),
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

# Chat input expressed as (role, content) pairs: a system instruction followed
# by the user sentence to translate.
messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]

ai_msg = llm.invoke(messages)
# Leave the message object as the last expression so the cell displays it.
ai_msg

In [None]:
# A plain string prompt is accepted as well; only the text of the reply is
# displayed here.
result = llm.invoke(input="What is the capital of India?")
result.content

## Hugging Face Endpoints

In [None]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# The access token is expected in an environment variable named
# HUGGINGFACEHUB_API_TOKEN.
llm = HuggingFaceEndpoint(
    # Note: repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0" produced an error when
    # tried with this endpoint, so Phi-3 mini is used instead.
    repo_id="microsoft/Phi-3-mini-4k-instruct",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

# Wrap the text-generation endpoint in a chat-model interface.
chat = ChatHuggingFace(llm=llm, verbose=True)

# 1) Single string prompt.
capital_reply = chat.invoke("What is the capital of India??")
print(capital_reply.content)

# 2) Role-tagged messages: system instruction plus the sentence to translate.
messages = [
    ("system", "You are a helpful translator. Translate the user sentence to French."),
    ("human", "I love programming."),
]

translation_reply = chat.invoke(messages)
print(translation_reply.content)

## Hugging Face Local (the model is downloaded to the local machine)

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

# In case the model should be downloaded to another local drive instead of C:,
# point HF_HOME at that location before loading the model, e.g.:
# import os
# os.environ['HF_HOME'] = 'D:/huggingface_cache'

llm = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    pipeline_kwargs=dict(
        # `temperature` is deliberately not set: with do_sample=False decoding
        # is greedy, sampling parameters are ignored, and transformers warns
        # when both are supplied together.
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
    ),
)

# Wrap the local pipeline in a chat-model interface and show the reply text.
model = ChatHuggingFace(llm=llm)
model.invoke("what is the capital of India?").content

## Embedding Model - OpenAI

#### Single Document

In [None]:
from langchain_openai import OpenAIEmbeddings

# `dimensions` (previously misspelled, so it never took effect) requests a
# shortened 32-value embedding from the text-embedding-3 model.
# NOTE(review): this client presumably needs OPENAI_API_KEY in the environment
# (e.g. via var.env) - confirm it is configured.
embedding = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=32)
result = embedding.embed_query("New Delhi is the capital of India")
print(result)

#### Multiple Documents

In [None]:
from langchain_openai import OpenAIEmbeddings

# Texts to embed in a single batch call.
document = [
    "New Delhi is the capital of India",
    "Paris is the capital of France"
]

# `dimensions` (previously misspelled, so it never took effect) requests a
# shortened 32-value embedding per text from the text-embedding-3 model.
embedding = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=32)
result = embedding.embed_documents(document)
print(result)

## Embedding Model - Hugging Face

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embed one sentence with a sentence-transformers model and print the vector.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
result = embedding.embed_query("New Delhi is the capital of India")
print(result)

In [None]:
from langchain_huggingface import HuggingFaceEndpointEmbeddings


## Embedding Model Query

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from sklearn.metrics.pairwise import cosine_similarity

# Candidate passages to search over, and the user question to match against them.
documents = [
    "Virat Kohli is an Indian cricketer known for his aggressive batting and leadership.",
    "MS Dhoni is a former Indian captain famous for his calm demeanor and finishing skills.",
    "Sachin Tendulkar, also known as the 'God of Cricket', holds many batting records.",
    "Rohit Sharma is known for his elegant batting and record-breaking double centuries.",
    "Jasprit Bumrah is an Indian fast bowler known for his unorthodox action and yorkers.",
]
query = "tell me about kohli"

embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed every passage and the query with the same model.
document_embedding = embedding.embed_documents(documents)
query_embedding = embedding.embed_query(query)

# cosine_similarity works on 2-D inputs, so the single query vector is wrapped
# in a list; the result holds one row with a score per document.
similarity_score = cosine_similarity([query_embedding], document_embedding)
similarity_score

In [None]:
# Pair each similarity score with the position of its document.
[(position, similarity) for position, similarity in enumerate(similarity_score[0])]

In [None]:
# Rank (position, score) pairs from most to least similar, then keep the top one.
ranked_scores = sorted(
    enumerate(similarity_score[0]),
    key=lambda pair: pair[1],
    reverse=True,
)
index, score = ranked_scores[0]
index, score

In [None]:
# Report the question, the best-matching passage and its similarity score.
for label, value in (
    ("User → ", query),
    ("Assistant → ", documents[index]),
    ("Score →", score),
):
    print(label, value)