In [36]:
# ===== SETUP (run first in a fresh Colab) =====
!pip -q install langchain langchain-openai langchain-community grandalf

from google.colab import userdata
import os
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")

from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.runnables import (
    RunnableLambda, RunnableSequence, RunnableParallel
)

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
print("Setup complete.")


Setup complete.


In [37]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# Initialize the ChatOpenAI model
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

# Initialize memory
memory = ConversationBufferMemory()

# Create a ConversationChain with the LLM and memory
conversation = ConversationChain(llm=llm, memory=memory, verbose=False)

print("Start chatting with the AI. Type 'exit' or 'quit' to end the conversation.")

while True:
    user_input = input("You: ")
    if user_input.lower() in ['exit', 'quit']:
        break
    try:
        response = conversation.invoke(user_input)
        print("Bot:", response['response'])
    except Exception as e:
        print(f"An error occurred: {e}")

print("Conversation ended.")

Start chatting with the AI. Type 'exit' or 'quit' to end the conversation.
You: what are embedding & what are vectors in LLMs
Bot: In the context of large language models (LLMs), embeddings and vectors are fundamental concepts that play a crucial role in how these models understand and process language.

**Embeddings** refer to the numerical representations of words, phrases, or even entire sentences in a continuous vector space. The idea behind embeddings is to convert discrete language elements into a format that machines can work with, allowing them to capture semantic meanings. For example, similar words tend to have similar embeddings, which helps the model understand relationships between words based on their context.

**Vectors** are essentially the mathematical representation of these embeddings. In a high-dimensional space, each word or phrase is represented as a point (or vector) with specific coordinates. The dimensions of these vectors can vary depending on the model, but c

In [32]:
#
# 1 Embeddings
# 2 Vectors
# 3 Vector Store
# 4 Vector Databases

In [42]:
from langchain_openai import OpenAIEmbeddings

# Initialize the OpenAI Embeddings model
# You can choose different models depending on your needs.
# "text-embedding-ada-002" is a common and cost-effective choice.
embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Define the text you want to get embeddings for
#text_input = "This is an example sentence for embedding."

text_input = "Who is Sachin Tendulkar? Which is his city of residence?"

# Split the text into words
words = text_input.split()

# Get and display the embedding for each word
print("Embeddings for each word:")
for word in words:
    embedding = embeddings_model.embed_query(word)
    print(f"Word: '{word}'")
    print("Embedding (first 10 values):", embedding[:10]) # Print only the first 10 for brevity
    print("Embedding dimension:", len(embedding))
    print("-" * 20) # Separator for clarity

Embeddings for each word:
Word: 'Who'
Embedding (first 10 values): [0.0015458944253623486, -0.01619558408856392, 0.012150187976658344, -0.011870229616761208, -0.027715865522623062, 0.015691658481955528, -0.014431847259402275, -0.014683809131383896, 0.007565871346741915, -0.01690947823226452]
Embedding dimension: 1536
--------------------
Word: 'is'
Embedding (first 10 values): [-0.012280227616429329, -0.011882741935551167, -6.717383803334087e-05, -0.0054462566040456295, -0.02213369868695736, 0.0112411854788661, -0.013165854848921299, -0.016624681651592255, 0.009637294337153435, -0.023695748299360275]
Embedding dimension: 1536
--------------------
Word: 'Sachin'
Embedding (first 10 values): [0.0026786571834236383, 0.005073593929409981, 0.012023082002997398, 0.021549422293901443, -0.04841276630759239, 0.017357030883431435, -0.017130054533481598, -0.01190959382802248, -0.009099089540541172, -0.022897928953170776]
Embedding dimension: 1536
--------------------
Word: 'Tendulkar?'
Embedding 

In [47]:
from langchain_openai import OpenAIEmbeddings

# Initialize the OpenAI Embeddings model
embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Take dynamic user input
text_input = input("Please enter the text you want to embed: ")

# Split the text into words
words = text_input.split()

# Get and display the embedding for each word
print("\nEmbeddings for each word:")
for word in words:
    embedding = embeddings_model.embed_query(word)
    print(f"Word: '{word}'")
    print("Embedding (first 10 values):", embedding[:])
    print("Embedding dimension:", len(embedding))
    print("-" * 20)

Please enter the text you want to embed: Dhoni

Embeddings for each word:
Word: 'Dhoni'
Embedding (first 10 values): [-0.013232818804681301, 0.0016590746818110347, -0.0029750638641417027, 0.004796565510332584, -0.05110148712992668, 0.028295427560806274, -0.023018211126327515, 0.0024413755163550377, -0.005449587944895029, -0.020565232262015343, 0.036091916263103485, -0.005078326445072889, 0.00533688347786665, 0.004770047031342983, -0.0004992969334125519, -0.001978956162929535, 0.024954073131084442, -0.023734215646982193, 0.039406754076480865, -0.03145115077495575, -0.02552422508597374, 0.0037755963858217, 0.0289583932608366, 0.01522171963006258, -0.0011262149782851338, 0.007968862541019917, 0.006493760738521814, -0.019942043349146843, 0.02288561686873436, -0.007332413922995329, 0.03344004973769188, -0.01548690628260374, -0.0036628921516239643, -0.022103317081928253, -0.016932174563407898, -0.006961152423173189, -0.01086602732539177, 0.013935564085841179, 0.0073456731624901295, 0.0053998

In [55]:
from langchain_openai import OpenAIEmbeddings

# Initialize the OpenAI Embeddings model
embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Basic in-memory vector store (using a dictionary)
vector_store = {}

# Take dynamic user input
text_input = input("Please enter the text you want to embed and store: ")

# Split the text into words
words = text_input.split()

# Get embeddings for each word and store in the vector store
print("\nGenerating embeddings and storing in vector store:")
for word in words:
    embedding = embeddings_model.embed_query(word)
    vector_store[word] = embedding
    print(f"Stored embedding for word: '{word}'")

# Display the contents of the vector store (optional)
print("\nContents of the vector store:")
for word, embedding in vector_store.items():
    print(f"Word: '{word}'")
    print("Embedding (first 10 values):", embedding[:10])
    print("Embedding dimension:", len(embedding))
    print("-" * 20)

Please enter the text you want to embed and store: hello

Generating embeddings and storing in vector store:
Stored embedding for word: 'hello'

Contents of the vector store:
Word: 'hello'
Embedding (first 10 values): [-0.025122925639152527, -0.019487135112285614, -0.02802019938826561, -0.03106299601495266, -0.024752499535679817, 0.027543935924768448, -0.012554848566651344, -0.008420612663030624, -0.017608538269996643, -0.008347849361598492]
Embedding dimension: 1536
--------------------


In [56]:
print(type(vector_store))
print(vector_store)

<class 'dict'>
{'hello': [-0.025122925639152527, -0.019487135112285614, -0.02802019938826561, -0.03106299601495266, -0.024752499535679817, 0.027543935924768448, -0.012554848566651344, -0.008420612663030624, -0.017608538269996643, -0.008347849361598492, 0.03251824900507927, 0.004187154583632946, -0.024712810292840004, -0.0006296441424638033, 0.014129165560007095, -0.0015941614983603358, 0.03939761593937874, 0.00207538646645844, 0.02704121172428131, -0.012409322895109653, -0.021048223599791527, 0.008923335000872612, 0.008394152857363224, -0.00326770031824708, -0.005410888232290745, -0.009624501690268517, 0.011165744625031948, -0.0015660487115383148, 0.00355544313788414, -0.023297248408198357, 0.006866139359772205, -0.007818667218089104, -0.023905808106064796, -0.008910105563700199, 0.006846295204013586, -0.013705819845199585, 0.00959804281592369, -0.014115936122834682, 0.022013980895280838, -0.01051749661564827, 0.0033652682323008776, -0.014671577140688896, 0.00533481827005744, -0.014922

In [None]:
# FAISS -> Facebook AI Similarity Search -> Vector Store

In [58]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Initialize the OpenAI Embeddings model
embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Sample text to embed
texts = ["This is the first document.", "This is the second document.", "This is the third document."]

# Create a FAISS vector store from the texts and embeddings
vector_store = FAISS.from_texts(texts, embeddings_model)

# You can now use this vector store for similarity search, etc.
print("FAISS vector store created successfully.")

# Example: Perform a similarity search
query = "This is a query document."
docs = vector_store.similarity_search(query)

print("\nSimilarity search results:")
for doc in docs:
    print(doc.page_content)

FAISS vector store created successfully.

Similarity search results:
This is the first document.
This is the third document.
This is the second document.


In [57]:
!pip install -q faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m55.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [59]:
# End of the Notebbok

In [60]:
# Assignment => Explore - Vector Stores and Vector Databases - examples too