In [4]:
from langchain_ollama import OllamaEmbeddings
from upstash_vector import Index, Vector
from decouple import config
from sklearn.random_projection import SparseRandomProjection
import numpy as np
import ollama


In [5]:
from dotenv import load_dotenv
import os

# Read the local .env file so its entries become visible as environment variables.
load_dotenv()

# Upstash Vector REST credentials; each one is None when the variable is not set.
UPSTASH_VECTOR_REST_URL = os.environ.get("UPSTASH_VECTOR_REST_URL")
UPSTASH_VECTOR_REST_TOKEN = os.environ.get("UPSTASH_VECTOR_REST_TOKEN")

In [6]:
# LangChain embedding handle for the "nomic-embed-text" model served by Ollama.
# NOTE(review): the Ollama model card lists nomic-embed-text as a 768-dimensional
# model; 4096 is the vector size of llama3, which get_embedding() uses by default.
# This handle is not referenced by the other cells visible in this notebook —
# confirm which model is actually intended before relying on it.
embeddings_model = OllamaEmbeddings(model="nomic-embed-text")

# Small sample corpus: these sentences are the ones embedded and indexed below.
documents = [
    "The cat jumped over the dog",
    "The cow jumped over the moon",
    "The turkey ran in circles",
]

In [7]:
def get_embedding(text, model="llama3"):
    """Embed ``text`` through the ``ollama`` client and return the raw vector.

    Args:
        text: Prompt string passed to ``ollama.embeddings``.
        model: Name of the Ollama model to query; defaults to ``"llama3"``.

    Returns:
        The value stored under the ``"embedding"`` key of the Ollama response.
    """
    return ollama.embeddings(prompt=text, model=model)["embedding"]



In [8]:
# Embed every document and collect the vectors into a single NumPy array.
embeddings = np.array(list(map(get_embedding, documents)))
print("Original embedding shape:", embeddings.shape)

# Sparse random projection down to 1536 components; the fixed random_state keeps
# the projection the same from run to run.
transformer = SparseRandomProjection(random_state=42, n_components=1536)
reduced_embeddings = transformer.fit_transform(embeddings)
print("Reduced embedding shape:", reduced_embeddings.shape)

ConnectionError: Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download

In [None]:
# Local lookup table: row position -> reduced embedding at that position.
dataset = dict(enumerate(reduced_embeddings))

In [None]:
# Client for the Upstash Vector index, built from the credentials loaded from the environment.
index = Index(
    token=UPSTASH_VECTOR_REST_TOKEN,
    url=UPSTASH_VECTOR_REST_URL,
)

In [None]:
# Wrap each reduced embedding in an Upstash Vector, using its row position
# (as a string) for the id and a plain Python list for the values.
vectors = [
    Vector(id=str(i), vector=embedding.tolist())
    for i, embedding in enumerate(reduced_embeddings)
]

# Send the whole batch to the Upstash index with a single upsert call.
index.upsert(vectors)
# reduced_embeddings comes from SparseRandomProjection, not PCA, so the
# confirmation message names the technique that was actually applied.
print("Inserted random-projection-reduced vectors into Upstash!")

Inserted PCA-reduced vectors into Upstash!


In [None]:
# Embed a sentence that is not among the original documents.
new_embedding = get_embedding("The moose sat by the dog")

# Project it with the already-fitted transformer; the vector is reshaped to a
# single row (1, n_features) because transform() works on 2-D input.
reduced_new_embedding = transformer.transform(np.reshape(new_embedding, (1, -1)))

# Store the flattened 1-D result under key 3 of the local dataset.
dataset[3] = reduced_new_embedding.flatten()

In [None]:
# Upload the new embedding under id "3", converting the array to a plain list first.
new_vector = Vector(id="3", vector=dataset[3].tolist())
index.upsert(vectors=[new_vector])
print("Upserted new vector for ID 3")

Upserted new vector for ID 3


In [None]:
# Sentence to search for, embedded at the model's native size.
query_str = "The moose sat by the dog"
query_embedding = get_embedding(query_str)

# Run the query through the same fitted projection as the indexed vectors so
# its dimensionality matches what is stored in the index.
query_embedding_reduced = transformer.transform(np.reshape(query_embedding, (1, -1)))

In [None]:
# Ask the index for its top 3 matches to the reduced query vector.
query_vector = query_embedding_reduced.flatten().tolist()
results = index.query(
    vector=query_vector,
    top_k=3,
    include_metadata=True,
    include_vectors=True,
)

# Print each hit's id next to its score scaled by 100.
for result in results:
    print(result.id, result.score * 100)

3 100.0
0 77.01586499999999
2 66.43624
