In [20]:
import os
import cohere
client = cohere.Client(api_key=os.getenv("COHERE_API_KEY"))

# Show example embedding a text sentence. then show some info on the embedding using cohere methods and properties.

# Example text to embed
text = "This is an example sentence for embedding."

# Get the embedding
embedding = client.embed(texts=[text], model="embed-english-v3.0", input_type="search_query")


In [21]:
from rich.console import Console
from rich.pretty import Pretty

console = Console()

# Assuming 'response' is your EmbeddingsFloatsEmbedResponse object
truncated_response = embedding.dict()
truncated_response['embeddings'] = [emb[:5] + ['...'] for emb in truncated_response['embeddings']]

console.print(Pretty(truncated_response))

In [22]:
import chromadb
from chromadb.utils import embedding_functions
import cohere
import os

# Initialize Chroma client
chroma_client = chromadb.Client()

# Create a custom Cohere embedding function that returns lists
class CohereEmbeddingFunction(embedding_functions.EmbeddingFunction):
    def __init__(self, api_key, model_name="embed-english-v3.0"):
        self.client = cohere.Client(api_key=api_key)
        self.model_name = model_name

    def __call__(self, texts):
        embeddings = self.client.embed(texts=texts, model=self.model_name, input_type="search_document").embeddings
        return [list(embedding) for embedding in embeddings]  # Convert tuples to lists

# Create a Cohere embedding function
cohere_ef = CohereEmbeddingFunction(api_key=os.getenv("COHERE_API_KEY"))

# Use get_or_create=True to either get the existing collection or create a new one
collection = chroma_client.create_collection(
    name="cohere_embeddings",
    embedding_function=cohere_ef,
    get_or_create=True
)

# Example: Add a document to the collection
collection.add(
    documents=["The ideal ph for cannabis is 6.8"],
    metadatas=[{"source": "example"}],
    ids=["1"]
)

# Example: Query the collection
results = collection.query(
    query_texts=["The ideal ph for cannabis is 6.8"],
    n_results=1
)

print("Query results:", results)

Insert of existing embedding ID: 1
Add of existing embedding ID: 1


In [18]:
import cohere
import numpy as np
import os

# Initialize Cohere client
cohere_client = cohere.Client(api_key=os.getenv("COHERE_API_KEY"))

# Get embeddings
query = "The ideal ph for cannabis is 6.8."
document = "The ideal ph for cannabis is 6.8"

query_embedding = cohere_client.embed(texts=[query], model="embed-english-v3.0", input_type="search_query").embeddings[0]
doc_embedding = cohere_client.embed(texts=[document], model="embed-english-v3.0", input_type="search_query").embeddings[0]

# Calculate cosine similarity
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

similarity = cosine_similarity(query_embedding, doc_embedding)
distance = 1 - similarity

print(f"Cosine Similarity: {similarity}")
print(f"Distance: {distance}")

# Calculate Euclidean distance
euclidean_distance = np.linalg.norm(np.array(query_embedding) - np.array(doc_embedding))
print(f"Euclidean Distance: {euclidean_distance}")

In [19]:
import chromadb
from chromadb.utils import embedding_functions
import cohere
import os

# Initialize Chroma client
chroma_client = chromadb.Client()

# Create a custom Cohere embedding function
class CohereEmbeddingFunction(embedding_functions.EmbeddingFunction):
    def __init__(self, api_key, model_name="embed-english-v3.0"):
        self.client = cohere.Client(api_key=api_key)
        self.model_name = model_name

    def __call__(self, texts):
        # Use search_document for storing, search_query for querying
        input_type = "search_document" if len(texts) > 1 or len(texts[0].split()) > 10 else "search_query"
        embeddings = self.client.embed(texts=texts, model=self.model_name, input_type=input_type).embeddings
        return [list(embedding) for embedding in embeddings]

# Create a Cohere embedding function
cohere_ef = CohereEmbeddingFunction(api_key=os.getenv("COHERE_API_KEY"))

# Get or create a collection
collection = chroma_client.get_or_create_collection(
    name="cannabis_info",
    embedding_function=cohere_ef
)

# Add some documents to the collection (if not already added)
documents = [
    "The ideal pH for cannabis is 6.8",
    "Cannabis requires a lot of nitrogen during vegetative growth",
    "Indica strains typically have broader leaves than Sativa",
    "LED lights are energy-efficient for growing cannabis",
    "Hydroponics can lead to faster growth in cannabis plants"
]

collection.add(
    documents=documents,
    ids=[f"doc_{i}" for i in range(len(documents))]
)

# Query for the top k similar items
k = 3  # Change this to get more or fewer results
query_text = "What's the best pH level for growing marijuana?"

results = collection.query(
    query_texts=[query_text],
    n_results=k
)

print(f"Top {k} results for query: '{query_text}'")
for i, (doc, distance) in enumerate(zip(results['documents'][0], results['distances'][0]), 1):
    print(f"{i}. Document: {doc}")
    print(f"   Distance: {distance}")
    print()

In [None]:
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

embedding_function = SentenceTransformerEmbeddingFunction()
print(embedding_function([token_split_texts[10]]))