In [1]:
import pandas as pd

%env GOOGLE_APPLICATION_CREDENTIALS=/Users/mike/.gcp-keys/pokemechs-84c9b2aac2f8.json

env: GOOGLE_APPLICATION_CREDENTIALS=/Users/mike/.gcp-keys/pokemechs-84c9b2aac2f8.json


In [2]:

from typing import List, Optional

from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel



In [3]:
def embed_text(
    texts: List[str],
    task: str = "RETRIEVAL_DOCUMENT",
    model_name: str = "text-embedding-004",
    dimensionality: Optional[int] = 256,
) -> List[List[float]]:
    """Embeds texts with a pre-trained, foundational model."""
    model = TextEmbeddingModel.from_pretrained(model_name)
    inputs = [TextEmbeddingInput(text, task) for text in texts]
    kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
    embeddings = model.get_embeddings(inputs, **kwargs)
    return [embedding.values for embedding in embeddings]


In [19]:

filename = './manuals/test-embedding/FunCatFacts.md'
with open(filename, 'r') as fd:
    text = fd.read()




In [20]:
embedded_test = embed_text([text])

In [21]:
import json


output_filename = './manuals/embedded/cat_test_embeddings.json'

with open(output_filename, 'w') as f:
    json.dump([list(emb) for emb in embedded_test], f) 


In [8]:
import psycopg2

In [9]:
psycopg2.connect

<function psycopg2.connect(dsn=None, connection_factory=None, cursor_factory=None, **kwargs)>

In [10]:
con =psycopg2.connect("user=postgres password=postgres host=127.0.0.1 port=64322 dbname=postgres")



In [22]:
yearmin = 2010
yearmax = 2013
with con.cursor() as cur:
    cur.execute("insert into subaru (name, yearmin, yearmax, embedding, text) values ('Cat', %s, %s, %s, %s)", (yearmin, yearmax, embedded_test[0], text))
    con.commit()


In [56]:

def search(query, topk=3):
    try:
        dimensionality: Optional[int] = 256
        model = TextEmbeddingModel.from_pretrained("text-embedding-004") 
        query_embedding = model.get_embeddings([TextEmbeddingInput(query, "RETRIEVAL_QUERY")], output_dimensionality=dimensionality)[0].values
        print(len(query_embedding))

        with con.cursor() as cur:

            cur.execute(
                """
                SELECT name, text, 1 - (embedding <-> %s::vector(256)) AS similarity 
                FROM subaru
                ORDER BY similarity DESC
                LIMIT %s
                """,
                (query_embedding, topk)
            )
            results = cur.fetchall()
    except Exception as e:
        print(f"Error executing query: {e}")
        con.rollback()
        return []
    else:
        con.commit()
    finally:
        cur.close() 

    return results

In [59]:
search_results = search("do cats purr")
for result in search_results:
    print(f"Name: {result[0]}, Similarity: {result[2]}\nText: {result[1]}\n---")

256
Name: Cat, Similarity: 0.521347812754258
Text: # Fun Cat **Facts:**

## Physical Traits:

1. **Purring Power:** A cat's purr vibrates at a frequency between 25 and 150 Hertz, which is thought to have healing properties for both cats and humans.

2. **Super Flexible:** Cats have 32 muscles in each of their ears, allowing them to rotate them 180 degrees.

3. **Retractable Claws:** Cats can retract their claws to keep them sharp and prevent wear and tear.

4. **Night Vision:** Cats have excellent night vision, thanks to a special membrane in their eyes that reflects light.

5. **Whiskers for Navigation:** Whiskers help cats judge whether they can fit through narrow spaces and detect changes in air currents, aiding in hunting and navigation.

Behavior:
6. **The Righting Reflex:** Cats can twist their bodies mid-air to land on their feet, thanks to an impressive righting reflex.

7. **The Flehmen Response:** That funny grimace cats make when they smell something interesting is called th