### Embedding Labs - pgvector

In [6]:
!pip install pgvector psycopg2 einops

Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m723.0 kB/s[0m eta [36m0:00:00[0m[36m0:00:01[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.8.0


In [1]:
import dspy
from dspy.functional import TypedPredictor
from pydantic import BaseModel, Field
from typing import List
from transitions import Machine
from dotenv import dotenv_values
from rich import print


# Secrets come from a dotenv-style file two directories up, so the API key
# itself is never written into the notebook.
secret = dotenv_values('../../.secret')

# OpenAI-backed language model for DSPy.
# Other model ids that were tried here: 'gpt-3.5-turbo', 'gpt-4', 'gpt-4o'.
llm = dspy.OpenAI(model='gpt-3.5-turbo-0125', api_key=secret['OPEN_AI_API_KEY'], max_tokens=4096)

# Register `llm` as the LM in DSPy's global settings.
dspy.settings.configure(lm=llm)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import psycopg2

# Fetch the hr_head_check row with the highest applicant_id and print its
# applicant_id and lead_stage.
#
# Both handles start as None so the `finally` clause can tell what was actually
# opened. Without this, a failure inside `psycopg2.connect` leaves `connection`
# and `cursor` unbound and the cleanup raises NameError, hiding the real error.
#
# NOTE(review): connection credentials are inlined here; consider reading them
# from the same secrets file used for the OpenAI key.
connection = None
cursor = None
try:
    connection = psycopg2.connect(user="drfadul",
                                  password="*****",
                                  host="localhost",
                                  port="5432",
                                  database="synaia")
    cursor = connection.cursor()
    postgreSQL_select_Query = "SELECT applicant_id, lead_stage FROM hr_head_check ORDER BY applicant_id DESC LIMIT 1"

    cursor.execute(postgreSQL_select_Query)
    print("Selecting rows from mobile table using cursor.fetchall")
    mobile_records = cursor.fetchall()

    print("Print each row and it's columns values")
    for row in mobile_records:
        print("applicant_id = ", row[0], )
        print("lead_stage = ", row[1])

# psycopg2.Error derives from Exception; it is listed explicitly only to make
# the expected failure mode obvious. Errors are reported, not re-raised.
except (Exception, psycopg2.Error) as error:
    print("Error while fetching data from PostgreSQL", error)

finally:
    # Close only what was successfully opened.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()
        print("PostgreSQL connection is closed")


In [38]:
import numpy as np
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel


# Tokenizer and encoder used by `embedd` below.
tokenizer_embed = AutoTokenizer.from_pretrained('bert-base-uncased')

# NOTE(review): trust_remote_code=True allows transformers to run model code
# shipped with the Hub repository — consider pinning a revision.
model_embed = AutoModel.from_pretrained(
    'nomic-ai/nomic-embed-text-v1.5',
    trust_remote_code=True,
    safe_serialization=True,
)

# Put the module in evaluation mode; it is only used for inference here.
model_embed.eval()


def embedd(text: str):
    """Embed `text` into a single L2-normalised vector.

    The text is tokenised with the module-level `tokenizer_embed`, encoded by
    `model_embed` with gradients disabled, mean-pooled over tokens using the
    attention mask as weights, and finally L2-normalised.

    Args:
        text: The text to embed.

    Returns:
        A NumPy array holding the first row of the normalised batch.

    Note:
        An optional Matryoshka reduction (layer-norm over the feature axis,
        then keeping the first 512 dimensions) was sketched in comments in
        the original cell and is not applied here.
    """
    batch = tokenizer_embed(text, padding=True, truncation=True, return_tensors='pt')

    with torch.no_grad():
        outputs = model_embed(**batch)

    # Mask-weighted mean over the token axis: padded positions contribute
    # nothing to the sum, and the clamp guards against division by zero.
    token_vectors = outputs[0]
    weights = batch['attention_mask'].unsqueeze(-1).expand(token_vectors.size()).float()
    weighted_sum = (token_vectors * weights).sum(dim=1)
    weight_total = weights.sum(dim=1).clamp(min=1e-9)
    pooled = weighted_sum / weight_total

    unit_vectors = F.normalize(pooled, p=2, dim=1)

    # Only the first row of the batch is returned.
    return np.array(unit_vectors)[0]

# Smoke test for `embedd`: embed a sample company description and print the
# shape of the returned vector. The literal's internal line breaks and
# indentation are part of the embedded text.
text = """GlobalConnect Solutions is a premier call center company specializing in providing top-tier customer service, technical support, and sales solutions to businesses of all sizes. 
        Our services are designed to help companies enhance customer satisfaction, streamline operations, and boost their bottom line.
        We serve a wide range of industries including healthcare, finance, retail, telecommunications, travel, and more. 
        Our versatile team is equipped to handle industry-specific requirements and deliver customized solutions. """
      
print(embedd(text).shape)


<All keys matched successfully>


In [224]:
import psycopg2
from psycopg2.extras import execute_values
from pgvector.psycopg2 import register_vector

# Insert one (text, embedding) row into the company_info table.
#
# Both handles start as None so the `finally` clause can tell what was actually
# opened. Without this, a failure inside `psycopg2.connect` leaves `connection`
# and `cursor` unbound and the cleanup raises NameError, hiding the real error.
#
# NOTE(review): connection credentials are inlined here; consider reading them
# from the same secrets file used for the OpenAI key.
text = """We believe in promoting from within and offer various opportunities for career advancement. Employees who demonstrate strong performance and leadership qualities may be considered for promotions and leadership roles."""
connection = None
cursor = None
try:
    connection = psycopg2.connect(user="drfadul",
                                  password="*******",
                                  host="localhost",
                                  port="5432",
                                  database="synaia")

    # Register pgvector's adapters on this connection so the NumPy array
    # returned by `embedd` can be passed directly as a query parameter.
    register_vector(connection)
    cursor = connection.cursor()
    data = [
        (text, embedd(text=text))
    ]

    execute_values(cursor, "INSERT INTO company_info (text, embedding) VALUES %s", data)
    connection.commit()

    print('\nSuccess!')

# psycopg2.Error derives from Exception; it is listed explicitly only to make
# the expected failure mode obvious. Errors are reported, not re-raised.
except (Exception, psycopg2.Error) as error:
    print("Error while INSERTING data from PostgreSQL", error)

finally:
    # Close only what was successfully opened. Closing without a commit
    # discards any uncommitted work from a failed insert.
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()
        print("PostgreSQL connection is closed")




In [246]:
import dspy
from dspy.functional import TypedPredictor
from dspy.retrieve.pgvector_rm import PgVectorRM
from pydantic import BaseModel, Field
from typing import List
from transitions import Machine
from dotenv import dotenv_values
from rich import print


# Language-model setup (repeats the configuration from the first cell).
# Other model ids that were tried here: 'gpt-3.5-turbo', 'gpt-4', 'gpt-4o'.
secret = dotenv_values('../../.secret')
llm = dspy.OpenAI(model='gpt-3.5-turbo-0125', api_key=secret['OPEN_AI_API_KEY'], max_tokens=4096)

# Retriever over the company_info table. Queries are embedded with `embedd`,
# the same function used when the rows were inserted.
# NOTE(review): the connection string embeds credentials; consider building it
# from the secrets file instead.
db_url = "postgresql://drfadul:*******@localhost/synaia"
retriever_model = PgVectorRM(
    db_url=db_url,
    pg_table_name="company_info",
    embedding_field="embedding",
    fields=["text"],
    embedding_func=embedd,
    k=3,
    include_similarity=True,
)

# Register `llm` as the LM in DSPy's global settings.
dspy.settings.configure(lm=llm)

# Signature for the fallback reply used when an answer could not be verified
# against the retrieved context (see CompanyRelated.forward, which passes the
# user's question as `context`).
# NOTE(review): the docstring and `desc` strings below appear to be sent to the
# LM as part of the prompt — treat them as runtime text, not documentation.
class NotFound(dspy.Signature):
    """Generates a denial response related to the question in context"""
    context: str = dspy.InputField()
    response: str = dspy.OutputField(desc="often between 3 and 7 words")

# Signature for the grounding check: given the retrieved context and a
# candidate answer, produce a boolean saying whether the answer is supported
# by that context.
# NOTE(review): there is deliberately no docstring here — presumably DSPy would
# use one as prompt instructions, so confirm before adding it.
class Veracity(dspy.Signature):
    context_provided: str = dspy.InputField(desc="may contain relevant facts")
    answer: str = dspy.InputField()
    answer_is_in_context_provided: bool = dspy.OutputField(desc="verify that the answer is in the context_provided, respond True or False")


# Signature for the main question-answering step.
# The class docstring and each field's `desc` show up verbatim in the prompt
# printed by `llm.inspect_history` later in this notebook, so they are runtime
# text: editing them changes what the LM is asked to do.
class CompanySignature(dspy.Signature):
    """Answer questions with short factoid answers and friendly, use emoji. Answer should be in the context."""
    context: str = dspy.InputField(desc="may contain relevant facts")
    question: str = dspy.InputField(desc="user question to be answered")
    answer: str = dspy.OutputField(desc="often between 6 and 12 words")


class CompanyRelated(dspy.Module):
    """Retrieval-augmented Q&A over the company knowledge base.

    Pipeline: retrieve passages, draft an answer from them, check that the
    draft is supported by the passages, and fall back to a short denial when
    it is not.
    """

    def __init__(self):
        super().__init__()
        # Uses the module-level retriever configured earlier in this cell.
        self.retriever = retriever_model
        self.predict = dspy.ChainOfThought(CompanySignature)
        self.veracity = dspy.TypedChainOfThought(Veracity)
        self.not_found = dspy.Predict(NotFound)

    def forward(self, question: str):
        """Answer `question` from retrieved passages.

        Args:
            question: The user's question.

        Returns:
            A dict with two keys: "answer" (the drafted answer when the
            veracity check accepts it, otherwise the fallback response) and
            "answer_is_in_context_provided" (the veracity check's verdict).
        """
        passages = [hit['text'] for hit in self.retriever(question)]
        draft = self.predict(context=passages, question=question)
        verdict = self.veracity(context_provided=str(passages), answer=draft.answer)
        grounded = verdict.answer_is_in_context_provided

        # The fallback predictor runs only when the draft failed verification.
        answer = draft.answer if grounded else self.not_found(context=question).response

        return {
            "answer": answer,
            "answer_is_in_context_provided": grounded
        }


    

# Build the pipeline and run one sample question. The final bare expression is
# what the cell displays; the commented-out lines are alternative questions to
# try by swapping which call is active.
search = CompanyRelated()
# search(question="Good morning! What are the company's main products or services?")
# search(question="Hey! Can you provide some information about the company's technology stack?")
# search(question="Hello! How does the company foster employee growth and development?")
# search(question="Hi! How does the company encourage innovation among its employees?")
# search(question="Hi! What's the company's approach to employee wellness?")
search(question="What kind of benefits do you offer to employees?")
# search(question= "Can I work part time?",)
# search(question="What tools and technologies will I be using?")
# search(question="How diverse is your team?")


{'answer': 'Opportunities for career advancement, comprehensive training program, supportive environment. 🌟',
 'answer_is_in_context_provided': True}

In [247]:
# Debugging aid: show recent prompt/completion exchanges recorded by `llm`
# (n=20 — presumably the maximum number of most recent calls to show; confirm).
llm.inspect_history(n=20)




Answer questions with short factoid answers and friendly, use emoji. Answer should be in the context.

---

Follow the following format.

Context: may contain relevant facts

Question: user question to be answered

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: often between 6 and 12 words

---

Context:
[1] «We believe in promoting from within and offer various opportunities for career advancement. Employees who demonstrate strong performance and leadership qualities may be considered for promotions and leadership roles.»
[2] «All new employees undergo a comprehensive training program that includes an introduction to our company culture, systems, and procedures. Depending on your role, you may receive additional training on specific tools, products, or services.»
[3] «Our work environment is dynamic, inclusive, and supportive. We value collaboration, innovation, and respect. We strive to create a space where employees feel valued and motivate

'\n\n\nAnswer questions with short factoid answers and friendly, use emoji. Answer should be in the context.\n\n---\n\nFollow the following format.\n\nContext: may contain relevant facts\n\nQuestion: user question to be answered\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: often between 6 and 12 words\n\n---\n\nContext:\n[1] «We believe in promoting from within and offer various opportunities for career advancement. Employees who demonstrate strong performance and leadership qualities may be considered for promotions and leadership roles.»\n[2] «All new employees undergo a comprehensive training program that includes an introduction to our company culture, systems, and procedures. Depending on your role, you may receive additional training on specific tools, products, or services.»\n[3] «Our work environment is dynamic, inclusive, and supportive. We value collaboration, innovation, and respect. We strive to create a space where employees 