In [6]:
# Set ENV Variables
import os

os.environ['OPENAI_BASE_URL'] = "http://localhost:1234/v1"
os.environ['OPENAI_API_KEY'] = "test"

In [7]:
import requests
from langchain.embeddings.base import Embeddings
from typing import List


# Define a class that uses the HTTP API to get embeddings
class HTTPEmbeddingModel(Embeddings):
    def __init__(self, api_url: str, model_name: str):
        """
        Initialize with the base URL of the HTTP server and model name.
        
        :param api_url: The API endpoint that returns the embeddings.
        :param model_name: The model to use when making the request.
        """
        self.api_url = api_url
        self.model_name = model_name
    
    def get_embedding(self, text: str) -> List[float]:
        """
        Get the embedding for a single piece of text by making an HTTP request.
        
        :param text: The text to get embeddings for.
        :return: A list of floats representing the embedding.
        """
        payload = {
            "model": self.model_name,
            "input": text
        }

        response = requests.post(self.api_url, json=payload, headers={"Content-Type": "application/json"})
        
        if response.status_code != 200:
            raise ValueError(f"Error getting embedding: {response.text}")
        
        response_json = response.json()

        # Extract the first embedding from the "data" field
        embedding_data = response_json.get("data", [])
        if len(embedding_data) == 0:
            raise ValueError("No embeddings found in the response.")

        # Assuming we are interested in the first embedding returned
        return embedding_data[0].get("embedding", [])
    
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Embed a list of documents (texts).
        
        :param texts: List of documents to embed.
        :return: A list of lists, where each inner list is an embedding.
        """
        embeddings = []
        for text in texts:
            embedding = self.get_embedding(text)
            embeddings.append(embedding)
        return embeddings
    
    def embed_query(self, text: str) -> List[float]:
        """
        Embed a single query (text).
        
        :param text: The query text to embed.
        :return: A list of floats representing the embedding.
        """
        return self.get_embedding(text)


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import SQLiteVSS
from typing import List

# Instantiate the HTTP embedding model
api_url = "http://127.0.0.1:1234/v1/embeddings"
model_name = "nomic-embed-text-v1.5"
embd = HTTPEmbeddingModel(api_url=api_url, model_name=model_name)
texts = [
    "Prompt engineering is a methodology that focuses on the design and implementation of prompts to influence behavior.",
    "Kshitij Shah is a software engineer who does no work.",
]

# Add the documents to the vectorstore using the custom HTTP embedding model
db = SQLiteVSS.from_texts(
    texts=texts,      # Extract the text from the document chunks
    embedding=embd,   # Use your custom embedding model here
    table="state_union",
    db_file="/tmp/vss.db",
)

# Perform a similarity search on the vectorstore
query = "Tell me about the coders in the company"
data = db.similarity_search(query)

print("Embedding Done!")

In [None]:
data[0].page_content

In [18]:
### Router

from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

from pydantic import BaseModel, Field

# Data model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    datasource: Literal["vectorstore", "web_search"] = Field(
        ...,
        description="Given a user question choose to route it to web search or a vectorstore.",
    )

    class Config:
        schema_extra = {
            "example": {
                "datasource": "vectorstore"
            }
        }

# Generate JSON schema
route_query_schema = RouteQuery.model_json_schema()


# LLM with function call
llm = ChatOpenAI(model="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", temperature=0)


* 'schema_extra' has been renamed to 'json_schema_extra'


'The current weather in Sydney is cloudy with some sunshine. The high temperature is 26°C and the low temperature tonight will be around 16°C. There will be mainly clear skies early, then areas of low clouds forming later.\n\nAs for traffic conditions in Sydney, there is currently a traffic jam in the middle western part of the city.'