In [1]:
import logging
import os
from datetime import timedelta
from functools import lru_cache
from typing import Optional

from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Load variables from the .env file addressed relative to the current working
# directory; the settings models below look up OPENAI_API_KEY and
# TIMESCALE_SERVICE_URL through os.getenv.
load_dotenv(dotenv_path="./.env")


def setup_logging():
    """Apply the application's default logging configuration.

    Delegates to ``logging.basicConfig`` with an INFO threshold and a
    "timestamp - level - message" record layout.
    """
    record_layout = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=record_layout, level=logging.INFO)


class LLMSettings(BaseModel):
    """Common language-model options that provider-specific settings inherit."""

    # Defaults to 0.0.
    temperature: float = Field(default=0.0)
    # Optional limit; None means no value is configured.
    max_tokens: Optional[int] = Field(default=None)
    # Defaults to 3.
    max_retries: int = Field(default=3)


class OpenAISettings(LLMSettings):
    """OpenAI-specific settings layered on top of LLMSettings.

    The API key is read from the ``OPENAI_API_KEY`` environment variable each
    time the model is instantiated; the chat and embedding model names have
    fixed defaults.
    """

    # os.getenv returns None when OPENAI_API_KEY is unset, and pydantic does not
    # validate default values unless explicitly configured to, so the field can
    # really hold None. The annotation says so instead of claiming a plain str.
    api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("OPENAI_API_KEY")
    )
    default_model: str = Field(default="gpt-4o")
    embedding_model: str = Field(default="text-embedding-3-small")


class DatabaseSettings(BaseModel):
    """Database connection settings.

    The service URL is read from the ``TIMESCALE_SERVICE_URL`` environment
    variable each time the model is instantiated.
    """

    # os.getenv returns None when TIMESCALE_SERVICE_URL is unset, and pydantic
    # does not validate default values unless explicitly configured to, so the
    # field can really hold None. The annotation reflects that.
    service_url: Optional[str] = Field(
        default_factory=lambda: os.getenv("TIMESCALE_SERVICE_URL")
    )


class VectorStoreSettings(BaseModel):
    """Configuration values handed to the vector store client.

    ``initialize_clients`` forwards these as ``table_name``, ``num_dimensions``
    and ``time_partition_interval`` respectively.
    """

    table_name: str = Field(default="embeddings")
    embedding_dimensions: int = Field(default=1536)
    time_partition_interval: timedelta = Field(default=timedelta(days=7))


class Settings(BaseModel):
    """Top-level settings object that groups every sub-settings model.

    Each field is built by calling its model class with no arguments, so
    environment-derived values are resolved when ``Settings()`` is created.
    """

    # OpenAI credentials and model names.
    openai: OpenAISettings = Field(
        default_factory=OpenAISettings,
    )
    # Database connection details.
    database: DatabaseSettings = Field(
        default_factory=DatabaseSettings,
    )
    # Vector store table configuration.
    vector_store: VectorStoreSettings = Field(
        default_factory=VectorStoreSettings,
    )


@lru_cache(maxsize=128)
def get_settings() -> Settings:
    """Return the process-wide Settings instance, building it on first use.

    The result is memoized by ``lru_cache``, so the Settings object is created
    and ``setup_logging`` is invoked only on the first call; later calls return
    the cached instance.
    """
    cached_instance = Settings()
    # Logging is configured only after the settings were built successfully.
    setup_logging()
    return cached_instance

# Coding the Vectorstore from Scratch

In [39]:
from openai import OpenAI
from timescale_vector import client

def initialize_clients(settings):
    """Create the clients the notebook needs from a settings object.

    Args:
        settings: Object exposing ``database.service_url``, ``vector_store``
            (``table_name``, ``embedding_dimensions``,
            ``time_partition_interval``) and ``openai`` (``api_key``,
            ``embedding_model``).

    Returns:
        A 3-tuple ``(vec_client, openai_client, embedding_model)``: the
        ``client.Sync`` instance, the ``OpenAI`` instance, and the embedding
        model value taken from ``settings.openai.embedding_model``.
    """
    # Vector store connection via the timescale client.
    db_url = settings.database.service_url
    store_cfg = settings.vector_store
    vec_client = client.Sync(
        service_url=db_url,
        table_name=store_cfg.table_name,
        num_dimensions=store_cfg.embedding_dimensions,
        time_partition_interval=store_cfg.time_partition_interval,
    )

    # OpenAI client, followed by the embedding model setting.
    openai_cfg = settings.openai
    openai_client = OpenAI(api_key=openai_cfg.api_key)
    embedding_model = openai_cfg.embedding_model

    return vec_client, openai_client, embedding_model

In [40]:
# Fetch the cached settings, then create the vector store client, the OpenAI
# client and the embedding model value as notebook-level globals.
# `embeddding_model` (sic) is read as a global by get_embedding, so the spelling
# here must stay in sync with that function.
settings = get_settings()
vec_client, openai_client, embeddding_model = initialize_clients(settings)

In [48]:
def get_embedding(openai_client, text, model=None):
    """Return the embedding for ``text`` using the given OpenAI client.

    Args:
        openai_client: Client exposing ``embeddings.create(input=..., model=...)``.
        text: String to embed. Newline characters are replaced with spaces
            before the request is made.
        model: Embedding model to request. When omitted, the notebook-level
            ``embeddding_model`` global is used, which keeps existing
            two-argument calls working.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list.
    """
    if model is None:
        # Backward-compatible fallback to the global created alongside the
        # clients; passing `model` explicitly avoids this hidden dependency.
        model = embeddding_model

    # Replace newlines with spaces before embedding.
    cleaned_text = text.replace("\n", " ")
    response = openai_client.embeddings.create(input=[cleaned_text], model=model)
    return response.data[0].embedding

In [47]:
# NOTE(review): no cell shown above assigns `embedding`; presumably it came from
# a get_embedding(openai_client, ...) call in a cell that is no longer present.
# Confirm and restore that cell so the notebook survives Restart Kernel -> Run All.
embedding

[-0.01666725054383278,
 -0.03096848726272583,
 -0.007268919143825769,
 0.047346558421850204,
 -0.057520415633916855,
 -0.04834553971886635,
 0.011337148025631905,
 0.021451855078339577,
 -0.02330523170530796,
 -0.01370973326265812,
 -0.029785480350255966,
 -0.02648620679974556,
 -0.0330716110765934,
 -0.014708717353641987,
 0.00822189636528492,
 0.021622734144330025,
 -0.006920589134097099,
 0.0025073171127587557,
 -0.015313364565372467,
 0.056889478117227554,
 0.05894002318382263,
 0.009595499373972416,
 -0.011770917102694511,
 0.007334641646593809,
 0.0381716825067997,
 0.02204335853457451,
 0.004863472189754248,
 -0.010765361599624157,
 -0.004432989284396172,
 -0.0024038038682192564,
 0.053708504885435104,
 -0.021504433825612068,
 0.009648077189922333,
 0.024974586442112923,
 0.004462564364075661,
 0.05086928978562355,
 -0.03102106600999832,
 0.022555995732545853,
 -0.005326816346496344,
 -0.07902485132217407,
 -0.011665760539472103,
 -0.057257525622844696,
 0.0255923792719841,
 0.0