In [1]:
def test_function(x: int = 1, y: int = 2) -> None:
    """Print a greeting followed by the two given integers.

    Args:
        x: First value interpolated into the message. Defaults to 1.
        y: Second value interpolated into the message. Defaults to 2.

    Returns:
        None. The function only prints, so the return annotation is ``None``
        rather than ``int``.
    """
    print(f"Hello, world! {x} {y}")

# Deliberately pass a keyword (`z`) that test_function does not declare;
# the recorded output of this cell is the resulting TypeError.
test_function(x=3, y=4, z=5)

TypeError: test_function() got an unexpected keyword argument 'z'

In [2]:
import json
import os
from datetime import datetime
from pydantic import BaseSettings, Field, BaseModel
from fastapi.encoders import jsonable_encoder

class OtherModel(BaseModel):
    """Nested model with two fields, ``a`` and ``b``, defaulting to 1 and 2."""

    # Both fields are declared by default value only, with no annotation.
    # NOTE(review): this relies on pydantic inferring the field type from the
    # default value — confirm this holds for the pydantic version in use.
    a = 1
    b = 2

class Model(BaseSettings):
    """Settings model whose ``dict()`` export flattens values to strings."""

    the_id: OtherModel = Field(
        ...,
        description="The ID of the document.",
        env="THE_ID",
    )

    def dict(self, *args, **kwargs):
        """Export the model, stringifying nested dicts and datetimes.

        Dict values are replaced by their JSON text and datetime values by
        their ISO-8601 text; every other value is left as exported.
        """
        exported = super().dict(*args, **kwargs)
        # Only existing keys are reassigned, so iterating while updating is safe.
        for name in exported:
            item = exported[name]
            if isinstance(item, datetime):
                exported[name] = item.isoformat()
            elif isinstance(item, dict):
                exported[name] = json.dumps(item)
        return exported

# Export a model built from an explicit dict; the overridden dict() turns the
# nested `the_id` value into a JSON string (see the printed output).
json_obj = Model(the_id={"a": 1, "b": 2}).dict()
print(json_obj)
# Store that JSON string under the env var named by the field's `env=` setting.
os.environ["THE_ID"] = json_obj["the_id"]
# Model() receives no arguments here; the printed `1` shows `the_id` was
# populated anyway — presumably parsed back from THE_ID by BaseSettings.
print(Model().the_id.a)

{'the_id': '{"a": 1, "b": 2}'}
1


In [6]:
from typing import Sequence
from pydantic import BaseSettings
import json
from numbers import Number

class BasePydanticSettings(BaseSettings):
    """Define the base settings for the package."""

    def dict(self, *args, **kwargs):
        """Export the settings as a flat mapping with environment-style keys.

        Every key is prefixed with ``Config.env_prefix`` (when the config
        defines one) and upper-cased. Values that are dicts, lists, tuples or
        sets are serialized to JSON strings; sets, which JSON has no native
        form for, are written as arrays whose element order is unspecified.
        All other values are passed through unchanged.

        Args:
            *args: Forwarded to ``BaseSettings.dict``.
            **kwargs: Forwarded to ``BaseSettings.dict``.

        Returns:
            A new dict mapping the upper-cased keys to the exported values.

        Raises:
            TypeError: If a container holds a value JSON cannot represent
                (other than the set types handled here).
        """

        def _encode_unsupported(obj):
            # json.dumps rejects sets outright; emit them as JSON arrays so a
            # set-valued setting (top-level or nested) no longer raises.
            if isinstance(obj, (set, frozenset)):
                return list(obj)
            raise TypeError(
                f"Object of type {type(obj).__name__} is not JSON serializable"
            )

        output = super().dict(*args, **kwargs)
        # Resolve the prefix once; it does not change between keys.
        prefix = getattr(self.Config, "env_prefix", "")
        new_output = {}
        for key, value in output.items():
            if isinstance(value, (dict, list, set, tuple)):
                value = json.dumps(value, default=_encode_unsupported)
            new_output[(prefix + key).upper()] = value
        return new_output

    class Config:
        """Define the Pydantic config."""

        use_enum_values = True
        env_file = ".env"
        env_file_encoding = "utf-8"


# Test the output: quotes were unexpectedly being retained in the exported values.
class Settings(BasePydanticSettings):
    """Define the settings for the package."""
    # Single string field with a default, used to inspect how dict() exports it.
    test: str = "test"

# The recorded output is {'TEST': 'test'}: key upper-cased, value left unquoted.
print(Settings().dict())

{'TEST': 'test'}


In [10]:
import json
from loguru import logger
import boto3
from botocore.config import Config as BotoConfig
import pymongo
import urllib.parse

def get_secret(secret_name: str, region_name: str = "us-east-1") -> "dict | str":
    """Retrieve a secret from AWS Secrets Manager.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region used for the Secrets Manager client.
            Defaults to ``"us-east-1"``.

    Returns:
        The decoded JSON value when the secret string is valid JSON
        (typically a dict of key/value pairs); otherwise the raw secret
        string.

    Raises:
        KeyError: If the response carries no ``SecretString`` entry.
    """
    logger.info(f"Retrieving secret: {secret_name}")
    session = boto3.session.Session()
    boto_config = BotoConfig(
        retries={
            "max_attempts": 3,
            "mode": "standard",
        }
    )
    client = session.client(
        service_name="secretsmanager",
        config=boto_config,
        region_name=region_name,
    )
    secret_value_response = client.get_secret_value(SecretId=secret_name)
    secret = secret_value_response["SecretString"]
    try:
        # Return the parsed value on success; without this return a JSON
        # secret would fall off the end of the function and yield None.
        # NOTE: any valid JSON text is decoded, so a purely numeric
        # plain-text secret comes back as a number, not a string.
        return json.loads(secret)
    except json.JSONDecodeError:
        # Plain-text secret (e.g. a bare password): hand it back unchanged.
        return secret

def connect_to_database(
    password: str,
    username: str,
    host: str = "tai-service-645860363137.us-east-1.docdb-elastic.amazonaws.com:27017",
) -> pymongo.MongoClient:
    """Create a MongoDB client for the database and verify it is reachable.

    The connection URI is built from the supplied credentials; no credential
    is embedded in the source.

    Args:
        password: Plain-text password; percent-encoded before use.
        username: Plain-text user name; percent-encoded before use.
        host: ``host:port`` of the cluster endpoint. Defaults to the
            endpoint this notebook was written against.

    Returns:
        The connected ``pymongo.MongoClient``.

    Raises:
        pymongo.errors.ServerSelectionTimeoutError: If no server answers
            within the 10 second selection timeout (raised by the
            ``server_info()`` probe).
    """
    logger.info("Creating MongoDB client")
    # Percent-encode both credentials so reserved characters such as
    # '@', ':', '/', '#' or '&' cannot corrupt the URI.
    quoted_username = urllib.parse.quote_plus(username)
    quoted_password = urllib.parse.quote_plus(password)
    mongo_client = pymongo.MongoClient(
        f"mongodb://{quoted_username}:{quoted_password}@{host}/?tls=true&retryWrites=false",
        serverSelectionTimeoutMS=10000,
    )
    # server_info() forces a round trip, so connection problems surface here
    # rather than on first use by the caller.
    logger.info(mongo_client.server_info())
    return mongo_client

In [11]:
# test if we can access google.com
import requests


# An explicit timeout keeps this connectivity check from hanging forever when
# the network silently drops traffic; requests applies no timeout by default.
response = requests.get("https://google.com", timeout=10)
print(response.status_code)

200


In [12]:
# Fetch the DocumentDB admin password from Secrets Manager, then try to open
# a client as "adminUser". The recorded run of this cell ended in a
# ServerSelectionTimeoutError after the 10 second timeout.
admin_password = get_secret("dev/tai_service/document_DB/admin_password")
client = connect_to_database(admin_password, "adminUser")

[32m2023-06-25 13:32:02.977[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_secret[0m:[36m10[0m - [1mRetrieving secret: dev/tai_service/document_DB/admin_password[0m
[32m2023-06-25 13:32:03.298[0m | [1mINFO    [0m | [36m__main__[0m:[36mconnect_to_database[0m:[36m31[0m - [1mCreating MongoDB client[0m


ServerSelectionTimeoutError: No servers found yet, Timeout: 10.0s, Topology Description: <TopologyDescription id: 649841d3a1e80e6baf391354, topology_type: Unknown, servers: [<ServerDescription ('tai-service-645860363137.us-east-1.docdb-elastic.amazonaws.com', 27017) server_type: Unknown, rtt: None>]>

In [6]:
# test to see if exclude works when exporting a pydantic model to a dict
from pydantic import BaseModel, Field, Extra

class StackConfigBaseModel(BaseModel):
    """Base model for stack configuration: an ID, a description and a name."""

    stack_id: str = Field(..., description="The ID of the stack.")
    description: str = Field(
        ...,
        description="The description of the stack.",
        min_length=10,
        max_length=255,
    )
    stack_name: str = Field(..., description="The name of the stack/service.")

    class Config:
        """Pydantic options applied to stack configuration models."""

        extra = Extra.forbid
        validate_assignment = True
        arbitrary_types_allowed = True

# Build a valid instance (the description satisfies the 10-255 length bounds).
model = StackConfigBaseModel(
    stack_id="test",
    description="test-description",
    stack_name="test",
)
# Full export first, then the same export with `stack_id` excluded; the
# recorded output confirms the excluded key is dropped.
print(model.dict())
print(model.dict(exclude={"stack_id"}))

{'stack_id': 'test', 'description': 'test-description', 'stack_name': 'test'}
{'description': 'test-description', 'stack_name': 'test'}


In [4]:
# test a get request for object test in bucket tai-class-resource-queue. the bucket is public.
import boto3

client = boto3.client("s3")
# Bound to `s3_object` rather than `object` so the builtin `object` type is
# not shadowed for every cell that runs afterwards in this kernel.
s3_object = client.get_object(
    Bucket="tai-class-resource-queue",
    Key="test.txt",
)
print(s3_object["Body"].read())

b'Hi from S3!'


In [None]:
import pinecone

# "YOUR_API_KEY" / "YOUR_ENVIRONMENT" are placeholders, not working values;
# this cell has no recorded execution.
pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENVIRONMENT")
index = pinecone.Index("example-index")

# Ask for the 10 best matches to a 4-element query vector, returning both the
# stored values and metadata, restricted by a metadata filter on `genre`.
# NOTE(review): `$in` presumably matches records whose genre is any of the
# listed values — confirm against the Pinecone filter documentation.
query_response = index.query(
    namespace="example-namespace",
    top_k=10,
    include_values=True,
    include_metadata=True,
    vector=[0.1, 0.2, 0.3, 0.4],
    filter={
        "genre": {"$in": ["comedy", "documentary", "drama"]}
    }
)

In [1]:
from enum import Enum


class Test(Enum):
    """Single-member enumeration used to inspect the string form of a member."""

    test = "test"


# Look the member up by name, then show its str() form as the cell result.
x = Test["test"]
str(x)

'Test.test'

In [1]:
# test if UUID can be used as a key in a dict
import uuid
# Build a one-entry mapping keyed by a freshly generated random UUID.
uuid_ = uuid.uuid4()
x = dict([(uuid_, "test")])

In [3]:
# Look the value up again by the same UUID object; relies on `x` and `uuid_`
# still being defined from the earlier cell. Recorded output: "test".
print(x[uuid_])

test


In [4]:
from enum import Enum
from pathlib import Path
import filetype
class FileType(Enum):
    """Define the file types for the document."""

    # Only PDF is declared, so looking up any other value (e.g. "png") by
    # value raises ValueError, as this cell's recorded output shows.
    PDF = "pdf"

path = Path("tests/unittests/api/indexer/download.png")
# NOTE(review): filetype.guess presumably returns None when it cannot
# identify the content, in which case `kind.extension` would fail — confirm.
kind = filetype.guess(path)
print(kind.extension)
# check if file is txt file
# The file is opened in text mode with the default encoding; a decode failure
# while reading is treated as "not a text file".
try:
    with open(path, "r") as file:
        file.read()
        print("txt file")
except UnicodeDecodeError:
    print("not a txt file")
    print(kind.extension)
    # For "png" this lookup raises ValueError (see recorded output): FileType
    # has no member with that value.
    print(FileType(kind.extension))


png
not a txt file
png


ValueError: 'png' is not a valid FileType

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE(review): called with no arguments and the result is discarded;
# from_language presumably requires a language argument — confirm against the
# langchain API before running this cell (it has no recorded execution).
RecursiveCharacterTextSplitter.from_language()
from langchain.document_loaders import PDFMinerLoader

In [2]:
from enum import Enum

class Test(str, Enum):
    """String-backed enumeration with two members, used for a membership check."""

    test = "test"
    test1 = "test1"


# Fetch a member by name and report whether the enumeration contains it.
x = Test["test"]
print(x in Test)

True


In [6]:
from langchain.embeddings import OpenAIEmbeddings

# "test" is a dummy API key, so the embedding request below is rejected; the
# recorded output of this cell is the resulting AuthenticationError.
embber = OpenAIEmbeddings(openai_api_key="test")

embber.embed_documents(["how fast can i run?", "this is the fastest runner in the world"])

AuthenticationError: Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.