# .env.example

# Copy this file to .env and fill in the values

## NOTE: The only required value is DEFAULT_OPENAI_API_KEY. The app should work if the other
# values are left as they are or not defined at all. However, you are strongly encouraged to
# fill in your own SERPER_API_KEY value. It is also recommended to fill in the
# BYPASS_SETTINGS_RESTRICTIONS and BYPASS_SETTINGS_RESTRICTIONS_PASSWORD values as needed
# (see the comments next to those variables further down in this file).
DEFAULT_OPENAI_API_KEY="" # your OpenAI API key (the only required value)

## Your Google Serper API key (for web searches).
# If you don't set this, the project author's key will be used, which may have
# run out of credits by the time you use it (there's no payment info associated with it).
# You can get a free key without a credit card here: https://serper.dev/
# To use your own key, uncomment the line below and fill in the value.
# SERPER_API_KEY=""

# Response mode to use when the query does not specify one.
# Options: "/docs" | "/web" | "/quotes" | "/details" | "/chat" | "/research"
DEFAULT_MODE="/docs"

# Variables controlling whether the Streamlit app imposes some functionality restrictions.
#
# BYPASS_SETTINGS_RESTRICTIONS_PASSWORD is what to enter in the OpenAI API key field to bypass
# settings restrictions. If BYPASS_SETTINGS_RESTRICTIONS is non-empty, or if the user enters
# their own OpenAI API key, the password becomes mostly irrelevant, as full settings access is
# already granted. "Mostly", because the password can also be used for a couple of admin-only
# features, such as deleting the default collection and deleting a range of collections
# (see dbmanager.py).
#
# Recommendation: for local use, the easiest thing to do is to set both values. For a public
# deployment, you may want to leave BYPASS_SETTINGS_RESTRICTIONS empty.
BYPASS_SETTINGS_RESTRICTIONS="" # whether to immediately allow all settings (any non-empty string means true)
BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # password that bypasses settings restrictions (see above)

# If you are NOT using Azure, the following settings select the chat and embeddings models.
# NOTE: You can change the model and temperature on the fly in the Streamlit UI or via the API.
MODEL_NAME="gpt-3.5-turbo-0125" # default model to use for chat
ALLOWED_MODELS="gpt-3.5-turbo-0125, gpt-4-turbo-2024-04-09" # comma-separated model names (presumably the models users may select - confirm)
CONTEXT_LENGTH="16000" # you can also make it lower than the actual context length
EMBEDDINGS_MODEL_NAME="text-embedding-3-large"
EMBEDDINGS_DIMENSIONS="3072" # number of dimensions for the embeddings model
# Possible embedding models and their dimensions:
#   text-embedding-ada-002: 1536
#   text-embedding-3-small: 1536 or 512
#   text-embedding-3-large: 3072, 1024 or 512
# NOTE: depending on the model, different relevance score thresholds are needed (see
# score_threshold_min/max in chroma_ddg_retriever.py). These values are currently auto-set
# based on the model name, but they have only been tested for the
# "text-embedding-3-large"-3072 and "text-embedding-ada-002" models.

# If you are using Azure, uncomment the following settings and fill in the values
# AZURE_OPENAI_API_KEY="" # your Azure OpenAI API key
# OPENAI_API_TYPE="azure" # yours may be different 
# OPENAI_API_BASE="https://techops.openai.azure.com" # yours may be different 
# OPENAI_API_VERSION="2023-05-15" # or whichever version you're using
# CHAT_DEPLOYMENT_NAME="gpt-35-turbo" # your deployment name for the chat model you want to use
# EMBEDDINGS_DEPLOYMENT_NAME="text-embedding-ada-002" # your deployment name for the embedding model

TEMPERATURE="0.3" # 0 to 2, higher means more random, lower means more conservative (>1.6 is gibberish)
# LLM_REQUEST_TIMEOUT: seconds to wait before a request to the LLM service is considered timed
# out and the bot retries sending it (a few times with exponential back-off).
# For Azure, it seems that a slightly longer timeout is needed, about 9 seconds.
LLM_REQUEST_TIMEOUT="3"

# Whether to use Playwright for more reliable web scraping (any non-empty string means true).
# Using Playwright is recommended, but it may not work in all environments and requires a
# small amount of setup (mostly just running "playwright install" - you will be prompted).
USE_PLAYWRIGHT=""

DEFAULT_COLLECTION_NAME="docdocgo-documentation" # name of the initially selected collection

## There are two ways to use the Chroma vector database: using a local database or via HTTP
## (e.g. by running it in a Docker container). The default is the local option. While the local
## option is the simplest way to get started, the HTTP option is more robust and scalable.

## The following item is only relevant if you want to use a local Chroma db
VECTORDB_DIR="chroma/" # directory of your doc database

## The following items are only relevant if you want to use Chroma via HTTP
## (e.g. by running it in a Docker container)
USE_CHROMA_VIA_HTTP="" # whether to use Chroma via HTTP (any non-empty string means true)
CHROMA_SERVER_AUTH_CREDENTIALS="" # choose your own (must be the same as in your Chroma Docker container)
CHROMA_SERVER_HOST="localhost" # hostname or IP address of your Chroma server
CHROMA_SERVER_HTTP_PORT="8000" # port your Chroma server is listening on

## Settings for the response

# Whether to include the underlying error's message in the error message shown to the user.
# NOTE(review): presumably any non-empty string (such as "sure") means true, like the other
# flags in this file - confirm against the code that reads this variable.
INCLUDE_ERROR_IN_USER_FACING_ERROR_MSG="sure"

## Settings for Streamlit

# Base address used for sharing links; as in the example, include the scheme and port
DOMAIN_NAME_FOR_SHARING="" # e.g. "https://localhost:8501"

# Chat avatars (if you don't set these, the default avatars will be used).
# The examples below are relative file paths.
USER_AVATAR="" # URL or path of the user's chat avatar (e.g. "./images/user-avatar-example.png")
BOT_AVATAR="" # URL or path of the bot's chat avatar (e.g. "./images/bot-avatar-example.png")

# Warning message to display at the top of the Streamlit app (normally should be left empty)
STREAMLIT_WARNING_NOTIFICATION=""

## The following items are only relevant for ingesting docs
# NOTE(review): the first variable name is spelled "COLLECTON" (no "I" after the "T"). It is
# kept as is because the code that reads it is not visible from this file - confirm which
# spelling the code expects before renaming it.
COLLECTON_NAME_FOR_INGESTED_DOCS="" # collection name for the docs you want to ingest
DOCS_TO_INGEST_DIR_OR_FILE="" # path to directory or single file with your docs to ingest

## The following items are only relevant if running the FastAPI server
DOCDOCGO_API_KEY="" # choose your own (presumably the key API clients must supply - confirm)
MAX_UPLOAD_BYTES="104857600" # max size in bytes of files that can be uploaded (104857600 = 100 * 1024 * 1024, i.e. 100MB)

## Logging settings
DEFAULT_LOGGER_NAME="ddg" # name of the logger to use by default

# Overrides for config/logging.json
# LOG_FORMAT uses %-style placeholders such as %(asctime)s and %(levelname)s.
LOG_FORMAT="%(asctime)s - %(name)s - %(levelname)s - %(message)s - [%(filename)s:%(lineno)d in %(funcName)s]"
LOG_LEVEL="INFO" # one of: DEBUG, INFO, WARNING, ERROR, CRITICAL

## Additional options, used in development (for the on/off flags, any non-empty string means true)
RERAISE_EXCEPTIONS="" # don't keep the conversation going if an exception is raised, just reraise it
PRINT_STANDALONE_QUERY="" # print query used for the retriever
PRINT_QA_PROMPT="" # print the prompt used for the final response (if asking about your docs)
PRINT_CONDENSE_QUESTION_PROMPT="" # print the prompt used to condense the question
PRINT_SIMILARITIES="" # print the similarities between the query and the docs
PRINT_RESEARCHER_PROMPT="" # print the prompt used for the final researcher response
# Unlike the flags above, the following value is a query string rather than an on/off switch.
INITIAL_TEST_QUERY_STREAMLIT="" # query to auto-run as the first query in Streamlit