38 changes: 32 additions & 6 deletions .github/workflows/integration-test.yml
@@ -192,6 +192,7 @@ jobs:
run: |
./occ app_api:daemon:register --net host manual_install "Manual Install" manual-install http localhost http://localhost:8080
./occ app_api:app:register context_chat_backend manual_install --json-info "{\"appid\":\"context_chat_backend\",\"name\":\"Context Chat Backend\",\"daemon_config_name\":\"manual_install\",\"version\":\"${{ fromJson(steps.appinfo.outputs.result).version }}\",\"secret\":\"12345\",\"port\":10034,\"scopes\":[],\"system_app\":0}" --force-scopes --wait-finish
ls -la context_chat_backend/persistent_storage/*

- name: Scan files, baseline
run: |
@@ -216,8 +217,8 @@ jobs:

- name: Run the prompts
run: |
./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' &
./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' &
./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker1_logs 2>&1 &
./occ background-job:worker 'OC\TaskProcessing\SynchronousBackgroundJob' > worker2_logs 2>&1 &

OUT1=$(./occ context_chat:prompt admin "Which factors are taken into account for the Ethical AI Rating?")
echo "$OUT1"
@@ -259,15 +260,40 @@ jobs:
echo "Memory usage during prompt is stable. No memory leak detected."
fi

- name: Show logs
- name: Show server logs
if: always()
run: |
cat data/nextcloud.log
echo '--------------------------------------------------'

- name: Show context_chat specific logs
if: always()
run: |
cat data/context_chat.log

- name: Show task processing worker logs
if: always()
run: |
tail -v -n +1 worker?_logs || echo "No worker logs"

- name: Show main app logs
if: always()
run: |
cat context_chat_backend/backend_logs || echo "No main backend logs"

- name: Show main app JSON logs
if: always()
run: |
tail -v -n +1 context_chat_backend/persistent_storage/logs/ccb.log* || echo "No logs in logs directory"

- name: Show embedding server logs
if: always()
run: |
cat context_chat_backend/em_backend_logs || echo "No embedding backend logs"
echo '--------------------------------------------------'
tail -v -n +1 context_chat_backend/persistent_storage/logs/* || echo "No logs in logs directory"

- name: Show embedding server JSON logs
if: always()
run: |
tail -v -n +1 context_chat_backend/persistent_storage/logs/em_server.log* || echo "No logs in logs directory"

summary:
permissions:
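For reference, a minimal Python sketch of what the new log-dumping steps above do: print every matching log file in full, preceded by a `==> path <==` header (the behaviour of `tail -v -n +1`), and fall back to a placeholder message when nothing matched. The helper name `dump_logs` is invented for illustration; only the glob patterns and fallback messages come from the workflow.

```python
# Illustrative only: mirrors the added "Show ... logs" workflow steps in Python.
from glob import glob


def dump_logs(pattern: str, missing_msg: str) -> None:
    """Print each file matching `pattern` with a tail -v style header."""
    paths = sorted(glob(pattern))
    if not paths:
        print(missing_msg)
        return
    for path in paths:
        print(f'==> {path} <==')  # same header format `tail -v` uses
        with open(path, errors='replace') as fp:
            print(fp.read())


if __name__ == '__main__':
    dump_logs('worker?_logs', 'No worker logs')
    dump_logs('context_chat_backend/persistent_storage/logs/ccb.log*',
              'No logs in logs directory')
    dump_logs('context_chat_backend/persistent_storage/logs/em_server.log*',
              'No logs in logs directory')
```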
19 changes: 15 additions & 4 deletions context_chat_backend/chain/query_proc.py
@@ -3,12 +3,23 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
#
import logging
from sys import maxsize as SYS_MAXSIZE

from langchain.llms.base import LLM
from transformers import GPT2Tokenizer

from ..types import TConfig

logger = logging.getLogger('ccb.chain')
TOKENIZER = GPT2Tokenizer.from_pretrained('gpt2')


def get_num_tokens(text: str) -> int:
'''
Returns the number of tokens in the text using the GPT2 tokenizer.
'''
return len(TOKENIZER.encode(text, max_length=SYS_MAXSIZE, truncation=True))


def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_chunks: list[str]) -> str:
'''
@@ -36,16 +47,16 @@ def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_
) \
or 4096

query_tokens = llm.get_num_tokens(query)
template_tokens = llm.get_num_tokens(template.format(context='', question=''))
query_tokens = get_num_tokens(query)
template_tokens = get_num_tokens(template.format(context='', question=''))

# remaining tokens after the template, query and 'to be' generated tokens
remaining_tokens = n_ctx - template_tokens - query_tokens - n_gen

# If the query is too long to fit in the context, truncate it (keeping the template)
if remaining_tokens <= 0:
new_remaining_tokens = n_ctx - template_tokens - n_gen
while query and llm.get_num_tokens(query) > new_remaining_tokens:
while query and get_num_tokens(query) > new_remaining_tokens:
query = ' '.join(query.split()[:-10])

if not query:
@@ -57,7 +68,7 @@ def get_pruned_query(llm: LLM, config: TConfig, query: str, template: str, text_

while text_chunks and remaining_tokens > 0:
context = text_chunks.pop(0)
context_tokens = llm.get_num_tokens(context)
context_tokens = get_num_tokens(context)

if context_tokens <= remaining_tokens:
accepted_chunks.append(context)
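For context, a self-contained sketch of the change above: token counting now goes through a module-level GPT2 tokenizer instead of `llm.get_num_tokens()`, so every caller shares one tokenizer instance. The helper mirrors the patch; the template, query and chunk values are invented, and the budgeting loop is a plausible reconstruction of the (mostly unchanged) chunk-selection logic in `get_pruned_query`, not a verbatim copy.

```python
from sys import maxsize as SYS_MAXSIZE

from transformers import GPT2Tokenizer

# Loaded once at import time, as in the patch, and reused for all counts.
TOKENIZER = GPT2Tokenizer.from_pretrained('gpt2')


def get_num_tokens(text: str) -> int:
    # max_length=SYS_MAXSIZE with truncation=True silences the
    # "sequence length longer than model maximum" warning on long inputs.
    return len(TOKENIZER.encode(text, max_length=SYS_MAXSIZE, truncation=True))


if __name__ == '__main__':
    n_ctx, n_gen = 4096, 512                                    # context window and generation budget
    template = 'Context:\n{context}\n\nQuestion: {question}\n'  # invented template
    query = 'Which factors are taken into account for the Ethical AI Rating?'
    text_chunks = ['first retrieved chunk ...', 'second retrieved chunk ...']

    remaining_tokens = (n_ctx
                        - get_num_tokens(template.format(context='', question=''))
                        - get_num_tokens(query)
                        - n_gen)

    accepted_chunks = []
    while text_chunks and remaining_tokens > 0:
        context = text_chunks.pop(0)
        context_tokens = get_num_tokens(context)
        if context_tokens <= remaining_tokens:
            accepted_chunks.append(context)
            remaining_tokens -= context_tokens

    print(template.format(context='\n\n'.join(accepted_chunks), question=query))
```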
1 change: 1 addition & 0 deletions main_em.py
@@ -82,6 +82,7 @@
'model_files',
em_conf.llama['model'],
)
logger.debug(f'Trying model path: {em_conf.llama["model"]}')

# if the model file is still not found, raise an error
if not os.path.isfile(em_conf.llama['model']):
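Most of the surrounding code in main_em.py sits outside this hunk, so the sketch below is only an assumption about its shape: the configured model path is retried under a `model_files/` directory inside persistent storage, the new debug line reports whichever path ends up being used, and a missing file still raises. Only the `model_files` join, the debug call and the `os.path.isfile` check come from the code above; the function name, logger name and exception type are invented.

```python
import logging
import os

logger = logging.getLogger('em_server')  # logger name is an assumption


def resolve_model_path(em_conf, persistent_storage_dir: str) -> None:
    # If the configured path is not a file as given, fall back to the
    # model_files/ directory inside persistent storage.
    if not os.path.isfile(em_conf.llama['model']):
        em_conf.llama['model'] = os.path.join(
            persistent_storage_dir,
            'model_files',
            em_conf.llama['model'],
        )
    logger.debug(f'Trying model path: {em_conf.llama["model"]}')

    # if the model file is still not found, raise an error
    if not os.path.isfile(em_conf.llama['model']):
        raise FileNotFoundError(f"Model file not found: {em_conf.llama['model']}")
```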
3 changes: 2 additions & 1 deletion requirements.txt
@@ -11,7 +11,8 @@ langchain-postgres
langchain-unstructured
llama_cpp_python
msg-parser
nc_py_api
# pin to the version before the switch to niquests
nc_py_api==0.20.2
odfdo
odfpy
openpyxl