diff --git a/GraphRAG/README.md b/GraphRAG/README.md
index d654357d44..3c9de58d69 100644
--- a/GraphRAG/README.md
+++ b/GraphRAG/README.md
@@ -72,7 +72,7 @@ Here is an example of `Nike 2023` pdf.
 # download pdf file
 wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
 # upload pdf file with dataprep
-curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
+curl -X POST "http://${host_ip}:11103/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -80,8 +80,7 @@ curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
 ```bash
 curl http://${host_ip}:8888/v1/graphrag \
     -H "Content-Type: application/json" \
-    -d '{
-        "model": "gpt-4o-mini","messages": [{"role": "user","content": "What is the revenue of Nike in 2023?
+    -d '{"messages": [{"role": "user","content": "where do Nike subsidiaries operate?
 "}]}'
 ```
 
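Beyond the query above, ingestion can be spot-checked directly against the graph store before any GraphRAG query is issued. A minimal sketch, assuming the Neo4j HTTP port (`NEO4J_PORT1=11631`) and the `neo4j`/`neo4jtest` credentials that `set_env.sh` exports below; the Cypher statement is illustrative, not part of the repo:

```bash
# Count the nodes produced by graph extraction; a non-zero count means
# the dataprep ingest actually wrote entities into Neo4j.
curl -s -u neo4j:neo4jtest \
    -H "Content-Type: application/json" \
    "http://${host_ip}:11631/db/neo4j/tx/commit" \
    -d '{"statements":[{"statement":"MATCH (n) RETURN count(n) AS nodes"}]}'
```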
diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
index 29171a20f2..76f1ab9f63 100644
--- a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -5,52 +5,65 @@ services:
   neo4j-apoc:
     image: neo4j:latest
     container_name: neo4j-apoc
+    ports:
+      - "${NEO4J_PORT1:-7474}:7474"
+      - "${NEO4J_PORT2:-7687}:7687"
     volumes:
-      - /$HOME/neo4j/logs:/logs
-      - /$HOME/neo4j/config:/config
-      - /$HOME/neo4j/data:/data
-      - /$HOME/neo4j/plugins:/plugins
+      - ./data/neo4j/logs:/logs
+      - ./data/neo4j/config:/config
+      - ./data/neo4j/data:/data
+      - ./data/neo4j/plugins:/plugins
     ipc: host
     environment:
+      - no_proxy=${no_proxy}
+      - http_proxy=${http_proxy}
+      - https_proxy=${https_proxy}
       - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
       - NEO4J_PLUGINS=["apoc"]
       - NEO4J_apoc_export_file_enabled=true
       - NEO4J_apoc_import_file_enabled=true
       - NEO4J_apoc_import_file_use__neo4j__config=true
       - NEO4J_dbms_security_procedures_unrestricted=apoc.\*
-    ports:
-      - "7474:7474"
-      - "7687:7687"
+      - NEO4J_server_bolt_advertised__address=localhost:${NEO4J_PORT2}
     restart: always
-  tei-embedding-service:
+    healthcheck:
+      test: wget http://localhost:7474 || exit 1
+      interval: 5s
+      timeout: 10s
+      retries: 20
+      start_period: 3s
+  tei-embedding-serving:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    container_name: tei-embedding-server
+    container_name: tei-embedding-serving
+    entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
     ports:
-      - "6006:80"
+      - "${TEI_EMBEDDER_PORT:-12000}:80"
     volumes:
       - "./data:/data"
     shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
-      NO_PROXY: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-    ipc: host
-    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
-  tgi-gaudi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+      host_ip: ${host_ip}
+      HF_TOKEN: ${HF_TOKEN}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 48
+  tgi-gaudi-server:
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
-      - "6005:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-./data}:/data"
     environment:
       no_proxy: ${no_proxy}
-      NO_PROXY: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
       HF_TOKEN: ${HF_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
@@ -60,33 +73,44 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-2048}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS:-4096}
       TEXT_GENERATION_SERVER_IGNORE_EOS_TOKEN: false
     runtime: habana
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID} --max-input-length 6000 --max-total-tokens 8192
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model-id ${LLM_MODEL_ID}
+
   dataprep-neo4j-llamaindex:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
-    container_name: dataprep-neo4j-server
+    container_name: dataprep-neo4j-llamaindex
     depends_on:
-      - neo4j-apoc
-      - tgi-gaudi-service
-      - tei-embedding-service
+      neo4j-apoc:
+        condition: service_healthy
+      tgi-gaudi-server:
+        condition: service_healthy
+      tei-embedding-serving:
+        condition: service_healthy
     ports:
-      - "6004:5000"
+      - "${DATAPREP_PORT:-11103}:5000"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       host_ip: ${host_ip}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
+      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX"
       NEO4J_URL: ${NEO4J_URL}
       NEO4J_USERNAME: ${NEO4J_USERNAME}
       NEO4J_PASSWORD: ${NEO4J_PASSWORD}
-      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX"
       TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -94,59 +118,61 @@ services:
       OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
       EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
       LOGFLAG: ${LOGFLAG}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS:-4096}
     restart: unless-stopped
-  retriever-neo4j-llamaindex:
+  retriever-neo4j:
     image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
-    container_name: retriever-neo4j-server
-    depends_on:
-      - neo4j-apoc
-      - tgi-gaudi-service
-      - tei-embedding-service
+    container_name: retriever-neo4j
     ports:
-      - "7000:7000"
+      - "${RETRIEVER_PORT:-7000}:7000"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      host_ip: ${host_ip}
-      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HF_TOKEN}
-      NEO4J_URI: ${NEO4J_URL}
-      NEO4J_USERNAME: ${NEO4J_USERNAME}
-      NEO4J_PASSWORD: ${NEO4J_PASSWORD}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+      LOGFLAG: ${LOGFLAG:-False}
+      RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_NEO4J}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
-      OPENAI_API_KEY: ${OPENAI_API_KEY}
-      OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL}
-      OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      MAX_OUTPUT_TOKENS: ${MAX_OUTPUT_TOKENS}
-      LOGFLAG: ${LOGFLAG}
-      RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
-    restart: unless-stopped
+      NEO4J_URI: ${NEO4J_URI}
+      NEO4J_URL: ${NEO4J_URI}
+      NEO4J_USERNAME: ${NEO4J_USERNAME}
+      NEO4J_PASSWORD: ${NEO4J_PASSWORD}
+      VDMS_USE_CLIP: 0
+      host_ip: ${host_ip}
+    depends_on:
+      neo4j-apoc:
+        condition: service_healthy
+      tei-embedding-serving:
+        condition: service_healthy
+      tgi-gaudi-server:
+        condition: service_healthy
   graphrag-gaudi-backend-server:
     image: ${REGISTRY:-opea}/graphrag:${TAG:-latest}
     container_name: graphrag-gaudi-backend-server
     depends_on:
       - neo4j-apoc
-      - tei-embedding-service
-      - retriever-neo4j-llamaindex
-      - tgi-gaudi-service
+      - tei-embedding-serving
+      - retriever-neo4j
+      - tgi-gaudi-server
     ports:
-      - "8888:8888"
+      - "${MEGA_SERVICE_PORT:-8888}:8888"
     environment:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
       - http_proxy=${http_proxy}
       - MEGA_SERVICE_HOST_IP=graphrag-gaudi-backend-server
-      - RETRIEVER_SERVICE_HOST_IP=retriever-neo4j-llamaindex
+      - RETRIEVER_SERVICE_HOST_IP=retriever-neo4j
       - RETRIEVER_SERVICE_PORT=7000
-      - LLM_SERVER_HOST_IP=tgi-gaudi-service
-      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
+      - LLM_SERVER_HOST_IP=tgi-gaudi-server
+      - LLM_SERVER_PORT=80
       - LLM_MODEL_ID=${LLM_MODEL_ID}
       - LOGFLAG=${LOGFLAG}
     ipc: host
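Since every upstream service now carries a healthcheck and the dependents gate on `condition: service_healthy`, startup can be awaited in a single step instead of polling containers by hand. A sketch, assuming Docker Compose v2 (which provides `--wait`) and a sourced `set_env.sh`:

```bash
cd GraphRAG/docker_compose/intel/hpu/gaudi
source ./set_env.sh
# --wait blocks until all services with healthchecks report healthy,
# mirroring the depends_on conditions declared above.
docker compose -f compose.yaml up -d --wait
docker compose -f compose.yaml ps
```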
diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
index 97c462c581..a4fd8049b0 100644
--- a/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
+++ b/GraphRAG/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -10,16 +10,25 @@ pushd "../../../../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
 
+export TEI_EMBEDDER_PORT=11633
+export LLM_ENDPOINT_PORT=11634
 export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
 export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
 export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
 export OPENAI_LLM_MODEL="gpt-4o"
-export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
-export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
-export NEO4J_URL="bolt://${host_ip}:7687"
-export NEO4J_USERNAME=neo4j
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
+export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export NEO4J_PORT1=11631
+export NEO4J_PORT2=11632
+export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}"
+export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}"
+export NEO4J_USERNAME="neo4j"
+export NEO4J_PASSWORD="neo4jtest"
 export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
 export LOGFLAG=True
-export RETRIEVER_SERVICE_PORT=80
-export LLM_SERVER_PORT=80
-export MAX_OUTPUT_TOKENS=1024
+export MAX_INPUT_TOKENS=4096
+export MAX_TOTAL_TOKENS=8192
+export DATA_PATH="/mnt/nvme2n1/hf_cache"
+export DATAPREP_PORT=11103
+export RETRIEVER_PORT=11635
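Because the script now pins six fixed host ports (plus the megaservice default 8888), a pre-flight check avoids bind failures on shared Gaudi machines. A hypothetical helper, not part of the repo, assuming `ss` from iproute2 is available:

```bash
# Fail fast if any published host port is already bound.
for p in 11103 11631 11632 11633 11634 11635 8888; do
    if ss -ltn | grep -q ":${p} "; then
        echo "Port ${p} is already in use" >&2
        exit 1
    fi
done
```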
diff --git a/GraphRAG/tests/test_compose_on_gaudi.sh b/GraphRAG/tests/test_compose_on_gaudi.sh
index 17f03ce61e..bec978ad51 100755
--- a/GraphRAG/tests/test_compose_on_gaudi.sh
+++ b/GraphRAG/tests/test_compose_on_gaudi.sh
@@ -12,7 +12,7 @@ export TAG=${IMAGE_TAG}
 
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
-ip_address=$(hostname -I | awk '{print $1}')
+export host_ip=$(hostname -I | awk '{print $1}')
 
 function build_docker_images() {
     opea_branch=${opea_branch:-"main"}
@@ -33,25 +33,38 @@ function build_docker_images() {
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     docker images && sleep 1s
 }
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
-    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
     export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+
+    export TEI_EMBEDDER_PORT=11633
+    export LLM_ENDPOINT_PORT=11634
+    export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+    export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
+    export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
+    export OPENAI_LLM_MODEL="gpt-4o"
+    export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
+    export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-8B-Instruct"
+    export TGI_LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+    export NEO4J_PORT1=11631
+    export NEO4J_PORT2=11632
+    export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}"
+    export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}"
     export NEO4J_USERNAME="neo4j"
     export NEO4J_PASSWORD="neo4jtest"
-    export NEO4J_URL="bolt://${ip_address}:7687"
-    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
-    export TGI_LLM_ENDPOINT="http://${ip_address}:6005"
-    export host_ip=${ip_address}
-    export LOGFLAG=true
-    export MAX_OUTPUT_TOKENS="1024"
+    export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
+    export LOGFLAG=True
+    export MAX_INPUT_TOKENS=4096
+    export MAX_TOTAL_TOKENS=8192
+    export DATAPREP_PORT=11103
+    export RETRIEVER_PORT=11635
+    export MEGA_SERVICE_PORT=8888
 
     unset OPENAI_API_KEY
     # Start Docker Containers
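The same readiness that the compose healthchecks encode can also be polled from a shell before the validators run; a sketch against the TGI and TEI `/health` endpoints exported above (the 10-second retry interval is arbitrary):

```bash
# Wait until TGI and TEI answer their /health endpoints.
for endpoint in "http://${host_ip}:${LLM_ENDPOINT_PORT}/health" \
                "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"; do
    until curl -sf "$endpoint" > /dev/null; do
        echo "waiting for ${endpoint}..."
        sleep 10
    done
done
```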
@@ -116,7 +129,7 @@ function validate_microservices() {
     # validate neo4j-apoc
     validate_service \
-        "${ip_address}:7474" \
+        "${host_ip}:${NEO4J_PORT1}" \
         "200 OK" \
         "neo4j-apoc" \
         "neo4j-apoc" \
         ""
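The neo4j-apoc probe above only asserts that the HTTP UI answers with `200 OK`; the equivalent manual check is a one-liner (ports and host from `set_env.sh`):

```bash
# Expect "200" once the neo4j-apoc container reports healthy.
curl -s -o /dev/null -w "%{http_code}\n" "http://${host_ip}:${NEO4J_PORT1}"
```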
@@ -124,45 +137,46 @@ function validate_microservices() {
     # tei for embedding service
     validate_service \
-        "${ip_address}:6006/embed" \
+        "${host_ip}:${TEI_EMBEDDER_PORT}/embed" \
         "[[" \
         "tei-embedding-service" \
-        "tei-embedding-server" \
+        "tei-embedding-serving" \
         '{"inputs":"What is Deep Learning?"}'
     sleep 1m # retrieval can't curl as expected, try to wait for more time
 
+    # tgi for llm service
+    validate_service \
+        "${host_ip}:${LLM_ENDPOINT_PORT}/generate" \
+        "generated_text" \
+        "tgi-gaudi-service" \
+        "tgi-gaudi-server" \
+        '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
+
     # test /v1/dataprep/ingest graph extraction
     echo "Like many companies in the O&G sector, the stock of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy (NYSE:FE – Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh O’Brien during a debate on retained firefighters. Mr O’Brien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Ireland’s larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, O’Brien says. Martin withdraws comment after saying People Before Profit would ‘put the jackboot on people’ Taoiseach ‘propagated fears’ farmers forced to rewet land due to nature restoration law – Cairns An intervention is required now. I’m asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.I’m also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr O’Brien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr O’Brien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr O’Brien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the State’s industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt
     validate_service \
-        "http://${ip_address}:6004/v1/dataprep/ingest" \
+        "http://${host_ip}:${DATAPREP_PORT}/v1/dataprep/ingest" \
         "Data preparation succeeded" \
         "extract_graph_neo4j" \
-        "dataprep-neo4j-server"
+        "dataprep-neo4j-llamaindex"
 
     sleep 2m
 
     # retrieval microservice
     validate_service \
-        "${ip_address}:7000/v1/retrieval" \
-        "retrieved_docs" \
+        "${host_ip}:${RETRIEVER_PORT}/v1/retrieval" \
+        "documents" \
         "retriever_community_answers_neo4j" \
-        "retriever-neo4j-server" \
-        "{\"model\": \"gpt-4o-mini\",\"messages\": [{\"role\": \"user\",\"content\": \"Who is John Brady and has he had any confrontations?\"}]}"
+        "retriever-neo4j" \
+        "{\"messages\": [{\"role\": \"user\",\"content\": \"Who is John Brady and has he had any confrontations?\"}]}"
 
-    # tgi for llm service
-    validate_service \
-        "${ip_address}:6005/generate" \
-        "generated_text" \
-        "tgi-gaudi-service" \
-        "tgi-gaudi-server" \
-        '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
-}
+    }
 
 function validate_megaservice() {
     # Curl the Mega Service
     validate_service \
-        "${ip_address}:8888/v1/graphrag" \
+        "${host_ip}:${MEGA_SERVICE_PORT}/v1/graphrag" \
         "data: " \
         "graphrag-megaservice" \
         "graphrag-gaudi-backend-server" \
@@ -181,7 +195,7 @@ function validate_frontend() {
     fi
     source activate ${conda_env_name}
 
-    sed -i "s/localhost/$ip_address/g" playwright.config.ts
+    sed -i "s/localhost/$host_ip/g" playwright.config.ts
 
     conda install -c conda-forge nodejs=22.6.0 -y
     npm install && npm ci && npx playwright install --with-deps
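With ports and service names now aligned across `compose.yaml`, `set_env.sh`, and the validators, the suite runs end to end the same way CI drives it. A sketch for a local run; the `IMAGE_TAG` value and the token placeholder are assumptions, and the Hugging Face token must have access to the gated Llama weights:

```bash
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
export IMAGE_TAG=latest
cd GraphRAG/tests
bash test_compose_on_gaudi.sh
```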