luoweb · luoweb · Oct 12, 2024 · Jun 24, 2024 · Jun 24, 2024 · Jun 24, 2024
diff --git a/.github/workflows/docker-cd.yaml b/.github/workflows/docker-cd.yaml
@@ -1,11 +1,14 @@
 name: Xinference CD for DockerHub
 
 on:
-  schedule:
-    - cron: '0 18 * * *'
+  # schedule:
+  #   - cron: '0 18 * * *'
   push:
     tags:
       - '*'
+    branches:
+      - "main"
+      - "master"
   workflow_dispatch:
 
 concurrency:
@@ -14,12 +17,24 @@ concurrency:
 
 jobs:
   build:
+    runs-on: ubuntu-latest
     timeout-minutes: 120
-    runs-on: self-hosted
+    # runs-on: self-hosted
     strategy:
       matrix:
         python-version: [ "3.9" ]
     steps:
+      # Delete large preinstalled toolchains from the hosted runner; presumably needed so the image build has enough disk.
+      - name: Free up runner disk space
+        shell: bash
+        # df -h runs before and after the removal so the job log shows the reclaimed space.
+        run: |
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          df -h
       - name: Check out code
         uses: actions/checkout@v3
         with:
@@ -34,7 +49,7 @@ jobs:
 
       - name: Build and push Docker image
         shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
+        if: ${{ github.repository == 'luoweb/inference' }}
         env:
           DOCKER_ORG: ${{ secrets.DOCKERHUB_USERNAME }}
           PY_VERSION: ${{ matrix.python-version }}
@@ -58,19 +73,16 @@ jobs:
               git checkout $branch
               export IMAGE_TAG="nightly-$branch"
             fi
-            docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}" --progress=plain -f xinference/deploy/docker/Dockerfile .
+            docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}" -f xinference/deploy/docker/Dockerfile .
             docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}"
-            docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu" --progress=plain -f xinference/deploy/docker/cpu.Dockerfile .
-            docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu"
+            # Persist the tag for later steps: variables exported in this shell do not survive into the next step.
             echo "XINFERENCE_IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV
           done
 
           if [[ -n "$GIT_TAG" ]]; then
             docker tag "$DOCKER_ORG/xinference:${GIT_TAG}" "$DOCKER_ORG/xinference:latest"
             docker push "$DOCKER_ORG/xinference:latest"
-            docker tag "$DOCKER_ORG/xinference:${GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
-            docker push "$DOCKER_ORG/xinference:latest-cpu"
             echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
           fi
 
       - name: Log in to Aliyun Docker Hub
@@ -82,19 +94,21 @@ jobs:
 
       - name: Push docker image to Aliyun
         shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
+        if: ${{ github.repository == 'luoweb/inference' }}
         env:
-          DOCKER_ORG: registry.cn-hangzhou.aliyuncs.com/xprobe_xinference
+          DOCKER_ORG: registry.cn-hangzhou.aliyuncs.com/roweb
         run: |
+          # XINFERENCE_IMAGE_TAG and XINFERENCE_GIT_TAG are read from $GITHUB_ENV, written by the build step.
+          # Only the default image is built by this workflow, so there is no -cpu variant to mirror.
+          docker tag "luoweb/xinference:${XINFERENCE_IMAGE_TAG}" "${DOCKER_ORG}/xinference:${XINFERENCE_IMAGE_TAG}"
+          docker push "${DOCKER_ORG}/xinference:${XINFERENCE_IMAGE_TAG}"
           if [[ -n "$XINFERENCE_GIT_TAG" ]]; then
-            docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}" "$DOCKER_ORG/xinference:latest"
+            docker tag "luoweb/xinference:${XINFERENCE_GIT_TAG}" "$DOCKER_ORG/xinference:latest"
             docker push "$DOCKER_ORG/xinference:latest"
-            docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
-            docker push "$DOCKER_ORG/xinference:latest-cpu"
           fi
 
       - name: Clean docker image cache
         shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
+        if: ${{ github.repository == 'luoweb/inference' }}
         run: |
           docker system prune -f -a
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -1,9 +1,9 @@
 name: Python CI
 
 on:
-  push:
-    branches:
-      - '*'
+  # push:
+  #   branches:
+  #     - '*'
   pull_request:
     types: ['opened', 'reopened', 'synchronize']
 

diff --git a/xinference/deploy/docker/Dockerfile b/xinference/deploy/docker/Dockerfile
@@ -1,19 +1,32 @@
-FROM vllm/vllm-openai:v0.6.0
+# FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
+FROM pytorch/pytorch:2.1.2-cuda11.8-cudnn8-devel
+# ARG BASE_TAG=main-base
+# FROM winglian/axolotl-base:$BASE_TAG
+# FROM registry.cn-hangzhou.aliyuncs.com/roweb/axolotl-base:$BASE_TAG
+# FROM vllm/vllm-openai:latest
+# FROM python:3.11-slim 
+# FROM vllm/vllm-openai:v0.6.0
 
 COPY . /opt/inference
 WORKDIR /opt/inference
 
+ARG DEBIAN_FRONTEND=noninteractive
 ENV NVM_DIR /usr/local/nvm
 ENV NODE_VERSION 14.21.1
 
 RUN apt-get -y update \
-  && apt install -y curl procps git libgl1 ffmpeg \
+  && DEBIAN_FRONTEND=noninteractive apt install -y curl procps git libgl1 ffmpeg \
+#   && apt install -y curl procps git libgl1 ffmpeg \
   # upgrade libstdc++ and libc for llama-cpp-python
   && printf "\ndeb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse" >> /etc/apt/sources.list \
   && apt-get -y update \
   && apt-get install -y --only-upgrade libstdc++6 && apt install -y libc6 \
   && mkdir -p $NVM_DIR \
   && curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash \
+  # && curl -o- https://mirror.ghproxy.com/https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash \
+  # && curl -o- https://mirror.ghproxy.com/https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | \
+  #  sed -e 's#https://github.com/#https://mirror.ghproxy.com/https://github.com/#g'| \
+  # sed -e 's#https://raw.githubusercontent.com/#https://mirror.ghproxy.com/https://raw.githubusercontent.com/#g'| bash \
   && . $NVM_DIR/nvm.sh \
   && nvm install $NODE_VERSION \
   && nvm alias default $NODE_VERSION \
@@ -27,7 +40,10 @@ ARG PIP_INDEX=https://pypi.org/simple
 RUN pip install --upgrade -i "$PIP_INDEX" pip && \
     pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && \
     # use pre-built whl package for llama-cpp-python, otherwise may core dump when init llama in some envs
-    pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
+    # pip install "llama-cpp-python" -i https://abetlen.github.io/llama-cpp-python/whl/cu121 && \
+    pip install llama-cpp-python --prefer-binary --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu118 && \
+    CMAKE_ARGS="-DGGML_CUBLAS=ON" pip install -i "$PIP_INDEX" -U "chatglm-cpp<0.4.0" && \
+#     pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
     pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements.txt && \
     pip install -i "$PIP_INDEX" --no-deps sglang && \
     pip uninstall flashinfer -y && \