luoweb · luoweb · Jul 13, 2024 · Jun 24, 2024 · Jun 24, 2024 · Jun 24, 2024
diff --git a/.github/workflows/docker-cd.yaml b/.github/workflows/docker-cd.yaml
@@ -6,6 +6,9 @@ on:
   push:
     tags:
       - '*'
+    branches:  # additionally trigger on pushes to main/master
+      - "main"
+      - "master"
   workflow_dispatch:
 
 concurrency:
@@ -14,11 +17,14 @@ concurrency:
 
 jobs:
   build:
-    runs-on: self-hosted
+    # Previously `runs-on: self-hosted`; the job now runs on a GitHub-hosted runner.
+    runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: [ "3.9" ]
     steps:
+      - uses: AutoModality/action-clean@v1  # NOTE(review): presumably a workspace-cleanup step for self-hosted runners; likely redundant on ubuntu-latest - confirm
+
       - name: Check out code
         uses: actions/checkout@v3
         with:
@@ -33,7 +39,7 @@ jobs:
 
       - name: Build and push Docker image
         shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
+        if: ${{ github.repository == 'luoweb/inference' }}  # step is skipped in any other repository
         env:
           DOCKER_ORG: ${{ secrets.DOCKERHUB_USERNAME }}
           PY_VERSION: ${{ matrix.python-version }}
@@ -57,19 +63,13 @@ jobs:
               git checkout $branch
               export IMAGE_TAG="nightly-$branch"
             fi
-            docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}" --progress=plain -f xinference/deploy/docker/Dockerfile .
+            docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}" -f xinference/deploy/docker/Dockerfile .  # NOTE(review): no -cpu image is built and XINFERENCE_IMAGE_TAG / XINFERENCE_GIT_TAG are not written to $GITHUB_ENV - confirm no later step needs them
             docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}"
-            docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu" --progress=plain -f xinference/deploy/docker/cpu.Dockerfile .
-            docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu"
-            echo "XINFERENCE_IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV
           done
 
           if [[ -n "$GIT_TAG" ]]; then
             docker tag "$DOCKER_ORG/xinference:${GIT_TAG}" "$DOCKER_ORG/xinference:latest"
             docker push "$DOCKER_ORG/xinference:latest"
-            docker tag "$DOCKER_ORG/xinference:${GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
-            docker push "$DOCKER_ORG/xinference:latest-cpu"
-            echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
           fi
 
       - name: Log in to Aliyun Docker Hub
@@ -81,7 +81,7 @@ jobs:
 
       - name: Push docker image to Aliyun
         shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
+        if: ${{ github.repository == 'luoweb/inference' }}  # NOTE(review): DOCKER_ORG below still targets the xprobe_xinference namespace - confirm this repository may push there
         env:
           DOCKER_ORG: registry.cn-hangzhou.aliyuncs.com/xprobe_xinference
         run: |

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -1,9 +1,9 @@
 name: Python CI
 
 on:
-  push:
-    branches:
-      - '*'
+  # Push-triggered CI is disabled; the former trigger is kept for reference:
+  #   push:
+  #     branches: ['*']
   pull_request:
     types: ['opened', 'reopened', 'synchronize']
 

diff --git a/xinference/deploy/docker/Dockerfile b/xinference/deploy/docker/Dockerfile
@@ -1,4 +1,9 @@
-FROM vllm/vllm-openai:latest
+# Base image: PyTorch 2.1.2 devel image for CUDA 11.8 / cuDNN 8.
+FROM pytorch/pytorch:2.1.2-cuda11.8-cudnn8-devel
+# Disabled alternative base images, kept for reference:
+#   pytorch/pytorch:2.1.2-cuda12.1-cudnn8-devel
+#   winglian/axolotl-base:$BASE_TAG or registry.cn-hangzhou.aliyuncs.com/roweb/axolotl-base:$BASE_TAG (with ARG BASE_TAG=main-base)
+#   vllm/vllm-openai:latest (the previous base image)
 
 COPY . /opt/inference
 WORKDIR /opt/inference
@@ -9,7 +14,11 @@ ENV NODE_VERSION 14.21.1
 RUN apt-get -y update \
   && apt install -y curl procps git libgl1 \
   && mkdir -p $NVM_DIR \
-  && curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash \
+  # Fetch the nvm installer via the mirror.ghproxy.com proxy and rewrite the GitHub URLs inside it to use the same proxy.
+  && curl -o- https://mirror.ghproxy.com/https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh \
+  | sed -e 's#https://github.com/#https://mirror.ghproxy.com/https://github.com/#g' \
+        -e 's#https://raw.githubusercontent.com/#https://mirror.ghproxy.com/https://raw.githubusercontent.com/#g' \
+  | bash \
   && . $NVM_DIR/nvm.sh \
   && nvm install $NODE_VERSION \
   && nvm alias default $NODE_VERSION \
@@ -23,7 +32,8 @@ ARG PIP_INDEX=https://pypi.org/simple
 RUN pip install --upgrade -i "$PIP_INDEX" pip && \
     pip install -i "$PIP_INDEX" "diskcache>=5.6.1" "jinja2>=2.11.3" && \
     # use pre-built whl package for llama-cpp-python, otherwise may core dump when init llama in some envs
-    pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
+    # NOTE(review): the cu124 pre-built wheel index is not used (presumably because the base image is CUDA 11.8), so pip may build from source - confirm. PIP_INDEX is honoured like the other installs.
+    pip install -i "$PIP_INDEX" "llama-cpp-python>=0.2.82" && \
     pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements.txt && \
     pip install -i "$PIP_INDEX" --no-deps sglang && \
     cd /opt/inference && \