Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/model-proxy/build/model-proxy.common.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Build stage
FROM golang:1.25.0 AS builder
WORKDIR /app

COPY ./src /app/model-proxy

# Static binary (CGO disabled) so it runs on any Linux base image.
RUN cd /app/model-proxy && go mod tidy && \
    CGO_ENABLED=0 GOOS=linux go build -o /app/bin/modelproxy

# Final stage
FROM ubuntu:latest
WORKDIR /app

# Run update and upgrade in a single layer: a cached standalone
# `apt-get update` layer can leave `upgrade` working from a stale package
# index. Removing the apt lists afterwards keeps the image smaller.
RUN apt-get update && \
    apt-get upgrade -y && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder /app/bin/modelproxy /app/bin/modelproxy
8 changes: 8 additions & 0 deletions src/model-proxy/config/model-proxy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

service_type: "common"

port: 9999
retry: 5
modelkey: "123"  # placeholder default -- override with a strong key in each deployment
33 changes: 33 additions & 0 deletions src/model-proxy/config/model_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import copy

class ModelProxy(object):
    """Deployment-time configuration generator for the model-proxy service.

    Merges the service's default configuration with the user-supplied
    overrides and resolves the master host, producing the final config dict
    consumed by the deployment tooling.
    """

    def __init__(self, cluster_conf, service_conf, default_service_conf):
        # cluster_conf: cluster layout, must contain "machine-list".
        # service_conf: user overrides for this service.
        # default_service_conf: baseline values (e.g. port) for this service.
        self.cluster_conf = cluster_conf
        self.service_conf = service_conf
        self.default_service_conf = default_service_conf

    def get_master_ip(self):
        """Return the host IP of the pai-master machine, or None if no
        machine is marked as master."""
        for host_conf in self.cluster_conf["machine-list"]:
            if host_conf.get("pai-master") == "true":
                return host_conf["hostip"]
        # Explicit None instead of falling off the end implicitly.
        return None

    def validation_pre(self):
        """Pre-generation validation hook; nothing to check for this service."""
        return True, None

    def run(self):
        """Build the final service configuration.

        Returns a new dict: defaults overlaid with user overrides, plus the
        resolved master "host" and the derived service "url".
        """
        result = copy.deepcopy(self.default_service_conf)
        result.update(self.service_conf)
        # Resolve the master once instead of querying it twice.
        master_ip = self.get_master_ip()
        result["host"] = master_ip
        result["url"] = "http://{0}:{1}".format(master_ip, result["port"])
        return result

    def validation_post(self, conf):
        """Post-generation validation: the configured port must be an int.

        bool is excluded explicitly because it subclasses int, matching the
        original ``type(port) != int`` behavior.
        """
        port = conf["model-proxy"].get("port")
        if not isinstance(port, int) or isinstance(port, bool):
            msg = "expect port in model-proxy to be int but get %s with type %s" % \
                (port, type(port))
            return False, msg
        return True, None
12 changes: 12 additions & 0 deletions src/model-proxy/deploy/delete.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Run from this script's directory so relative paths resolve.
# Quoted so the script survives installation paths containing spaces.
pushd "$(dirname "$0")" > /dev/null

echo "Call stop script to stop all service first"
/bin/bash stop.sh || exit $?


popd > /dev/null
58 changes: 58 additions & 0 deletions src/model-proxy/deploy/model-proxy.yaml.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

kind: DaemonSet
apiVersion: apps/v1
metadata:
name: model-proxy-ds
spec:
selector:
matchLabels:
app: model-proxy
template:
metadata:
labels:
app: model-proxy
spec:
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
imagePullSecrets:
- name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }}
containers:
- name: model-proxy
image: {{ cluster_cfg['cluster']['docker-registry']['prefix'] }}model-proxy:{{ cluster_cfg['cluster']['docker-registry']['tag'] }}
imagePullPolicy: Always
command: ["/app/bin/modelproxy"]
args:
- "--port={{ cluster_cfg['model-proxy']['port'] }}"
- "--retry={{ cluster_cfg['model-proxy']['retry'] }}"
- "--modelkey={{ cluster_cfg['model-proxy']['modelkey'] }}"
- "--logdir=/usr/local/ltp/model-proxy/logs"
volumeMounts:
{%- if cluster_cfg['model-proxy']['log_pvc'] %}
- name: model-proxy-log-storage
mountPath: /usr/local/ltp/model-proxy
{%- else %}
- name: model-proxy-log
mountPath: /usr/local/ltp/model-proxy
{%- endif %}
ports:
- containerPort: {{ cluster_cfg["model-proxy"]["port"] }}
hostPort: {{ cluster_cfg["model-proxy"]["port"] }}
name: model-proxy
livenessProbe:
httpGet:
path: '/healthz'
port: model-proxy
initialDelaySeconds: 10
periodSeconds: 60
volumes:
- name: model-proxy-log
hostPath:
path: /var/log/model-proxy
{%- if cluster_cfg['model-proxy']['log_pvc'] %}
- name: model-proxy-log-storage
persistentVolumeClaim:
claimName: {{ cluster_cfg['model-proxy']['log_pvc'] }}
{%- endif %}
11 changes: 11 additions & 0 deletions src/model-proxy/deploy/refresh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Run from this script's directory so relative paths resolve.
# Quoted so the script survives installation paths containing spaces.
pushd "$(dirname "$0")" > /dev/null

# Refresh = stop then start; start.sh waits for pod readiness.
bash stop.sh
bash start.sh

popd > /dev/null
21 changes: 21 additions & 0 deletions src/model-proxy/deploy/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

cluster-type:
- yarn
- k8s

prerequisite:
- rest-server

template-list:
- model-proxy.yaml

start-script: start.sh
stop-script: stop.sh
delete-script: delete.sh
refresh-script: refresh.sh


deploy-rules:
- in: pai-master
13 changes: 13 additions & 0 deletions src/model-proxy/deploy/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Run from this script's directory so relative paths resolve.
# Quoted so the script survives installation paths containing spaces.
pushd "$(dirname "$0")" > /dev/null

kubectl apply --overwrite=true -f model-proxy.yaml || exit $?

# Wait until the service is ready.
PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v model-proxy || exit $?

popd > /dev/null
6 changes: 6 additions & 0 deletions src/model-proxy/deploy/stop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Remove the model-proxy DaemonSet. --ignore-not-found keeps the script
# idempotent (safe to run when nothing is deployed); --now requests prompt
# pod termination.
kubectl delete --ignore-not-found --now "daemonset/model-proxy-ds"
20 changes: 20 additions & 0 deletions src/model-proxy/src/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/

bin
Traces

venv
41 changes: 41 additions & 0 deletions src/model-proxy/src/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Instruction of model-proxy

## Overview

Model-proxy is a proxy service to forward requests from clients to different model jobs in LTP cluster. With one base url, client can access different models by specifying different model name in the request path, `model-proxy` service will forward the request to corresponding model job. If there are multiple jobs which are serving the same model, `model-proxy` will do load balancing among these jobs.

Workflow:

1. Client sends request to `model-proxy` service to list all models by `/v1/models` endpoint.
- During the list request, `model-proxy` will query LTP REST server to get all model serving jobs which the user can access, and then list all models which are being served by these jobs.

2. Client sends a request to the `model-proxy` service to access a specific model using the OpenAI-spec API request format, e.g. `POST /v1/chat/completions` with a request body containing the model name.
- During the access request, `model-proxy` will query LTP REST server to get all model serving jobs which are serving the requested model, and then forward the request to one of these jobs. If there are multiple jobs, `model-proxy` will do load balancing among these jobs.

## Configuration

### Requirements

- LTP model serving jobs should be deployed in the LTP cluster, and names of these jobs must include `model-serving`.

- LTP model serving jobs should support the OpenAI-spec API, e.g. the `/v1/chat/completions` endpoint.
* The endpoints should use the first taskrole's ip and port.
* The api key which is configured in model-proxy service should be supported by these endpoints. So if users want to make their model serving jobs accessible by model-proxy, they need to configure the same api key in their jobs, which will be provided by the cluster admin.

### Binary configuration

Model-proxy binary can be configured by flags:

- `--port`: the port that model-proxy service listens on, default is 9999
- `--retry`: the retry times when forwarding request to model job, default is 5
- `--logdir`: the directory to store log files, default is `./logs`
- `--modelkey`: the key which is used to request model serving jobs in the LTP cluster.

### Service configuration

```yaml
model-proxy:
port: 9999
retry: 5
modelkey: "ABCD1234" # the api key to access model serving jobs
```
3 changes: 3 additions & 0 deletions src/model-proxy/src/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module modelproxy

go 1.25
51 changes: 51 additions & 0 deletions src/model-proxy/src/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package main

import (
"flag"

"modelproxy/proxy"
"modelproxy/trace"
"modelproxy/types"
)

// Command-line configuration. Defaults live in the flag registrations
// below; flag.IntVar/StringVar assign them to these variables immediately,
// so no initializer is needed here (the previous `maxRetries int = 5`
// duplicated the default and was overwritten at registration time).
var (
	port       int    // listen port for the proxy server
	maxRetries int    // max retries when forwarding a request to a model server
	logFileDir string // directory for trace/log files
	modelKey   string // API key used to call model-serving jobs
)

// init registers the command-line flags; flag.Parse is called in main.
func init() {
	flag.IntVar(&port, "port", 9999, "port for the proxy server")
	flag.IntVar(&maxRetries, "retry", 5, "max retries for the request to the model server")
	flag.StringVar(&logFileDir, "logdir", "./logs", "path to the log file directory")
	flag.StringVar(&modelKey, "modelkey", "", "model key for requesting model serving jobs")
}

// main parses the command-line flags, assembles the proxy configuration,
// and starts the proxy server with a JSON-file trace logger.
func main() {
	flag.Parse()

	cfg := types.Config{
		Server: &types.Server{
			Host:       "0.0.0.0",
			Port:       port,
			MaxRetries: maxRetries,
			ModelKey:   modelKey,
		},
		Log: &types.Log{
			LogStorage: &types.LogStorage{
				LocalFolder:  logFileDir,
				AzureStorage: nil,
			},
			TraceRelatedKeys: []string{},
		},
	}

	handler := proxy.NewProxyHandler(&cfg)
	jsonLogger := trace.NewJsonFileLogger(logFileDir)
	handler.StartProxy(jsonLogger)
}
78 changes: 78 additions & 0 deletions src/model-proxy/src/proxy/authenticator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package proxy

import (
"log"
"net/http"
"strings"
)

// obfuscateToken masks a token for safe logging, keeping only the first
// and last three characters of sufficiently long tokens; anything shorter
// is fully redacted.
func obfuscateToken(token string) string {
	const keep = 3
	if len(token) <= 2*keep {
		return "<redacted>"
	}
	return token[:keep] + "***" + token[len(token)-keep:]
}

// RestServerAuthenticator authorizes incoming requests by bearer token and
// caches, per token, which models that token may access and where they are
// served.
//
// NOTE(review): tokenToModels is read and written without synchronization;
// confirm callers are single-threaded or add a mutex.
type RestServerAuthenticator struct {
	// rest-server token => model names => model urls
	tokenToModels map[string]map[string][]string
	// modelKey is the API key used when querying model-serving jobs.
	modelKey string
}

// NewRestServerAuthenticator builds an authenticator over the given
// token-to-models mapping; a nil mapping is replaced by an empty one so
// later writes never hit a nil map.
func NewRestServerAuthenticator(tokenToModels map[string]map[string][]string, modelKey string) *RestServerAuthenticator {
	ra := &RestServerAuthenticator{
		tokenToModels: tokenToModels,
		modelKey:      modelKey,
	}
	if ra.tokenToModels == nil {
		ra.tokenToModels = map[string]map[string][]string{}
	}
	return ra
}

// UpdateTokenModels records (or replaces) the model-name-to-URL mapping
// that the given token is allowed to access.
func (ra *RestServerAuthenticator) UpdateTokenModels(token string, model2Url map[string][]string) {
	// Defensive lazy init: keeps the write safe even on a zero-value receiver.
	if ra.tokenToModels == nil {
		ra.tokenToModels = map[string]map[string][]string{}
	}
	ra.tokenToModels[token] = model2Url
}

// AuthenticateReq checks whether the request's bearer token may access the
// model named in reqBody["model"] and, on success, returns the URLs of the
// jobs serving that model. On a cache miss the token's model mapping is
// fetched from the REST server and memoized in ra.tokenToModels.
//
// NOTE(review): ra.tokenToModels is mutated here without a lock; if this
// runs on concurrent HTTP handlers it needs synchronization.
func (ra *RestServerAuthenticator) AuthenticateReq(req *http.Request, reqBody map[string]interface{}) (bool, []string) {
	// TrimPrefix only strips a leading "Bearer ", which is stricter and more
	// correct than removing the first occurrence anywhere in the header.
	token := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")

	model, ok := reqBody["model"].(string)
	if !ok {
		log.Printf("[-] Error: 'model' field missing or not a string in request body")
		return false, []string{}
	}
	if model == "" {
		// Reject early; no point fetching mappings for an empty model name.
		log.Printf("[-] Error: model is empty")
		return false, []string{}
	}

	availableModels, ok := ra.tokenToModels[token]
	if !ok {
		// Cache miss: ask the REST server which models this token can use.
		log.Printf("[-] Error: token %s not found in the authenticator\n", obfuscateToken(token))
		fetched, err := GetJobModelsMapping(req, ra.modelKey)
		if err != nil {
			log.Printf("[-] Error: failed to get models for token %s: %v\n", obfuscateToken(token), err)
			return false, []string{}
		}
		// BUG FIX: the previous code used `availableModels, err := ...`,
		// declaring a NEW availableModels shadowed inside this branch. The
		// fetched mapping was cached but the outer variable stayed nil, so
		// first-time tokens always failed the emptiness check below.
		ra.tokenToModels[token] = fetched
		availableModels = fetched
	}

	if len(availableModels) == 0 {
		log.Printf("[-] Error: no models found")
		return false, []string{}
	}
	// Direct map lookup instead of iterating all entries.
	if urls, found := availableModels[model]; found {
		return true, urls
	}
	return false, []string{}
}
Loading