microsoft · yukirora · Sep 15, 2025 · Aug 26, 2025 · Sep 8, 2025 · Sep 8, 2025
diff --git a/src/model-proxy/build/model-proxy.common.dockerfile b/src/model-proxy/build/model-proxy.common.dockerfile
@@ -0,0 +1,18 @@
+# Build stage
+FROM golang:1.25.0 AS builder
+WORKDIR /app
+
+COPY ./src /app/model-proxy
+
+RUN cd /app/model-proxy && go mod tidy && \
+    CGO_ENABLED=0 GOOS=linux go build -o /app/bin/modelproxy
+
+# Final stage
+FROM ubuntu:latest
+WORKDIR /app
+
+RUN apt-get update
+
+RUN apt-get upgrade -y
+
+COPY --from=builder /app/bin/modelproxy /app/bin/modelproxy
diff --git a/src/model-proxy/config/model-proxy.yaml b/src/model-proxy/config/model-proxy.yaml
@@ -0,0 +1,8 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+service_type: "common"
+
+port: 9999
+retry: 5
+modelkey: "123"
diff --git a/src/model-proxy/config/model_proxy.py b/src/model-proxy/config/model_proxy.py
@@ -0,0 +1,33 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import copy
+
+class ModelProxy(object):
+    def __init__(self, cluster_conf, service_conf, default_service_conf):
+        self.cluster_conf = cluster_conf
+        self.service_conf = service_conf
+        self.default_service_conf = default_service_conf
+
+    def get_master_ip(self):
+        for host_conf in self.cluster_conf["machine-list"]:
+            if "pai-master" in host_conf and host_conf["pai-master"] == "true":
+                return host_conf["hostip"]
+
+    def validation_pre(self):
+        return True, None
+
+    def run(self):
+        result = copy.deepcopy(self.default_service_conf)
+        result.update(self.service_conf)
+        result["host"] = self.get_master_ip()
+        result["url"] = "http://{0}:{1}".format(self.get_master_ip(), result["port"])
+        return result
+
+    def validation_post(self, conf):
+        port = conf["model-proxy"].get("port")
+        if type(port) != int:
+            msg = "expect port in model-proxy to be int but get %s with type %s" % \
+                    (port, type(port))
+            return False, msg
+        return True, None
diff --git a/src/model-proxy/deploy/delete.sh b/src/model-proxy/deploy/delete.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+pushd $(dirname "$0") > /dev/null
+
+echo "Call stop script to stop all service first"
+/bin/bash stop.sh || exit $?
+
+
+popd > /dev/null
diff --git a/src/model-proxy/deploy/model-proxy.yaml.template b/src/model-proxy/deploy/model-proxy.yaml.template
@@ -0,0 +1,58 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+kind: DaemonSet
+apiVersion: apps/v1
+metadata:
+  name: model-proxy-ds
+spec:
+  selector:
+    matchLabels:
+      app: model-proxy
+  template:
+    metadata:
+      labels:
+        app: model-proxy
+    spec:
+      tolerations:
+      - key: node-role.kubernetes.io/master
+        effect: NoSchedule
+      imagePullSecrets:
+      - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }}
+      containers:
+      - name: model-proxy
+        image: {{ cluster_cfg['cluster']['docker-registry']['prefix'] }}model-proxy:{{ cluster_cfg['cluster']['docker-registry']['tag'] }}
+        imagePullPolicy: Always
+        command: ["/app/bin/modelproxy"]
+        args:
+          - "--port={{ cluster_cfg['model-proxy']['port'] }}"
+          - "--retry={{ cluster_cfg['model-proxy']['retry'] }}"
+          - "--modelkey={{ cluster_cfg['model-proxy']['modelkey'] }}"
+          - "--logdir=/usr/local/ltp/model-proxy/logs"
+        volumeMounts:
+        {%- if cluster_cfg['model-proxy']['log_pvc'] %}
+        - name: model-proxy-log-storage
+          mountPath: /usr/local/ltp/model-proxy
+        {%- else %}
+        - name: model-proxy-log
+          mountPath: /usr/local/ltp/model-proxy
+        {%- endif %}
+        ports:
+        - containerPort: {{ cluster_cfg["model-proxy"]["port"] }}
+          hostPort: {{ cluster_cfg["model-proxy"]["port"] }}
+          name: model-proxy
+        livenessProbe:
+          httpGet:
+            path: '/healthz'
+            port: model-proxy
+          initialDelaySeconds: 10
+          periodSeconds: 60
+      volumes:
+      - name: model-proxy-log
+        hostPath:
+          path: /var/log/model-proxy
+      {%- if cluster_cfg['model-proxy']['log_pvc'] %}
+      - name: model-proxy-log-storage
+        persistentVolumeClaim:
+          claimName: {{ cluster_cfg['model-proxy']['log_pvc'] }}
+      {%- endif %}
diff --git a/src/model-proxy/deploy/refresh.sh b/src/model-proxy/deploy/refresh.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+pushd $(dirname "$0") > /dev/null
+
+bash stop.sh
+bash start.sh
+
+popd > /dev/null
diff --git a/src/model-proxy/deploy/service.yaml b/src/model-proxy/deploy/service.yaml
@@ -0,0 +1,21 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+cluster-type:
+  - yarn
+  - k8s
+
+prerequisite:
+  - rest-server
+
+template-list:
+  - model-proxy.yaml
+
+start-script: start.sh
+stop-script: stop.sh
+delete-script: delete.sh
+refresh-script: refresh.sh
+
+
+deploy-rules:
+  - in: pai-master
diff --git a/src/model-proxy/deploy/start.sh b/src/model-proxy/deploy/start.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+pushd $(dirname "$0") > /dev/null
+
+kubectl apply --overwrite=true -f model-proxy.yaml || exit $?
+
+# Wait until the service is ready.
+PYTHONPATH="../../../deployment" python -m  k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v model-proxy || exit $?
+
+popd > /dev/null
diff --git a/src/model-proxy/deploy/stop.sh b/src/model-proxy/deploy/stop.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+kubectl delete --ignore-not-found --now "daemonset/model-proxy-ds"
diff --git a/src/model-proxy/src/.gitignore b/src/model-proxy/src/.gitignore
@@ -0,0 +1,20 @@
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Dependency directories (remove the comment below to include it)
+# vendor/
+
+bin
+Traces
+
+venv
diff --git a/src/model-proxy/src/README.md b/src/model-proxy/src/README.md
@@ -0,0 +1,41 @@
+# Instruction of model-proxy
+
+## Overview
+
+Model-proxy is a proxy service to forward requests from clients to different model jobs in LTP cluster. With one base url, client can access different models by specifying different model name in the request path, `model-proxy` service will forward the request to corresponding model job. If there are multiple jobs which are serving the same model, `model-proxy` will do load balancing among these jobs.
+
+Workflow:
+
+1. Client sends request to `model-proxy` service to list all models by `/v1/models` endpoint.
+   - During the list request, `model-proxy` will query LTP REST server to get all model serving jobs which the user can access, and then list all models which are being served by these jobs.
+
+2. Client sends request to `model-proxy` service to access a specific model by openai sepc api request format, e.g. `POST /v1/chat/completions` with request body containing model name.
+   - During the access request, `model-proxy` will query LTP REST server to get all model serving jobs which are serving the requested model, and then forward the request to one of these jobs. If there are multiple jobs, `model-proxy` will do load balancing among these jobs.
+
+## Configuration
+
+### Requirements 
+
+- LTP model serving jobs should be deployed in the LTP cluster, and names of these jobs must include `model-serving`.
+
+- LTP model serving jobs should support openai spec api, e.g. `/v1/chat/completions` endpoint. 
+   * The endpoints should use the first taskrole's ip and port.
+   * The api key which is configured in model-proxy service should be supported by these endpoints. So if users want to make their model serving jobs accessible by model-proxy, they need to configure the same api key in their jobs, which will be provided by the cluster admin.
+
+### Binary configuration
+
+Model-proxy binary can be configured by flags:
+
+- `--port`: the port that model-proxy service listens on, default is 9999
+- `--retry`: the retry times when forwarding request to model job, default is 5
+- `--logdir`: the directory to store log files, default is `./logs`
+- `--modelkey`: the key which is used to request model serving jobs in the LTP cluster.
+
+### Service configuration
+
+```yaml
+model-proxy:  
+    port: 9999
+    retry: 5
+    modelkey: "ABCD1234"  # the api key to access model serving jobs
+```
diff --git a/src/model-proxy/src/go.mod b/src/model-proxy/src/go.mod
@@ -0,0 +1,3 @@
+module modelproxy
+
+go 1.25
diff --git a/src/model-proxy/src/main.go b/src/model-proxy/src/main.go
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package main
+
+import (
+	"flag"
+
+	"modelproxy/proxy"
+	"modelproxy/trace"
+	"modelproxy/types"
+)
+
+var (
+	port       int
+	maxRetries int = 5 // default value
+	logFileDir string
+	modelKey   string
+)
+
+func init() {
+	flag.IntVar(&port, "port", 9999, "port for the proxy server")
+	flag.IntVar(&maxRetries, "retry", 5, "max retries for the request to the model server")
+	flag.StringVar(&logFileDir, "logdir", "./logs", "path to the log file directory")
+	flag.StringVar(&modelKey, "modelkey", "", "model key for requesting model serving jobs")
+}
+
+func main() {
+	flag.Parse()
+
+	config := types.Config{
+		Server: &types.Server{
+			Host:       "0.0.0.0",
+			Port:       port,
+			MaxRetries: maxRetries,
+			ModelKey:   modelKey,
+		},
+		Log: &types.Log{
+			LogStorage: &types.LogStorage{
+				LocalFolder:  logFileDir,
+				AzureStorage: nil,
+			},
+			TraceRelatedKeys: []string{},
+		},
+	}
+	ph := proxy.NewProxyHandler(&config)
+	traceLogger := trace.NewJsonFileLogger(logFileDir)
+
+	ph.StartProxy(traceLogger)
+
+}
diff --git a/src/model-proxy/src/proxy/authenticator.go b/src/model-proxy/src/proxy/authenticator.go
@@ -0,0 +1,78 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package proxy
+
+import (
+	"log"
+	"net/http"
+	"strings"
+)
+
+// obfuscateToken returns a truncated identifier for safely logging tokens.
+func obfuscateToken(token string) string {
+	if len(token) <= 6 {
+		return "<redacted>"
+	}
+	return token[:3] + "***" + token[len(token)-3:]
+}
+
+type RestServerAuthenticator struct {
+	// rest-server token => model names => model urls
+	tokenToModels map[string]map[string][]string
+	modelKey      string
+}
+
+func NewRestServerAuthenticator(tokenToModels map[string]map[string][]string, modelKey string) *RestServerAuthenticator {
+	if tokenToModels == nil {
+		tokenToModels = make(map[string]map[string][]string)
+	}
+	return &RestServerAuthenticator{
+		tokenToModels: tokenToModels,
+		modelKey:      modelKey,
+	}
+}
+
+func (ra *RestServerAuthenticator) UpdateTokenModels(token string, model2Url map[string][]string) {
+	if ra.tokenToModels == nil {
+		ra.tokenToModels = make(map[string]map[string][]string)
+	}
+	ra.tokenToModels[token] = model2Url
+}
+
+// Check if the request is authenticated and return the available model urls
+func (ra *RestServerAuthenticator) AuthenticateReq(req *http.Request, reqBody map[string]interface{}) (bool, []string) {
+	token := req.Header.Get("Authorization")
+	token = strings.Replace(token, "Bearer ", "", 1)
+	//  read request body
+	model, ok := reqBody["model"].(string)
+	if !ok {
+		log.Printf("[-] Error: 'model' field missing or not a string in request body")
+		return false, []string{}
+	}
+	availableModels, ok := ra.tokenToModels[token]
+	if !ok {
+		// request to RestServer to get the models
+		log.Printf("[-] Error: token %s not found in the authenticator\n", obfuscateToken(token))
+		availableModels, err := GetJobModelsMapping(req, ra.modelKey)
+		if err != nil {
+			log.Printf("[-] Error: failed to get models for token %s: %v\n", obfuscateToken(token), err)
+			return false, []string{}
+		}
+		ra.tokenToModels[token] = availableModels
+	}
+	if len(availableModels) == 0 {
+		log.Printf("[-] Error: no models found")
+		return false, []string{}
+	}
+	if model == "" {
+		log.Printf("[-] Error: model is empty")
+		return false, []string{}
+	}
+	for m, v := range availableModels {
+		if m == model {
+			return true, v
+		}
+	}
+	return false, []string{}
+}