/
triton-2.x.yaml
88 lines (88 loc) · 2.37 KB
/
triton-2.x.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
---
# ModelMesh ServingRuntime that launches a Triton inference server container
# (serverType: triton) and registers the model formats it can serve.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  # Annotation and label values are quoted so they stay strings; a bare
  # true/false/2 would be parsed as a boolean or integer.
  annotations:
    enable-auth: "false"
    enable-route: "true"
    maxLoadingConcurrency: "2"
    opendatahub.io/template-display-name: triton-2.x Model Server Alexei
    opendatahub.io/template-name: triton-2.x-alexei
    openshift.io/display-name: triton-serving-runtime
  labels:
    name: modelmesh-serving-triton-2.x
    opendatahub.io/dashboard: "true"
  name: triton-2.x
  namespace: huggingface
spec:
  builtInAdapter:
    # 134217728 = 128 * 1024 * 1024
    memBufferBytes: 134217728
    # 270000 ms = 4.5 minutes
    modelLoadingTimeoutMillis: 270000
    # Same port number as grpcDataEndpoint below.
    runtimeManagementPort: 8001
    serverType: triton
  containers:
    - args:
        - -c
        # One shell command line, folded across lines by YAML: create the
        # model repository directory, open its permissions, then `exec`
        # tritonserver so the server replaces the shell process.
        # --model-repository must match the directory created here.
        - 'mkdir -p /models/_triton_models; chmod 777 /models/_triton_models; exec tritonserver
          "--model-repository=/models/_triton_models" "--model-control-mode=explicit"
          "--strict-model-config=false" "--strict-readiness=false" "--allow-http=true"
          "--allow-sagemaker=false" "--grpc-keepalive-time=10000" "--grpc-keepalive-timeout=999999999"
          "--grpc-keepalive-permit-without-calls=True" "--grpc-http2-max-pings-without-data=0"
          "--grpc-http2-min-recv-ping-interval-without-data=5000" "--grpc-http2-max-ping-strikes=0" '
      command:
        - /bin/sh
      # NOTE(review): `:latest` is a mutable tag; consider pinning a version
      # or digest so rollouts are reproducible.
      image: image-registry.openshift-image-registry.svc:5000/modelmesh-serving/custom-triton-server:latest
      livenessProbe:
        exec:
          # HTTP health check against the server inside the container; this
          # relies on --allow-http=true in the args above.
          command:
            - curl
            - --fail
            - --silent
            - --show-error
            # curl's own limit (9 s) is below timeoutSeconds (10 s),
            # presumably so curl reports the failure before the probe itself
            # times out.
            - --max-time
            - "9"
            - http://localhost:8000/v2/health/live
        initialDelaySeconds: 5
        periodSeconds: 30
        timeoutSeconds: 10
      name: triton
      resources:
        limits:
          cpu: "5"
          memory: 12Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: 500m
          memory: 12Gi
          nvidia.com/gpu: "1"
      volumeMounts:
        # Backed by the in-memory `dshm` volume declared under spec.volumes.
        - mountPath: /dev/shm
          name: dshm
  grpcDataEndpoint: port:8001
  grpcEndpoint: port:8085
  multiModel: true
  protocolVersions:
    - grpc-v2
  # Versions are quoted so they are strings rather than integers.
  supportedModelFormats:
    - autoSelect: true
      name: keras
      version: "2"
    - autoSelect: true
      name: onnx
      version: "1"
    - autoSelect: true
      name: pytorch
      version: "1"
    - autoSelect: true
      name: tensorflow
      version: "1"
    - autoSelect: true
      name: tensorflow
      version: "2"
    - autoSelect: true
      name: tensorrt
      version: "7"
  volumes:
    # Memory-backed emptyDir (capped at 2Gi) mounted at /dev/shm in the
    # triton container.
    - emptyDir:
        medium: Memory
        sizeLimit: 2Gi
      name: dshm