92
92
)
93
93
from sglang .srt .managers .multi_tokenizer_mixin import (
94
94
MultiTokenizerManager ,
95
- deserialize_data ,
96
95
get_main_process_id ,
97
96
read_from_shared_memory ,
98
97
write_data_for_multi_tokenizer ,
@@ -136,33 +135,22 @@ def set_global_state(global_state: _GlobalState):
136
135
_global_state = global_state
137
136
138
137
139
- # Function to set up all middlewares for multi-tokenizer compatibility
140
- def setup_middlewares (api_key : Optional [str ], enable_metrics : bool ):
141
- """Setup all middlewares for both single and multi-process modes"""
142
- worker_pid = os .getpid ()
143
-
144
- if api_key :
145
- add_api_key_middleware (app , api_key )
146
- logger .info (f"Worker { worker_pid } added API key middleware" )
147
-
148
- if enable_metrics :
149
- add_prometheus_middleware (app )
150
- enable_func_timer ()
151
- logger .info (f"Worker { worker_pid } added prometheus middleware" )
152
-
153
-
154
138
async def init_multi_tokenizer () -> ServerArgs :
155
139
"""Read args information from shm and init tokenizer manager for current process"""
156
140
pid = os .getpid ()
157
141
main_pid = get_main_process_id ()
158
142
logger .info (f"current worker_id: { pid } , main processID: { main_pid } " )
159
143
160
144
# Read configuration from shared memory
161
- port_args_data = read_from_shared_memory (f"port_args_{ main_pid } " )
162
- server_args_data = read_from_shared_memory (f"server_args_{ main_pid } " )
163
- scheduler_info_data = read_from_shared_memory (f"scheduler_info_{ main_pid } " )
164
- port_args , server_args = deserialize_data (port_args_data , server_args_data )
165
- scheduler_info = scheduler_info_data
145
+ port_args , server_args , scheduler_info = read_from_shared_memory (
146
+ f"multi_tokenizer_args_{ main_pid } "
147
+ )
148
+ server_args : ServerArgs
149
+
150
+ # API key authentication is not supported in multi-tokenizer mode
151
+ assert (
152
+ server_args .api_key is None
153
+ ), "API key is not supported in multi-tokenizer mode"
166
154
167
155
port_args .tokenizer_ipc_name = (
168
156
f"ipc://{ tempfile .NamedTemporaryFile (delete = False ).name } "
@@ -193,13 +181,17 @@ async def init_multi_tokenizer() -> ServerArgs:
193
181
194
182
@asynccontextmanager
195
183
async def lifespan (fast_api_app : FastAPI ):
196
- server_args = getattr (fast_api_app , "server_args" , None )
197
- if server_args is None :
184
+ if not getattr (fast_api_app , "is_single_tokenizer_mode" , False ):
198
185
# Initialize multi-tokenizer support for worker processes
199
- fast_api_app .server_args = await init_multi_tokenizer ()
200
- setup_middlewares (
201
- fast_api_app .server_args .api_key , fast_api_app .server_args .enable_metrics
202
- )
186
+ fast_api_app .server_args : ServerArgs = await init_multi_tokenizer ()
187
+
188
+ # only metrics middleware is supported in multi-tokenizer mode
189
+ worker_pid = os .getpid ()
190
+ if fast_api_app .server_args .enable_metrics :
191
+ add_prometheus_middleware (app )
192
+ enable_func_timer ()
193
+
194
+ logger .info (f"Worker { worker_pid } added prometheus middleware" )
203
195
fast_api_app .warmup_thread = threading .Thread (
204
196
target = _wait_and_warmup ,
205
197
args = (
@@ -1187,12 +1179,10 @@ def launch_server(
1187
1179
)
1188
1180
1189
1181
if server_args .tokenizer_worker_num > 1 :
1190
- port_args_shm , server_args_shm , scheduler_info_shm = (
1191
- write_data_for_multi_tokenizer (
1192
- port_args ,
1193
- server_args ,
1194
- scheduler_info ,
1195
- )
1182
+ multi_tokenizer_args_shm = write_data_for_multi_tokenizer (
1183
+ port_args ,
1184
+ server_args ,
1185
+ scheduler_info ,
1196
1186
)
1197
1187
else :
1198
1188
# Add api key authorization
@@ -1239,6 +1229,7 @@ def launch_server(
1239
1229
workers = server_args .tokenizer_worker_num ,
1240
1230
)
1241
1231
else :
1232
+ app .is_single_tokenizer_mode = True
1242
1233
uvicorn .run (
1243
1234
app ,
1244
1235
host = server_args .host ,
@@ -1249,10 +1240,8 @@ def launch_server(
1249
1240
)
1250
1241
finally :
1251
1242
if server_args .tokenizer_worker_num > 1 :
1252
- port_args_shm .unlink ()
1253
- server_args_shm .unlink ()
1254
- scheduler_info_shm .unlink ()
1255
- _global_state .tokenizer_manager .clear_tokenizer_mapping ()
1243
+ multi_tokenizer_args_shm .unlink ()
1244
+ _global_state .tokenizer_manager .socket_mapping .clear_all_sockets ()
1256
1245
else :
1257
1246
warmup_thread .join ()
1258
1247
0 commit comments