Skip to content

Commit

Permalink
fix: allow only one request in the python backends
Browse files — browse the repository at this point in the history
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  • Loading branch information
mudler committed Aug 18, 2023
1 parent 91b1902 commit 547c926
Show file tree
Hide file tree
Showing 7 changed files with 7 additions and 5 deletions.
2 changes: 1 addition & 1 deletion extra/grpc/autogptq/autogptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def PredictStream(self, request, context):


def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
Expand Down
2 changes: 1 addition & 1 deletion extra/grpc/bark/ttsbark.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def TTS(self, request, context):
return backend_pb2.Result(success=True)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
Expand Down
2 changes: 1 addition & 1 deletion extra/grpc/diffusers/backend_diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def GenerateImage(self, request, context):
return backend_pb2.Result(message="Model loaded successfully", success=True)

def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
Expand Down
2 changes: 1 addition & 1 deletion extra/grpc/exllama/exllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def PredictStream(self, request, context):


def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
Expand Down
2 changes: 1 addition & 1 deletion extra/grpc/huggingface/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def Embedding(self, request, context):


def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
Expand Down
1 change: 1 addition & 0 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er

if o.singleActiveBackend {
ml.mu.Lock()
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
ml.StopAllExcept(o.model)
ml.mu.Unlock()
}
Expand Down
1 change: 1 addition & 0 deletions pkg/model/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ func (ml *ModelLoader) StopAllExcept(s string) {
for ml.models[id].IsBusy() {
log.Debug().Msgf("%s busy. Waiting for to finish", id)
}
log.Debug().Msgf("[single-backend] Stopping %s", id)
return true
}
return false
Expand Down

0 comments on commit 547c926

Please sign in to comment.