Skip to content

Commit

Permalink
feat!: rewrite the service completely
Browse files Browse the repository at this point in the history
BREAKING CHANGE: Instead of using protobuf definitions from arg-services, the package now contains code generated via betterproto. We also added a library that can be used in other Python projects without needing to start a server. Lastly, a proper CLI has been integrated that (besides starting the server) allows performing semantic retrieval on the local file system.
  • Loading branch information
mirkolenz committed Dec 4, 2023
1 parent b2fe4a8 commit 07a53c6
Show file tree
Hide file tree
Showing 15 changed files with 1,823 additions and 985 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ data/

# nix
.envrc
.direnv/
.devenv/
.direnv
.devenv
/result
.venv
.direnv
83 changes: 83 additions & 0 deletions betterproto.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Nix package definition for the betterproto Python library.
# Reference upstream expression:
# https://github.com/NixOS/nixpkgs/blob/master/pkgs/development/python-modules/betterproto/default.nix
#
# This is a function: every name in the argument set below must be supplied by
# the caller (typically via `callPackage` from a Python package set).
{
# Source fetcher and general helpers.
fetchFromGitHub,
lib,
python,
buildPythonPackage,
# Build-time dependency.
poetry-core,
# Runtime dependencies.
grpclib,
python-dateutil,
typing-extensions,
# Optional "compiler" dependencies.
black,
jinja2,
isort,
# Test-only dependencies.
pytestCheckHook,
pytest-asyncio,
pytest-cov,
pytest-mock,
pydantic,
protobuf,
cachelib,
tomlkit,
grpcio-tools,
}:
# `rec` lets attributes refer to each other; nativeCheckInputs below reuses
# passthru.optional-dependencies.compiler.
buildPythonPackage rec {
pname = "betterproto";
# Only a label: the actual source is pinned to a fixed commit via `rev` below.
version = "master";
format = "pyproject";

# Pinned upstream source. `rev` and `hash` must be updated together.
src = fetchFromGitHub {
owner = "danielgtaylor";
repo = "python-betterproto";
rev = "bd7de203e16e949666b2844b3dec1eb7c4ed523c";
hash = "sha256-ppVS8dfVSXBm7KGv1/um6ePK4pBln+RrizR9EXz40qo=";
};

# NOTE(review): poetry-core is presumably the pyproject build backend — confirm
# against the upstream pyproject.toml.
nativeBuildInputs = [poetry-core];

# Dependencies propagated to anything that depends on this package.
propagatedBuildInputs = [
grpclib
python-dateutil
typing-extensions
];

# Extra packages for the "compiler" feature. They are NOT propagated by
# default; consumers that need them must add this list themselves.
passthru.optional-dependencies.compiler = [
black
jinja2
isort
];

# Packages available during the check phase: the test tooling plus the
# compiler extras declared above.
nativeCheckInputs =
[
pytestCheckHook
pytest-asyncio
pytest-cov
pytest-mock
pydantic
protobuf
cachelib
tomlkit
grpcio-tools
]
++ passthru.optional-dependencies.compiler;

# The tests require the generation of code before execution. This requires
# the protoc-gen-python_betterproto script from the package to be on PATH.
preCheck = ''
export PATH=$PATH:$out/bin
${python.interpreter} -m tests.generate
'';

# Smoke test: the build fails if this module cannot be imported.
pythonImportsCheck = ["betterproto"];

# Package metadata.
meta = {
description = "Clean, modern, Python 3.6+ code generator & library for Protobuf 3 and async gRPC";
longDescription = ''
This project aims to provide an improved experience when using Protobuf /
gRPC in a modern Python environment by making use of modern language
features and generating readable, understandable, idiomatic Python code.
'';
homepage = "https://github.com/danielgtaylor/python-betterproto";
license = lib.licenses.mit;
};
}
6 changes: 6 additions & 0 deletions buf.gen.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# buf code-generation configuration: runs the python_betterproto plugin and
# writes the generated code to ./gen.
version: v1
# Managed mode is switched on for this generation run.
managed:
enabled: true
plugins:
# NOTE(review): buf presumably resolves this name to a local
# `protoc-gen-python_betterproto` executable on PATH — confirm.
- name: python_betterproto
# Output directory, relative to where `buf generate` is invoked.
out: ./gen
36 changes: 18 additions & 18 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 18 additions & 6 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
export LD_PRELOAD=$(${pkgs.busybox}/bin/find /lib/x86_64-linux-gnu -name "libcuda.so.*" -type f 2>/dev/null)
exec ${lib.getExe self'.packages.default} "$@"
'';
# betterproto built from the local ./betterproto.nix expression, with its
# arguments filled in from the Python package set.
betterproto = python.pkgs.callPackage ./betterproto.nix {};
# Variant of betterproto whose optional "compiler" dependencies are appended to
# propagatedBuildInputs, so they are propagated together with the package.
betterprotoCompiler = betterproto.overridePythonAttrs (old: {
propagatedBuildInputs = old.propagatedBuildInputs ++ old.passthru.optional-dependencies.compiler;
});
in {
_module.args.pkgs = import nixpkgs {
inherit system;
Expand All @@ -59,11 +63,6 @@
inherit python;
projectDir = ./.;
preferWheels = true;
overrides = pkgs.poetry2nix.overrides.withDefaults (self: super: {
sentence-transformers = super.sentence-transformers.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or []) ++ [super.setuptools];
});
});
};
docker = pkgs.dockerTools.buildImage {
name = "nlp-service";
Expand All @@ -78,6 +77,19 @@
name = "release-env";
paths = [poetry python];
};
betterproto = python.pkgs.toPythonApplication betterprotoCompiler;
# `buf-generate`: regenerates ./nlp_service/nlp_pb.py.
# Steps performed by the script below:
#   1. run `buf generate` against buf.build/recap/arg-services,
#   2. write `# type: ignore` followed by the generated
#      ./gen/arg_services/nlp/v1/__init__.py into ./nlp_service/nlp_pb.py,
#   3. delete the temporary ./gen directory.
# All paths are relative to the current working directory, so the script has
# to be invoked from the directory that contains nlp_service/.
bufGenerate = pkgs.writeShellApplication {
name = "buf-generate";
# Makes the betterproto application available on the script's PATH.
# NOTE(review): presumably so that buf can find the python_betterproto
# plugin executable — confirm.
runtimeInputs = [self'.packages.betterproto];
text = ''
${lib.getExe pkgs.buf} generate buf.build/recap/arg-services
{
echo "# type: ignore"
cat ./gen/arg_services/nlp/v1/__init__.py
} > ./nlp_service/nlp_pb.py
rm -rf ./gen
'';
};
};
legacyPackages.dockerManifest = flocken.legacyPackages.${system}.mkDockerManifest {
github = {
Expand All @@ -88,7 +100,7 @@
images = with self.packages; [x86_64-linux.docker];
};
devShells.default = pkgs.mkShell {
packages = [poetry python];
packages = [poetry python self'.packages.bufGenerate];
POETRY_VIRTUALENVS_IN_PROJECT = true;
LD_LIBRARY_PATH = with pkgs; lib.makeLibraryPath [stdenv.cc.cc zlib "/run/opengl-driver"];
shellHook = ''
Expand Down
23 changes: 19 additions & 4 deletions nlp_service/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
import logging

from . import client as client
from . import similarity as similarity
from . import typing as typing
from . import client, sim_funcs, typing
from .lib import doc, docs, similarities, similarity, vector, vectors
from .nlp_pb import EmbeddingModel, EmbeddingType, NlpConfig, Pooling, SimilarityMethod

__all__ = ("client", "similarity", "typing")
__all__ = (
"client",
"sim_funcs",
"typing",
"docs",
"doc",
"vectors",
"vector",
"similarities",
"similarity",
"NlpConfig",
"EmbeddingModel",
"SimilarityMethod",
"EmbeddingType",
"Pooling",
)

logging.getLogger(__name__).addHandler(logging.NullHandler())
2 changes: 1 addition & 1 deletion nlp_service/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from nlp_service.server import app
from nlp_service.cli import app

app()

0 comments on commit 07a53c6

Please sign in to comment.