
ci: config black format checker #299

Merged · 3 commits · May 11, 2022
14 changes: 6 additions & 8 deletions .github/workflows/client.yaml
@@ -9,7 +9,6 @@ on:
- main

jobs:

filter:
runs-on: ubuntu-latest

@@ -25,7 +24,6 @@ jobs:
base: main
filters: .github/file-filter.yml


codestyle:
runs-on: ubuntu-latest
defaults:
@@ -42,8 +40,8 @@ jobs:
- name: Setup python
uses: actions/setup-python@v3
with:
python-version: '3.7'
architecture: 'x64'
python-version: "3.7"
architecture: "x64"

- name: Get pip cache
id: pip-cache-path
@@ -64,7 +62,7 @@ jobs:

- name: Black format check
working-directory: ./client
run: make ci-format
run: make ci-format-checker
- name: Lint check
working-directory: ./client
run: make ci-lint
@@ -88,8 +86,8 @@ jobs:
- name: Setup python
uses: actions/setup-python@v3
with:
python-version: '3.7'
architecture: 'x64'
python-version: "3.7"
architecture: "x64"

- name: Get pip cache
id: pip-cache-path
@@ -120,4 +118,4 @@ jobs:
directory: ./client/coverage/reports/
files: ./coverage.xml
flags: unittests
verbose: true
verbose: true
9 changes: 7 additions & 2 deletions client/Makefile
@@ -1,3 +1,5 @@
PY_CHANGED_FILES = $(shell git diff --name-only --relative -- '*.py')

check:
python3 setup.py check

@@ -17,8 +19,11 @@ build-dev-wheel:
install-dev-req:
python3 -m pip install -r requirements-dev.txt

ci-format:
echo "ci format"
black-format:
black --config pyproject.toml $(PY_CHANGED_FILES)

ci-format-checker:
black --check --config pyproject.toml .

ci-lint:
echo "ci lint"
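
Note (not part of the diff): the new PY_CHANGED_FILES variable asks git for the Python files touched in the working tree, so `make black-format` only reformats what you changed, while `make ci-format-checker` checks the whole tree. A rough Python sketch of the same lookup, assuming it is run from the client/ directory of a git checkout:

    import subprocess

    # Roughly what $(shell git diff --name-only --relative -- '*.py') expands to.
    changed = subprocess.run(
        ["git", "diff", "--name-only", "--relative", "--", "*.py"],
        capture_output=True,
        text=True,
        check=True,
    ).stdout.split()
    print(changed)  # e.g. ["starwhale/api/_impl/dataset.py"]
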
16 changes: 16 additions & 0 deletions client/pyproject.toml
@@ -0,0 +1,16 @@
[tool.black]
include = '\.pyi?$'
exclude = '''
(
/(
\.eggs
| \.egg-info
| \.git
| \.vscode
| \.venv
| venv
| build
| dist
)/
)
'''
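
Note (not part of the diff): black treats exclude as a verbose regular expression matched against slash-separated file paths, so the block above skips common virtualenv and build directories. A minimal sketch of how that pattern behaves, under that assumption:

    import re

    # Same pattern as [tool.black].exclude, compiled the way black is assumed to:
    # verbose mode, matched against the file's slash-separated path.
    EXCLUDE = re.compile(
        r"""
        (
          /(
              \.eggs
            | \.egg-info
            | \.git
            | \.vscode
            | \.venv
            | venv
            | build
            | dist
          )/
        )
        """,
        re.VERBOSE,
    )

    print(bool(EXCLUDE.search("/.venv/lib/site.py")))  # True  -> skipped
    print(bool(EXCLUDE.search("/starwhale/cli.py")))   # False -> checked/formatted
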
75 changes: 38 additions & 37 deletions client/setup.py
@@ -1,49 +1,50 @@
from setuptools import setup, find_packages

install_requires = [
'click>=8.0.4',
'importlib-metadata==4.8.2',
'attrs==21.4.0',
'pyyaml==6.0',
'cattrs==1.7.1',
'requests>=2.1.0',
'requests-toolbelt>=0.9.0',
'loguru==0.6.0',
'conda-pack==0.6.0',
'virtualenv>=13.0.0',
'fs>=2.4.0',
'typing-extensions>=4.0.0',
'commonmark>=0.9.1',
'rich==12.0.0',
'jsonlines==3.0.0',
'boto3==1.21.0',
"scikit-learn>=0.20.0"
"click>=8.0.4",
"importlib-metadata==4.8.2",
"attrs==21.4.0",
"pyyaml==6.0",
"cattrs==1.7.1",
"requests>=2.1.0",
"requests-toolbelt>=0.9.0",
"loguru==0.6.0",
"conda-pack==0.6.0",
"virtualenv>=13.0.0",
"fs>=2.4.0",
"typing-extensions>=4.0.0",
"commonmark>=0.9.1",
"rich==12.0.0",
"jsonlines==3.0.0",
"boto3==1.21.0",
"scikit-learn>=0.20.0",
]


setup(name='starwhale',
author='Starwhale Team',
author_email="developer@starwhale.ai",
version="0.1.0.dev15",
description='MLOps Platform',
keywords="MLOps AI",
url='https://github.com/star-whale/starwhale',
license='Apache-2.0',
packages=find_packages(exclude=['ez_setup', 'tests*']),
include_package_data=True,
install_requires=install_requires,
zip_safe=False,
entry_points="""
setup(
name="starwhale",
author="Starwhale Team",
author_email="developer@starwhale.ai",
version="0.1.0.dev15",
description="MLOps Platform",
keywords="MLOps AI",
url="https://github.com/star-whale/starwhale",
license="Apache-2.0",
packages=find_packages(exclude=["ez_setup", "tests*"]),
include_package_data=True,
install_requires=install_requires,
zip_safe=False,
entry_points="""
[console_scripts]
swcli = starwhale.cli:cli
sw = starwhale.cli:cli
starwhale = starwhale.cli:cli
""",
python_requires = ">=3.7.0",
scripts=[
'scripts/sw-docker-entrypoint',
],
package_data={
"starwhale": ['swmp/templates/Dockerfile'],
},
python_requires=">=3.7.0",
scripts=[
"scripts/sw-docker-entrypoint",
],
package_data={
"starwhale": ["swmp/templates/Dockerfile"],
},
)
2 changes: 1 addition & 1 deletion client/starwhale/__init__.py
@@ -5,4 +5,4 @@
os.environ["SW_VERSION"] = __version__


#TODO: only export api
# TODO: only export api
4 changes: 2 additions & 2 deletions client/starwhale/__main__.py
@@ -1,4 +1,4 @@

if __name__ == "__main__":
from starwhale.cli import cli
cli()

cli()
60 changes: 36 additions & 24 deletions client/starwhale/api/_impl/dataset.py
@@ -8,11 +8,13 @@
import jsonlines

from starwhale.swds.dataset import (
D_ALIGNMENT_SIZE, D_USER_BATCH_SIZE, D_FILE_VOLUME_SIZE
D_ALIGNMENT_SIZE,
D_USER_BATCH_SIZE,
D_FILE_VOLUME_SIZE,
)
from starwhale.consts import SWDS_DATA_FNAME_FMT, SWDS_LABEL_FNAME_FMT

#TODO: tune header size
# TODO: tune header size
_header_magic = struct.unpack(">I", b"SWDS")[0]
_data_magic = struct.unpack(">I", b"SDWS")[0]
_header_struct = struct.Struct(">IIQIIII")
@@ -34,7 +36,8 @@ class BuildExecutor(object):
data bytes...
padding bytes... --> default 4K padding
"""
#TODO: add more docstring for class

# TODO: add more docstring for class

__metaclass__ = ABCMeta

@@ -44,16 +47,18 @@ class BuildExecutor(object):
_DATA_FMT = SWDS_DATA_FNAME_FMT
_LABEL_FMT = SWDS_LABEL_FNAME_FMT

def __init__(self,
data_dir:Path = Path("."),
output_dir: Path = Path("./sw_output"),
data_filter:str ="*", label_filter:str ="*",
batch:int =D_USER_BATCH_SIZE,
alignment_bytes_size:int = D_ALIGNMENT_SIZE,
volume_bytes_size:int = D_FILE_VOLUME_SIZE,
) -> None:
#TODO: add more docstring for args
#TODO: validate group upper and lower?
def __init__(
self,
data_dir: Path = Path("."),
output_dir: Path = Path("./sw_output"),
data_filter: str = "*",
label_filter: str = "*",
batch: int = D_USER_BATCH_SIZE,
alignment_bytes_size: int = D_ALIGNMENT_SIZE,
volume_bytes_size: int = D_FILE_VOLUME_SIZE,
) -> None:
# TODO: add more docstring for args
# TODO: validate group upper and lower?
self._batch = max(batch, 1)
self.data_dir = data_dir
self.data_filter = data_filter
@@ -67,26 +72,28 @@ def __init__(self,

def _prepare(self):
self.output_dir.mkdir(parents=True, exist_ok=True)
self._index_writer = jsonlines.open(str((self.output_dir / self.INDEX_NAME).resolve()), mode="w")
self._index_writer = jsonlines.open(
str((self.output_dir / self.INDEX_NAME).resolve()), mode="w"
)

def __exit__(self):
try:
self._index_writer.close() # type: ignore
self._index_writer.close() # type: ignore
except Exception as e:
print(f"index writer close exception: {e}")

print("cleanup done.")

def _write(self, writer, idx: int, data: bytes) -> t.Tuple[int, int]:
size = len(data)
crc = crc32(data) #TODO: crc is right?
crc = crc32(data) # TODO: crc is right?
start = writer.tell()
padding_size = self._get_padding_size(size + _header_size)

_header = _header_struct.pack(
_header_magic, crc, idx, size, padding_size, self._batch, _data_magic
)
_padding = b'\0' * padding_size
_padding = b"\0" * padding_size
writer.write(_header + data + _padding)
return start, _header_size + size + padding_size

@@ -108,17 +115,19 @@ def _write_index(self, idx, fno, data_pos, data_size, label_pos, label_size):
file=self._LABEL_FMT.format(index=fno),
offset=label_pos,
size=label_size,
)
),
)
)

def make_swds(self):
#TODO: add lock
# TODO: add lock
fno, wrote_size = 0, 0
dwriter = (self.output_dir / self._DATA_FMT.format(index=fno)).open("wb")
lwriter = (self.output_dir / self._LABEL_FMT.format(index=fno)).open("wb")

for idx, (data, label) in enumerate(zip(self.iter_all_dataset_slice(), self.iter_all_label_slice())):
for idx, (data, label) in enumerate(
zip(self.iter_all_dataset_slice(), self.iter_all_label_slice())
):
data_pos, data_size = self._write(dwriter, idx, data)
label_pos, label_size = self._write(lwriter, idx, label)
self._write_index(idx, fno, data_pos, data_size, label_pos, label_size)
@@ -131,8 +140,12 @@ def make_swds(self):
dwriter.close()
lwriter.close()

dwriter = (self.output_dir / self._DATA_FMT.format(index=fno)).open("wb")
lwriter = (self.output_dir / self._LABEL_FMT.format(index=fno)).open("wb")
dwriter = (self.output_dir / self._DATA_FMT.format(index=fno)).open(
"wb"
)
lwriter = (self.output_dir / self._LABEL_FMT.format(index=fno)).open(
"wb"
)

try:
dwriter.close()
@@ -183,7 +196,6 @@ def label_sort_func(self):


class MNISTBuildExecutor(BuildExecutor):

def iter_data_slice(self, path: str):
fpath = Path(path)

@@ -211,4 +223,4 @@ def iter_label_slice(self, path: str):
yield content


#TODO: define some open dataset class, like ImageNet, COCO
# TODO: define some open dataset class, like ImageNet, COCO
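
Note (not part of the diff): a minimal sketch of the record header that _write packs above; the field names are inferred from dataset.py and are an assumption, not documentation:

    import struct

    _header_magic = struct.unpack(">I", b"SWDS")[0]
    _data_magic = struct.unpack(">I", b"SDWS")[0]
    # magic(I) crc(I) index(Q) size(I) padding_size(I) batch(I) data_magic(I)
    _header_struct = struct.Struct(">IIQIIII")

    header = _header_struct.pack(_header_magic, 0, 0, 128, 0, 50, _data_magic)
    print(_header_struct.size)  # 32 -> fixed header size prepended to each record
    print(len(header))          # 32
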