Skip to content

Commit

Permalink
Pre-commit updates (#2427)
Browse files Browse the repository at this point in the history
Ref #2421 
Fix #2908

Updates older pre-commit hooks (initial experiment:
#680), mostly based (again)
on [tox
configs](https://github.com/tox-dev/tox/blob/main/.pre-commit-config.yaml)

Use ruff instead of pyupgrade/isort/black/flake8

This PR has a couple of commits: the first ones update configs, the last
one runs `tox -e fix_lint` to apply pre-commit. Mostly looking into
updating the first commits without breaking tests on the last commit.
  • Loading branch information
luizirber committed Feb 5, 2024
1 parent b265415 commit fee6292
Show file tree
Hide file tree
Showing 152 changed files with 26,467 additions and 16,243 deletions.
95 changes: 25 additions & 70 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,71 +1,26 @@
default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: check-ast
# - id: check-builtin-literals
- id: check-docstring-first
- id: check-merge-conflict
- id: check-yaml
- id: check-toml
- id: debug-statements
# - id: end-of-file-fixer
# exclude: 'tests/test-data'
# - id: trailing-whitespace
# exclude: 'tests/test-data'
#- repo: https://github.com/asottile/pyupgrade
# rev: v2.7.2
# hooks:
# - id: pyupgrade
#- repo: https://github.com/pre-commit/mirrors-isort
# rev: v5.4.2
# hooks:
# - id: isort
# additional_dependencies: [toml]

# format using black
# when the full codebase is black, use it directly;
# while it isn't, let's use darker to format new/changed code
- repo: https://github.com/akaihola/darker
rev: 1.7.1
hooks:
- id: darker
#- repo: https://github.com/psf/black
# rev: 20.8b1
# hooks:
# - id: black
# args:
# - --safe
# language_version: python3.8
#- repo: https://github.com/asottile/blacken-docs
# rev: v1.8.0
# hooks:
# - id: blacken-docs
# additional_dependencies:
# - black==19.10b0
# language_version: python3.8

#- repo: https://github.com/asottile/add-trailing-comma
# rev: v2.0.1
# hooks:
# - id: add-trailing-comma
#- repo: https://github.com/pre-commit/pygrep-hooks
# rev: v1.6.0
# hooks:
# - id: rst-backticks
#- repo: https://github.com/asottile/setup-cfg-fmt
# rev: v1.11.0
# hooks:
# - id: setup-cfg-fmt
# args:
# - --min-py3-version
# - '3.7'
#- repo: https://gitlab.com/pycqa/flake8
# rev: 3.8.3
# hooks:
# - id: flake8
# additional_dependencies:
# - flake8-bugbear == 20.1.2
# language_version: python3.8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-ast
- id: check-builtin-literals
- id: check-docstring-first
- id: check-merge-conflict
- id: check-yaml
- id: check-toml
- id: debug-statements
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.0
hooks:
- id: ruff-format
- id: ruff
args: ["--fix", "--unsafe-fixes", "--exit-non-zero-on-fix"]
- repo: https://github.com/tox-dev/tox-ini-fmt
rev: "0.5.2"
hooks:
- id: tox-ini-fmt
args: ["-p", "fix_lint"]
- repo: meta
hooks:
- id: check-hooks-apply
- id: check-useless-excludes
65 changes: 36 additions & 29 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,44 @@
from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash

# Benchmark parameters for random sequence generation.
RANDOM_SEQ_SIZE = 3000
RANDOM_SEQ_NUMBER = 300

# MinHash sketch construction parameters (number of hashes, k-mer size).
MINHASH_NUM = 500
MINHASH_K = 21

# Iteration counts for the individual timing / peak-memory benchmarks.
GET_MINS_RANGE = 500
ADD_HASH_RANGE = 10_000
ADD_MANY_RANGE = 1000
SIMILARITY_TIMES = 500
COUNT_COMMON_TIMES = 500
MERGE_TIMES = 500
COPY_TIMES = 500
CONCAT_TIMES = 500
SET_ABUNDANCES_RANGE = 500
ZIP_STORAGE_WRITE = 100_000
ZIP_STORAGE_LOAD = 20


def load_sequences(count=10, seq_size=None, seq_number=None):
    """Generate `count` random DNA sequences for benchmarking.

    Keyword arguments default to the module-level benchmark constants,
    so the existing no-argument call `load_sequences()` is unchanged.

    count: number of sequences to generate.
    seq_size: multiplier for the A/C/G/T population sampled from
        (defaults to RANDOM_SEQ_SIZE).
    seq_number: length of each generated sequence
        (defaults to RANDOM_SEQ_NUMBER).

    Returns a list of `count` strings over the alphabet ACGT.
    """
    size = RANDOM_SEQ_SIZE if seq_size is None else seq_size
    number = RANDOM_SEQ_NUMBER if seq_number is None else seq_number
    sequences = []
    for _ in range(count):
        # Sample `number` characters (without replacement) from a large
        # pool of A/C/G/T, then join them into one sequence string.
        random_seq = random.sample("A,C,G,T".split(",") * size, number)
        sequences.append("".join(random_seq))
    return sequences


class TimeMinHashSuite:
def setup(self):
self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
self.protein_mh = MinHash(MINHASH_NUM, MINHASH_K, is_protein=True,
track_abundance=False)
self.protein_mh = MinHash(
MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
)
self.sequences = load_sequences()

self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K,
track_abundance=False)
self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
for seq in self.sequences:
self.populated_mh.add_sequence(seq)

Expand Down Expand Up @@ -103,8 +104,9 @@ def time_concat(self):
class PeakmemMinHashSuite:
def setup(self):
self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
self.protein_mh = MinHash(MINHASH_NUM, MINHASH_K,
is_protein=True, track_abundance=True)
self.protein_mh = MinHash(
MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=True
)
self.sequences = load_sequences()

def peakmem_add_sequence(self):
Expand Down Expand Up @@ -158,21 +160,25 @@ def time_set_abundances_noclear(self):
for i in range(SET_ABUNDANCES_RANGE):
mh.set_abundances(mins, clear=False)


class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
    # Runs the parent suite's peak-memory benchmarks with a fresh
    # abundance-tracking MinHash as the primary sketch.
    def setup(self):
        PeakmemMinHashSuite.setup(self)
        # NOTE(review): the parent setup appears to assign self.mh with the
        # same arguments; this re-assignment looks redundant — confirm.
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)


####################

class TimeZipStorageSuite:

class TimeZipStorageSuite:
def setup(self):
import zipfile

self.zipfile = NamedTemporaryFile()

with zipfile.ZipFile(self.zipfile, mode='w',
compression=zipfile.ZIP_STORED) as storage:
with zipfile.ZipFile(
self.zipfile, mode="w", compression=zipfile.ZIP_STORED
) as storage:
for i in range(ZIP_STORAGE_WRITE):
# just so we have lots of entries
storage.writestr(str(i), b"0")
Expand All @@ -196,17 +202,18 @@ def teardown(self):
class PeakmemZipStorageSuite:
def setup(self):
import zipfile

self.zipfile = NamedTemporaryFile()

with zipfile.ZipFile(self.zipfile, mode='w',
compression=zipfile.ZIP_STORED) as storage:
with zipfile.ZipFile(
self.zipfile, mode="w", compression=zipfile.ZIP_STORED
) as storage:
for i in range(ZIP_STORAGE_WRITE):
# just so we have lots of entries
storage.writestr(str(i), b"0")
# one big-ish entry
storage.writestr("sig1", b"9" * 1_000_000)


def peakmem_load_from_zipstorage(self):
with ZipStorage(self.zipfile.name) as storage:
for i in range(ZIP_STORAGE_LOAD):
Expand Down

0 comments on commit fee6292

Please sign in to comment.