diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..358e6c9 --- /dev/null +++ b/.flake8 @@ -0,0 +1,10 @@ +[flake8] +ignore=B008,E121,E123,E126,E203,E226,E24,E704,W503,W504,D100,D105,D200,D202,D301,D402,D +max-line-length=88 +exclude= + .git +import-order-style=appnexus +application-package-names=data_generators +per-file-ignores= + **/tests/*:D + **/test*.py:D diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..731e424 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,10 @@ +[MASTER] +jobs=0 +load-plugins=pylint.extensions.docparams + +[MESSAGES CONTROL] +disable=all +enable=missing-param-doc, + differing-param-doc, + differing-type-doc, + missing-return-doc diff --git a/conf/requirements-dev.txt b/conf/requirements-dev.txt new file mode 100644 index 0000000..2e4ba72 --- /dev/null +++ b/conf/requirements-dev.txt @@ -0,0 +1,7 @@ +flake8 +flake8-bugbear +flake8-docstrings +flake8-import-order +pylint +mypy +black diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..4a90ec4 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,4 @@ +[mypy] +disallow_untyped_defs = True +ignore_missing_imports = True +strict_optional = False \ No newline at end of file diff --git a/notebooks/codesearchnet-opennmt.py b/notebooks/codesearchnet-opennmt.py index b148061..321f301 100644 --- a/notebooks/codesearchnet-opennmt.py +++ b/notebooks/codesearchnet-opennmt.py @@ -1,16 +1,17 @@ -from argparse import ArgumentParser +from argparse import ArgumentParser, Namespace import logging from pathlib import Path from time import time -from typing import List +from typing import List, Tuple import pandas as pd +from torch.utils.data import Dataset logging.basicConfig(level=logging.INFO) -class CodeSearchNetRAM(object): +class CodeSearchNetRAM(Dataset): """Stores one split of CodeSearchNet data in memory Usage example: @@ -50,7 +51,7 @@ def _jsonl_list_to_dataframe( sort=False, ) - def __getitem__(self, idx: int): + def __getitem__(self, idx: int) -> Tuple[str, str]: row = self.pd.iloc[idx] # drop class name @@ -65,11 +66,11 @@ def __getitem__(self, idx: int): # fn_body_enc = self.enc.encode(fn_body) return (fn_name, fn_body) - def __len__(self): + def __len__(self) -> int: return len(self.pd) -def main(args): +def main(args: Namespace) -> None: dataset = CodeSearchNetRAM(Path(args.data_dir), args.newline) split_name = Path(args.data_dir).name with open(args.src_file % split_name, mode="w", encoding="utf8") as s, open(