diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..358e6c9
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,10 @@
+[flake8]
+ignore=B008,E121,E123,E126,E203,E226,E24,E704,W503,W504,D100,D105,D200,D202,D301,D402,D
+max-line-length=88
+exclude=
+    .git
+import-order-style=appnexus
+application-package-names=data_generators
+per-file-ignores=
+    **/tests/*:D
+    **/test*.py:D
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..731e424
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,10 @@
+[MASTER]
+jobs=0
+load-plugins=pylint.extensions.docparams
+
+[MESSAGES CONTROL]
+disable=all
+enable=missing-param-doc,
+       differing-param-doc,
+       differing-type-doc,
+       missing-return-doc
diff --git a/conf/requirements-dev.txt b/conf/requirements-dev.txt
new file mode 100644
index 0000000..2e4ba72
--- /dev/null
+++ b/conf/requirements-dev.txt
@@ -0,0 +1,7 @@
+flake8
+flake8-bugbear
+flake8-docstrings
+flake8-import-order
+pylint
+mypy
+black
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..4a90ec4
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,4 @@
+[mypy]
+disallow_untyped_defs = True
+ignore_missing_imports = True
+strict_optional = False
diff --git a/notebooks/codesearchnet-opennmt.py b/notebooks/codesearchnet-opennmt.py
index b148061..321f301 100644
--- a/notebooks/codesearchnet-opennmt.py
+++ b/notebooks/codesearchnet-opennmt.py
@@ -1,16 +1,17 @@
-from argparse import ArgumentParser
+from argparse import ArgumentParser, Namespace
 import logging
 from pathlib import Path
 from time import time
-from typing import List
+from typing import List, Tuple
 
 import pandas as pd
+from torch.utils.data import Dataset
 
 
 logging.basicConfig(level=logging.INFO)
 
 
-class CodeSearchNetRAM(object):
+class CodeSearchNetRAM(Dataset):
     """Stores one split of CodeSearchNet data in memory
 
     Usage example:
@@ -50,7 +51,7 @@ def _jsonl_list_to_dataframe(
             sort=False,
         )
 
-    def __getitem__(self, idx: int):
+    def __getitem__(self, idx: int) -> Tuple[str, str]:
         row = self.pd.iloc[idx]
 
         # drop class name
@@ -65,11 +66,11 @@ def __getitem__(self, idx: int):
         # fn_body_enc = self.enc.encode(fn_body)
         return (fn_name, fn_body)
 
-    def __len__(self):
+    def __len__(self) -> int:
         return len(self.pd)
 
 
-def main(args):
+def main(args: Namespace) -> None:
     dataset = CodeSearchNetRAM(Path(args.data_dir), args.newline)
     split_name = Path(args.data_dir).name
     with open(args.src_file % split_name, mode="w", encoding="utf8") as s, open(