Skip to content

Commit

Permalink
feat: single model, single optimizer template (#34)
Browse files Browse the repository at this point in the history
* feat: add single model, single optimizer template

* Update README.md

* feat: archive as python package system

* feat: add hubconf, tests for single, TODOs tip

* tip improvement

* readme improve, script for find and replace

* kwargs to Checkpoint handler

* mimic tree

* feat: more utils functions and tests

* feat: handlers usage in main.py, more comments

* feat: add custom additional events, fix #30

* fix: all in 1 deps install, amp option fix #32

* fix: model, opt, loss, lr_s from initialize, fix #32

* chore: up README about distributed launching

* feat: add resume_from command line argument to resume from checkpoint, fix #31

* fix: comments and docstring
  • Loading branch information
Jeff Yang committed Apr 2, 2021
1 parent c5e511e commit ca80e3d
Show file tree
Hide file tree
Showing 31 changed files with 1,776 additions and 169 deletions.
6 changes: 3 additions & 3 deletions .github/run_code_style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

set -xeu

if [ $1 = "lint" ]; then
if [ $1 == "lint" ]; then
flake8 app templates tests --config .flake8
isort app templates tests --check --settings pyproject.toml
black app templates tests --check --config pyproject.toml
elif [ $1 = "fmt" ]; then
elif [ $1 == "fmt" ]; then
isort app templates tests --color --settings pyproject.toml
black app templates tests --config pyproject.toml
elif [ $1 = "install" ]; then
elif [ $1 == "install" ]; then
pip install flake8 "black==20.8b1" "isort==5.7.0"
fi
6 changes: 3 additions & 3 deletions .github/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

set -xeu

if [ $1 = "generate" ]; then
if [ $1 == "generate" ]; then
python ./tests/generate.py
elif [ $1 = "unittest" ]; then
elif [ $1 == "unittest" ]; then
pytest ./tests/unittest -vvv -ra --color=yes --durations=0
elif [ $1 = "integration" ]; then
elif [ $1 == "integration" ]; then
for file in $(find ./tests/integration -iname "*.sh")
do
bash $file
Expand Down
36 changes: 20 additions & 16 deletions app/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def __init__(self, templates_dir: str = "./templates", dist_dir: str = "./dist")
self.dist_dir = Path(dist_dir)
self.template_list = [p.stem for p in self.templates_dir.iterdir() if p.is_dir() and not p.stem.startswith("_")]
self.rendered_code = {t: {} for t in self.template_list}
self.available_archive_formats = sorted(map(lambda x: x[0], shutil.get_archive_formats()), reverse=True)
self.available_archive_formats = [x[0] for x in shutil.get_archive_formats()[::-1]]

def render_templates(self, template_name: str, config: dict):
"""Renders all the templates files from template folder for the given config."""
Expand All @@ -31,27 +31,31 @@ def render_templates(self, template_name: str, config: dict):
self.rendered_code[template_name][fname] = code
yield fname, code

def mk_dist_template_dir(self, template_name: str):
self.dist_template_dir = Path(f"{self.dist_dir}/{template_name}")
self.dist_template_dir.mkdir(parents=True, exist_ok=True)
def make_and_write(self, template_name: str):
"""Make the directories first and write to the files"""
for p in (self.templates_dir / template_name).rglob("*"):
if not p.stem.startswith("_") and p.is_dir():
# p is templates/template_name/...
# remove "templates" from p.parts and join with "/", so we'll have
# template_name/...
p = "/".join(p.parts[1:])
else:
p = template_name

def write_file(self, fname: str, code: str) -> None:
"""Creates `fname` with content `code` in `dist_dir/template_name`."""
(self.dist_template_dir / fname).write_text(code)
if not (self.dist_dir / p).is_dir():
(self.dist_dir / p).mkdir(parents=True, exist_ok=True)

def write_files(self, template_name):
"""Writes all rendered code for the specified template."""
# Save files with rendered code to the disk
for fname, code in self.rendered_code[template_name].items():
self.write_file(fname, code)
(self.dist_dir / template_name / fname).write_text(code)

def make_archive(self, template_name, archive_format):
"""Creates dist dir with generated code, then makes the archive."""
self.mk_dist_template_dir(template_name)
self.write_files(template_name)

self.make_and_write(template_name)
archive_fname = shutil.make_archive(
base_name=str(self.dist_template_dir),
base_name=template_name,
root_dir=self.dist_dir,
format=archive_format,
base_dir=self.dist_template_dir,
base_dir=template_name,
)
return archive_fname
return shutil.move(archive_fname, self.dist_dir / archive_fname.split("/")[-1])
71 changes: 61 additions & 10 deletions app/streamlit_app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os
import shutil
from pathlib import Path
from subprocess import check_output

import streamlit as st
from codegen import CodeGenerator
Expand All @@ -10,6 +8,22 @@
__version__ = "0.1.0"


FOLDER_TO_TEMPLATE_NAME = {
"Single Model, Single Optimizer": "single",
"Generative Adversarial Network": "gan",
"Image Classification": "image_classification",
}

TIP = """
> **💡 TIP**
>
> To quickly adapt to the generated code structure, there are TODOs in the files that are needed to be edited.
> [PyCharm TODO comments](https://www.jetbrains.com/help/pycharm/using-todo.html) or
> [VSCode Todo Tree](https://marketplace.visualstudio.com/items?itemName=Gruntfuggly.todo-tree)
> can help you find them easily.
"""


class App:
page_title = "Code Generator"
page_icon = "https://raw.githubusercontent.com/pytorch/ignite/master/assets/logo/ignite_logomark.svg"
Expand Down Expand Up @@ -39,18 +53,19 @@ def sidebar(self, template_list=None, config=None):
template_list = template_list or []
st.markdown("### Choose a Template")
self.template_name = st.selectbox("Available Templates are:", options=template_list)
self.template_name = FOLDER_TO_TEMPLATE_NAME[self.template_name]
with st.sidebar:
if self.template_name:
config = config(self.template_name)
self.config = config.get_configs()
else:
self.config = {}

def render_code(self, fname="", code=""):
def render_code(self, fname: str = "", code: str = ""):
"""Main content with the code."""
with st.beta_expander(f"View rendered {fname}"):
with st.beta_expander(f"View rendered {fname}", expanded=fname.endswith(".md")):
if fname.endswith(".md"):
st.markdown(code)
st.markdown(code, unsafe_allow_html=True)
else:
col1, col2 = st.beta_columns([1, 20])
with col1:
Expand All @@ -59,22 +74,57 @@ def render_code(self, fname="", code=""):
st.code(code)

def render_directory(self, dir):
output = check_output(["tree", dir], encoding="utf-8")
"""tree command is not available in all systems."""
output = f"{dir}\n"
# https://stackoverflow.com/questions/9727673/list-directory-tree-structure-in-python
# prefix components:
space = " "
branch = "│ "
# pointers:
tee = "├── "
last = "└── "
file_count = 0
dir_count = 0

def tree(dir_path: Path, prefix: str = ""):
"""A recursive generator, given a directory Path object
will yield a visual tree structure line by line
with each line prefixed by the same characters
"""
nonlocal file_count
nonlocal dir_count
contents = sorted(dir_path.iterdir())
# contents each get pointers that are ├── with a final └── :
pointers = [tee] * (len(contents) - 1) + [last]
for pointer, path in zip(pointers, contents):
if path.is_file():
file_count += 1
yield prefix + pointer + path.name
if path.is_dir(): # extend the prefix and recurse:
dir_count += 1
extension = branch if pointer == tee else space
# i.e. space because last, └── , above so no more |
yield from tree(path, prefix=prefix + extension)

for line in tree(dir):
output += line + "\n"
output += f"\n{dir_count} directories, {file_count} files"
st.markdown("Generated files and directory structure")
st.code(output)

def add_sidebar(self):
def config(template_name):
return import_from_file("template_config", f"./templates/{template_name}/_sidebar.py")

self.sidebar(self.codegen.template_list, config)
self.sidebar([*FOLDER_TO_TEMPLATE_NAME], config)

def add_content(self):
"""Get generated/rendered code from the codegen."""
content = [*self.codegen.render_templates(self.template_name, self.config)]
if st.checkbox("View rendered code ?"):
if st.checkbox("View rendered code ?", value=True):
for fname, code in content:
self.render_code(fname, code)
if len(code): # don't show files which don't have content in them
self.render_code(fname, code)

def add_download(self):
st.markdown("")
Expand All @@ -94,12 +144,13 @@ def add_download(self):
shutil.copy(archive_fname, dist_path)
st.success(f"Download link : [{archive_fname}](./static/{archive_fname})")
with col2:
self.render_directory(os.path.join(self.codegen.dist_dir, self.template_name))
self.render_directory(Path(self.codegen.dist_dir, self.template_name))

def run(self):
self.add_sidebar()
self.add_content()
self.add_download()
st.info(TIP)


def main():
Expand Down
2 changes: 2 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
-r requirements.txt

# dev
pytorch-ignite
torch
Expand Down
141 changes: 141 additions & 0 deletions templates/_base/_argparse.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
{% block imports %}
from argparse import ArgumentParser
{% endblock %}

{% block defaults %}
# Default command line options rendered into the generated project.
# `{{ … }}` placeholders are filled in by the Jinja template renderer;
# each `help` string repeats the rendered default in parentheses.
# NOTE(review): argparse `type=bool` converts any non-empty string to True
# (e.g. "--with_pbars false" yields True); consider a str2bool converter or
# store_true/store_false actions for the boolean options below.
DEFAULTS = {
    "use_amp": {
        "action": "store_true",
        "help": "use torch.cuda.amp for automatic mixed precision"
    },
    "resume_from": {
        "default": None,
        "type": str,
        "help": "path to the checkpoint file to resume, can also be a URL starting with https (None)"
    },
    "seed": {
        "default": 666,
        "type": int,
        "help": "seed to use in ignite.utils.manual_seed() (666)"
    },
    "verbose": {
        "action": "store_true",
        "help": "use logging.INFO in ignite.utils.setup_logger",
    },

    # distributed training options
    "backend": {
        "default": None,
        "type": str,
        "help": "backend to use for distributed training (None)",
    },
    "nproc_per_node": {
        "default": {{nproc_per_node}},
        "type": int,
        "help": """number of processes to launch on each node, for GPU training
        this is recommended to be set to the number of GPUs in your system
        so that each process can be bound to a single GPU ({{ nproc_per_node }})""",
    },
    "nnodes": {
        "default": {{nnodes}},
        "type": int,
        "help": "number of nodes to use for distributed training ({{ nnodes }})",
    },
    "node_rank": {
        "default": {{node_rank}},
        "type": int,
        "help": "rank of the node for multi-node distributed training ({{ node_rank }})",
    },
    "master_addr": {
        "default": {{master_addr}},
        "type": str,
        "help": "master node TCP/IP address for torch native backends ({{ master_addr }})",
    },
    "master_port": {
        "default": {{master_port}},
        "type": int,
        "help": "master node port for torch native backends ({{ master_port }})",
    },

    # ignite handlers options
    "output_path": {
        "default": "{{output_path}}",
        "type": str,
        "help": "output path to indicate where to_save objects are stored ({{output_path}})",
    },
    "save_every_iters": {
        "default": {{save_every_iters}},
        "type": int,
        "help": "Saving iteration interval ({{save_every_iters}})",
    },
    "n_saved": {
        "default": {{n_saved}},
        "type": int,
        "help": "number of best models to store ({{ n_saved }})",
    },
    "log_every_iters": {
        "default": {{log_every_iters}},
        "type": int,
        "help": "logging interval for iteration progress bar ({{log_every_iters}})",
    },
    "with_pbars": {
        "default": {{with_pbars}},
        "type": bool,
        "help": "show epoch-wise and iteration-wise progress bars ({{with_pbars}})",
    },
    "with_pbar_on_iters": {
        "default": {{with_pbar_on_iters}},
        "type": bool,
        "help": "show iteration progress bar or not ({{with_pbar_on_iters}})",
    },
    "stop_on_nan": {
        "default": {{stop_on_nan}},
        "type": bool,
        # was "(stop_on_nan)" — interpolate the default like every other entry
        "help": "stop the training if engine output contains NaN/inf values ({{stop_on_nan}})",
    },
    "clear_cuda_cache": {
        "default": {{clear_cuda_cache}},
        "type": bool,
        "help": "clear cuda cache every end of epoch ({{clear_cuda_cache}})",
    },
    "with_gpu_stats": {
        "default": {{with_gpu_stats}},
        "type": bool,
        "help": "show gpu information, requires pynvml ({{with_gpu_stats}})",
    },
    "patience": {
        "default": {{patience}},
        "type": int,
        "help": "number of events to wait if no improvement and then stop the training ({{patience}})"
    },
    "limit_sec": {
        "default": {{limit_sec}},
        "type": int,
        "help": "maximum time before training terminates in seconds ({{limit_sec}})"
    },

    # ignite logger options
    "filepath": {
        "default": "{{ filepath }}",
        "type": str,
        "help": "logging file path ({{ filepath }})",
    },
    "logger_log_every_iters": {
        "default": {{logger_log_every_iters}},
        "type": int,
        "help": "logging interval for experiment tracking system ({{logger_log_every_iters}})",
    },
}
{% endblock %}


{% block get_default_parser %}
def get_default_parser() -> ArgumentParser:
    """Build an ``ArgumentParser`` pre-populated with every option in ``DEFAULTS``.

    ``add_help=False`` so the generated parser can be composed as a parent
    parser without a duplicate ``-h/--help`` argument.
    """
    parser = ArgumentParser(add_help=False)
    for name, options in DEFAULTS.items():
        # each DEFAULTS entry holds the add_argument keyword arguments
        parser.add_argument(f"--{name}", **options)
    return parser
{% endblock %}
28 changes: 28 additions & 0 deletions templates/_base/_events.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
Additional Events to inspect the training at custom events.
"""

from ignite.engine.events import EventEnum


class TrainEvents(EventEnum):
    """Additional training events fired from within the training step.

    - BACKWARD_COMPLETED : triggered after calling ``loss.backward()``
    - OPTIM_STEP_COMPLETED : triggered after calling ``optimizer.step()``
    """

    BACKWARD_COMPLETED = "backward_completed"
    OPTIM_STEP_COMPLETED = "optim_step_completed"


# Map each custom event to the engine attribute name it tracks, so the
# events can be registered with a custom filter function
# (presumably passed as ``event_to_attr`` when registering — verify in `engines.py`).
train_events_to_attr = {
    TrainEvents.BACKWARD_COMPLETED: "backward_completed",
    TrainEvents.OPTIM_STEP_COMPLETED: "optim_step_completed",
}

# Any additional custom events can go below:
# fire them in the process_function of the respective engine and
# register them with that engine in `engines.py`.

0 comments on commit ca80e3d

Please sign in to comment.