Format code on master (black + isort) (#538)
* Config files

* Add autoflake

* Update isort exclude; add pre-commit to requirements

* Manually fix a few bad cases
lihuoran committed Jun 13, 2022
1 parent 17ad489 commit 7e3c1d5
Showing 324 changed files with 5,620 additions and 3,595 deletions.
7 changes: 7 additions & 0 deletions .github/linters/pyproject.toml
@@ -0,0 +1,7 @@
[tool.black]
line-length = 120

[tool.isort]
profile = "black"
line_length = 120
known_first_party = "maro"
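
Note: with profile = "black", line_length = 120, and known_first_party = "maro", isort arranges imports into standard-library, third-party, and first-party "maro" groups, separated by blank lines and sorted alphabetically within each group. A minimal sketch of the resulting layout, using module names that appear in files touched by this commit:

# Standard library, third-party, and first-party ("maro") groups, in that order.
from typing import Dict, List

import torch
from torch.optim import Adam, RMSprop

from maro.rl.model import DiscreteQNet, FullyConnected
from maro.rl.policy import ValueBasedPolicy
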
15 changes: 4 additions & 11 deletions .github/linters/tox.ini
@@ -5,7 +5,9 @@ ignore =
# line break after binary operator
W504,
# line break before binary operator
W503
W503,
# whitespace before ':'
E203

exclude =
.git,
@@ -27,14 +29,5 @@ max-line-length = 120
per-file-ignores =
# import not used: ignore in __init__.py files
__init__.py:F401
# igore invalid escape sequence in cli main script to show banner
# ignore invalid escape sequence in cli main script to show banner
maro.py:W605

[isort]
indent = " "
line_length = 120
use_parentheses = True
multi_line_output = 6
known_first_party = maro
filter_files = True
skip_glob = maro/__init__.py, tests/*, examples/*, setup.py
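
Note: the isort settings above move to .github/linters/pyproject.toml, and E203 joins W503/W504 in the flake8 ignore list because black's output would otherwise be flagged: black prefers breaking long expressions before a binary operator (W503) and puts spaces around ":" in slices whose bounds are expressions (E203, "whitespace before ':'"). A small runnable sketch of both shapes:

# Wrapping with the operator at the start of the continuation line (W503 would flag the leading "-").
fulfillment_factor, shortage_factor = 1.0, 1.0
fulfillment, shortage = 10.0, 3.0
reward = (
    fulfillment_factor * fulfillment
    - shortage_factor * shortage
)

# Spaces around ":" in a slice whose bounds are expressions (E203 would flag the space before ":").
data = list(range(100))
tick, offset = 40, 2
window = data[tick - offset : tick + offset]
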
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
@@ -45,12 +45,12 @@ jobs:
uses: github/super-linter@latest
env:
VALIDATE_ALL_CODEBASE: false
VALIDATE_PYTHON_PYLINT: false # disable pylint, as we have not configure it
VALIDATE_PYTHON_BLACK: false # same as above
VALIDATE_PYTHON_PYLINT: false # disable pylint, as we have not configured it
VALIDATE_PYTHON_MYPY: false # same as above
VALIDATE_JSCPD: false # Can not exclude specific file: https://github.com/kucherenko/jscpd/issues/215
PYTHON_FLAKE8_CONFIG_FILE: tox.ini
PYTHON_ISORT_CONFIG_FILE: tox.ini
PYTHON_BLACK_CONFIG_FILE: pyproject.toml
PYTHON_ISORT_CONFIG_FILE: pyproject.toml
EDITORCONFIG_FILE_NAME: ../../.editorconfig
FILTER_REGEX_INCLUDE: maro/.*
FILTER_REGEX_EXCLUDE: tests/.*
51 changes: 51 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,51 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

repos:
- repo: https://github.com/myint/autoflake
rev: v1.4
hooks:
- id: autoflake
args:
- --in-place
- --remove-unused-variables
- --remove-all-unused-imports
exclude: .*/__init__\.py|setup\.py
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
args:
- --settings-path=.github/linters/pyproject.toml
- --check
- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
- id: add-trailing-comma
name: add-trailing-comma (1st round)
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
name: black (1st round)
args:
- --config=.github/linters/pyproject.toml
- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
- id: add-trailing-comma
name: add-trailing-comma (2nd round)
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
name: black (2nd round)
args:
- --config=.github/linters/pyproject.toml
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.9
hooks:
- id: flake8
args:
- --config=.github/linters/tox.ini
exclude: \.git|__pycache__|docs|build|dist|.*\.egg-info|docker_files|\.vscode|\.github|scripts|tests|maro\/backends\/.*.cp|setup.py
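
The add-trailing-comma and black hooks each run twice. A plausible reason (not stated in the commit) is that add-trailing-comma only edits calls and literals that already span several lines, so the first black pass wraps long single-line calls, the second add-trailing-comma pass can then append commas to them, and the second black pass keeps those calls exploded via its "magic trailing comma" rule. A hedged sketch of that rule; make_scheduler is a hypothetical helper used only for illustration:

def make_scheduler(*args, **kwargs):  # hypothetical helper, for illustration only
    return args, kwargs

# Without a trailing comma, black may collapse the arguments onto one line
# (this call fits within the configured 120-character limit):
scheduler = make_scheduler("epsilon", splits=[(2, 0.32)], initial_value=0.4, last_ep=5, final_value=0.0)

# With a trailing comma after the last argument, black keeps the call exploded
# one argument per line, the style visible throughout this commit's diffs:
scheduler = make_scheduler(
    "epsilon",
    splits=[(2, 0.32)],
    initial_value=0.4,
    last_ep=5,
    final_value=0.0,
)
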
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -65,7 +65,7 @@


# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
8 changes: 3 additions & 5 deletions examples/cim/rl/algorithms/ac.py
@@ -1,14 +1,12 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Dict

import torch
from torch.optim import Adam, RMSprop

from maro.rl.model import DiscreteACBasedNet, FullyConnected, VNet
from maro.rl.policy import DiscretePolicyGradient
from maro.rl.training.algorithms import ActorCriticTrainer, ActorCriticParams
from maro.rl.training.algorithms import ActorCriticParams, ActorCriticTrainer

actor_net_conf = {
"hidden_dims": [256, 128, 64],
@@ -58,10 +56,10 @@ def get_ac(state_dim: int, name: str) -> ActorCriticTrainer:
name=name,
params=ActorCriticParams(
get_v_critic_net_func=lambda: MyCriticNet(state_dim),
reward_discount=.0,
reward_discount=0.0,
grad_iters=10,
critic_loss_cls=torch.nn.SmoothL1Loss,
min_logp=None,
lam=.0,
lam=0.0,
),
)
26 changes: 14 additions & 12 deletions examples/cim/rl/algorithms/dqn.py
@@ -1,15 +1,13 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Dict

import torch
from torch.optim import RMSprop

from maro.rl.exploration import MultiLinearExplorationScheduler, epsilon_greedy
from maro.rl.model import DiscreteQNet, FullyConnected
from maro.rl.policy import ValueBasedPolicy
from maro.rl.training.algorithms import DQNTrainer, DQNParams
from maro.rl.training.algorithms import DQNParams, DQNTrainer

q_net_conf = {
"hidden_dims": [256, 128, 64, 32],
@@ -38,14 +36,18 @@ def get_dqn_policy(state_dim: int, action_num: int, name: str) -> ValueBasedPoli
name=name,
q_net=MyQNet(state_dim, action_num),
exploration_strategy=(epsilon_greedy, {"epsilon": 0.4}),
exploration_scheduling_options=[(
"epsilon", MultiLinearExplorationScheduler, {
"splits": [(2, 0.32)],
"initial_value": 0.4,
"last_ep": 5,
"final_value": 0.0,
}
)],
exploration_scheduling_options=[
(
"epsilon",
MultiLinearExplorationScheduler,
{
"splits": [(2, 0.32)],
"initial_value": 0.4,
"last_ep": 5,
"final_value": 0.0,
},
),
],
warmup=100,
)

@@ -54,7 +56,7 @@ def get_dqn(name: str) -> DQNTrainer:
return DQNTrainer(
name=name,
params=DQNParams(
reward_discount=.0,
reward_discount=0.0,
update_target_every=5,
num_epochs=10,
soft_update_coef=0.1,
15 changes: 7 additions & 8 deletions examples/cim/rl/algorithms/maddpg.py
@@ -2,30 +2,29 @@
# Licensed under the MIT license.

from functools import partial
from typing import Dict, List
from typing import List

import torch
from torch.optim import Adam, RMSprop

from maro.rl.model import DiscreteACBasedNet, FullyConnected, MultiQNet
from maro.rl.policy import DiscretePolicyGradient
from maro.rl.training.algorithms import DiscreteMADDPGTrainer, DiscreteMADDPGParams

from maro.rl.training.algorithms import DiscreteMADDPGParams, DiscreteMADDPGTrainer

actor_net_conf = {
"hidden_dims": [256, 128, 64],
"activation": torch.nn.Tanh,
"softmax": True,
"batch_norm": False,
"head": True
"head": True,
}
critic_net_conf = {
"hidden_dims": [256, 128, 64],
"output_dim": 1,
"activation": torch.nn.LeakyReLU,
"softmax": False,
"batch_norm": True,
"head": True
"head": True,
}
actor_learning_rate = 0.001
critic_learning_rate = 0.001
@@ -64,9 +63,9 @@ def get_maddpg(state_dim: int, action_dims: List[int], name: str) -> DiscreteMAD
return DiscreteMADDPGTrainer(
name=name,
params=DiscreteMADDPGParams(
reward_discount=.0,
reward_discount=0.0,
num_epoch=10,
get_q_critic_net_func=partial(get_multi_critic_net, state_dim, action_dims),
shared_critic=False
)
shared_critic=False,
),
)
4 changes: 2 additions & 2 deletions examples/cim/rl/algorithms/ppo.py
@@ -15,11 +15,11 @@ def get_ppo(state_dim: int, name: str) -> PPOTrainer:
name=name,
params=PPOParams(
get_v_critic_net_func=lambda: MyCriticNet(state_dim),
reward_discount=.0,
reward_discount=0.0,
grad_iters=10,
critic_loss_cls=torch.nn.SmoothL1Loss,
min_logp=None,
lam=.0,
lam=0.0,
clip_ratio=0.1,
),
)
15 changes: 7 additions & 8 deletions examples/cim/rl/config.py
@@ -4,7 +4,7 @@
env_conf = {
"scenario": "cim",
"topology": "toy.4p_ssdd_l0.0",
"durations": 560
"durations": 560,
}

if env_conf["topology"].startswith("toy"):
@@ -17,27 +17,26 @@

state_shaping_conf = {
"look_back": 7,
"max_ports_downstream": 2
"max_ports_downstream": 2,
}

action_shaping_conf = {
"action_space": [(i - 10) / 10 for i in range(21)],
"finite_vessel_space": True,
"has_early_discharge": True
"has_early_discharge": True,
}

reward_shaping_conf = {
"time_window": 99,
"fulfillment_factor": 1.0,
"shortage_factor": 1.0,
"time_decay": 0.97
"time_decay": 0.97,
}

# obtain state dimension from a temporary env_wrapper instance
state_dim = (
(state_shaping_conf["look_back"] + 1) * (state_shaping_conf["max_ports_downstream"] + 1) * len(port_attributes)
+ len(vessel_attributes)
)
state_dim = (state_shaping_conf["look_back"] + 1) * (state_shaping_conf["max_ports_downstream"] + 1) * len(
port_attributes,
) + len(vessel_attributes)

action_num = len(action_shaping_conf["action_space"])

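
For reference, the state dimension above is the flattened size of a (look_back + 1) by (max_ports_downstream + 1) block of port attributes plus the vessel attributes. A worked sketch of the arithmetic; the attribute counts are placeholders, since port_attributes and vessel_attributes are defined earlier in config.py and not shown in this diff:

look_back = 7               # state_shaping_conf["look_back"]
max_ports_downstream = 2    # state_shaping_conf["max_ports_downstream"]
num_port_attributes = 4     # placeholder count, for illustration only
num_vessel_attributes = 3   # placeholder count, for illustration only

state_dim = (look_back + 1) * (max_ports_downstream + 1) * num_port_attributes + num_vessel_attributes
print(state_dim)  # (7 + 1) * (2 + 1) * 4 + 3 = 99 with these placeholder counts
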
33 changes: 19 additions & 14 deletions examples/cim/rl/env_sampler.py
@@ -8,29 +8,32 @@
from maro.rl.rollout import AbsEnvSampler, CacheElement
from maro.simulator.scenarios.cim.common import Action, ActionType, DecisionEvent

from .config import (
action_shaping_conf, port_attributes, reward_shaping_conf, state_shaping_conf,
vessel_attributes,
)
from .config import action_shaping_conf, port_attributes, reward_shaping_conf, state_shaping_conf, vessel_attributes


class CIMEnvSampler(AbsEnvSampler):
def _get_global_and_agent_state_impl(
self, event: DecisionEvent, tick: int = None,
self,
event: DecisionEvent,
tick: int = None,
) -> Tuple[Union[None, np.ndarray, List[object]], Dict[Any, Union[np.ndarray, List[object]]]]:
tick = self._env.tick
vessel_snapshots, port_snapshots = self._env.snapshot_list["vessels"], self._env.snapshot_list["ports"]
port_idx, vessel_idx = event.port_idx, event.vessel_idx
ticks = [max(0, tick - rt) for rt in range(state_shaping_conf["look_back"] - 1)]
future_port_list = vessel_snapshots[tick: vessel_idx: 'future_stop_list'].astype('int')
state = np.concatenate([
port_snapshots[ticks: [port_idx] + list(future_port_list): port_attributes],
vessel_snapshots[tick: vessel_idx: vessel_attributes]
])
future_port_list = vessel_snapshots[tick:vessel_idx:"future_stop_list"].astype("int")
state = np.concatenate(
[
port_snapshots[ticks : [port_idx] + list(future_port_list) : port_attributes],
vessel_snapshots[tick:vessel_idx:vessel_attributes],
],
)
return state, {port_idx: state}

def _translate_to_env_action(
self, action_dict: Dict[Any, Union[np.ndarray, List[object]]], event: DecisionEvent,
self,
action_dict: Dict[Any, Union[np.ndarray, List[object]]],
event: DecisionEvent,
) -> Dict[Any, object]:
action_space = action_shaping_conf["action_space"]
finite_vsl_space = action_shaping_conf["finite_vessel_space"]
Expand All @@ -40,7 +43,7 @@ def _translate_to_env_action(

vsl_idx, action_scope = event.vessel_idx, event.action_scope
vsl_snapshots = self._env.snapshot_list["vessels"]
vsl_space = vsl_snapshots[self._env.tick:vsl_idx:vessel_attributes][2] if finite_vsl_space else float("inf")
vsl_space = vsl_snapshots[self._env.tick : vsl_idx : vessel_attributes][2] if finite_vsl_space else float("inf")

percent = abs(action_space[model_action[0]])
zero_action_idx = len(action_space) / 2 # index corresponding to value zero.
@@ -49,7 +52,9 @@
actual_action = min(round(percent * action_scope.load), vsl_space)
elif model_action > zero_action_idx:
action_type = ActionType.DISCHARGE
early_discharge = vsl_snapshots[self._env.tick:vsl_idx:"early_discharge"][0] if has_early_discharge else 0
early_discharge = (
vsl_snapshots[self._env.tick : vsl_idx : "early_discharge"][0] if has_early_discharge else 0
)
plan_action = percent * (action_scope.discharge + early_discharge) - early_discharge
actual_action = round(plan_action) if plan_action > 0 else round(percent * action_scope.discharge)
else:
@@ -70,7 +75,7 @@ def _get_reward(self, env_action_dict: Dict[Any, object], event: DecisionEvent,
decay_list = [reward_shaping_conf["time_decay"] ** i for i in range(reward_shaping_conf["time_window"])]
rewards = np.float32(
reward_shaping_conf["fulfillment_factor"] * np.dot(future_fulfillment.T, decay_list)
- reward_shaping_conf["shortage_factor"] * np.dot(future_shortage.T, decay_list)
- reward_shaping_conf["shortage_factor"] * np.dot(future_shortage.T, decay_list),
)
return {agent_id: reward for agent_id, reward in zip(ports, rewards)}

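
As a side note, the reward returned by _get_reward above is a time-decayed difference between future fulfillment and future shortage over the next time_window ticks. A minimal sketch with made-up snapshot values, collapsed to a single port for brevity (the real code produces one reward per port):

import numpy as np

time_window, time_decay = 99, 0.97              # values from reward_shaping_conf
fulfillment_factor, shortage_factor = 1.0, 1.0  # values from reward_shaping_conf

decay_list = [time_decay ** i for i in range(time_window)]
future_fulfillment = np.random.randint(0, 10, size=time_window)  # made-up snapshot values
future_shortage = np.random.randint(0, 5, size=time_window)      # made-up snapshot values

reward = np.float32(
    fulfillment_factor * np.dot(future_fulfillment.T, decay_list)
    - shortage_factor * np.dot(future_shortage.T, decay_list),
)
print(reward)
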
(Only a subset of the 324 changed files is shown above; the remaining file diffs are omitted here.)
