forked from facebookresearch/metaseq
Commit
commit 511504b
Author: Susan Zhang <suchenzang@users.noreply.github.com>
Date:   Sun Jan 1 17:00:25 2023 +0100

    Init for model_parallel == 1 (facebookresearch#577)

    * gate by arch, not by mp size
    * add back mp > 1 conditional

commit 59403be
Author: Susan Zhang <suchenzang@users.noreply.github.com>
Date:   Sun Jan 1 00:42:37 2023 +0100

    [Cleanup] Remove MegatronTrainer (facebookresearch#576)

commit 6687b6f
Author: Susan Zhang <suchenzang@users.noreply.github.com>
Date:   Sat Dec 31 17:38:28 2022 +0100

    use bash (facebookresearch#575)

commit a87e08f
Author: Stephen Roller <roller@fb.com>
Date:   Fri Dec 30 14:11:57 2022 -0500

    Add Sharan to CODEOWNERS (facebookresearch#558)

commit 1d4af00
Author: Stephen Roller <roller@fb.com>
Date:   Fri Dec 30 14:11:47 2022 -0500

    Fix config.yml dump in training runs. (facebookresearch#557)

commit ed85aad
Author: Christian Clauss <cclauss@me.com>
Date:   Fri Dec 30 07:43:41 2022 +0100

    Current flake8 no longer accepts comments on config lines (facebookresearch#570)

    * Current flake8 no longer accepts comments on config lines
      `ValueError: Error code '#' supplied to 'extend-ignore' option does not match '^[A-Z]{1,3}[0-9]{0,3}$'`
    * flake8==6.0.0
    * Update .flake8
    * Update setup.py

    Co-authored-by: Stephen Roller <roller@fb.com>

commit db6842b
Author: Taichi Nishimura <lack_un@yahoo.co.jp>
Date:   Fri Dec 30 12:14:49 2022 +0900

    Add backslash to the script in projects/OPT/download_opt175b.md (facebookresearch#573)

    * add backslash to script
    * add backslash to docs/api.md

commit 966561e
Author: Binh Tang <tangbinh.na@gmail.com>
Date:   Wed Dec 28 13:11:39 2022 -0800

    Add a new script to reshard model parallel parts (facebookresearch#556)

    Co-authored-by: Binh Tang <tangbinhna@gmail.com>
Showing 18 changed files with 303 additions and 267 deletions.
.flake8

@@ -1,7 +1,10 @@
 [flake8]
 extend-ignore =
-    F541 # f-string is missing placeholders
-    E203 # whitespace with black
-    E741 # "l" is ambiguous
+    # E203: whitespace with black
+    E203
+    # E741: "l" is ambiguous
+    E741
+    # F541: f-string is missing placeholders
+    F541
 # github size
 max-line-length=127
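For context, the flake8 change above is needed because flake8 6.0.0 validates every extend-ignore entry against the pattern ^[A-Z]{1,3}[0-9]{0,3}$ (the pattern quoted in the commit's error message), so an inline "# comment" on the same config line is parsed as extra error codes and rejected. A minimal sketch of that validation, assuming only the pattern from the error message rather than flake8's actual implementation:

    import re

    # Pattern quoted in the ValueError from commit ed85aad.
    ERROR_CODE = re.compile(r"^[A-Z]{1,3}[0-9]{0,3}$")

    # Old style: the inline comment is split into extra "codes" such as "#".
    old_entry_tokens = ["F541", "#", "f-string", "is", "missing", "placeholders"]
    # New style: comments sit on their own lines, so only real codes remain.
    new_entry_tokens = ["E203", "E741", "F541"]

    for token in old_entry_tokens:
        if not ERROR_CODE.match(token):
            print(f"Error code {token!r} supplied to 'extend-ignore' would be rejected")

    assert all(ERROR_CODE.match(token) for token in new_entry_tokens)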
CODEOWNERS

@@ -1 +1 @@
-* @suchenzang @stephenroller @ngoyal2707 @punitkoura @moyapchen @klshuster @ruanslv @davides @dgrnbrg-meta @igormolybogFB @Xirider
+* @suchenzang @stephenroller @ngoyal2707 @punitkoura @moyapchen @klshuster @ruanslv @davides @dgrnbrg-meta @igormolybogFB @Xirider @sharannarang
One changed file was deleted; its contents are not shown here.
New file (23 additions):

@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from metaseq.model_parallel.modules import ModelParallelTransformerEncoderLayer
from metaseq.models.transformer_encoder import TransformerEncoder


class ModelParallelTransformerEncoder(TransformerEncoder):
    """
    Model parallel Transformer encoder consisting of *args.encoder_layers* layers. Each layer
    is a :class:`ModelParallelTransformerEncoderLayer`.
    """

    def __init__(self, args, dictionary, embed_tokens):
        super().__init__(args, dictionary, embed_tokens)

        if args.no_final_layer_norm:
            self.layer_norm = None

    def build_encoder_layer(self, args):
        return ModelParallelTransformerEncoderLayer(args)
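The subclass above works because metaseq's TransformerEncoder builds its layer stack through an overridable factory method, so ModelParallelTransformerEncoder only has to swap the layer class and optionally drop the final layer norm. A self-contained toy sketch of that template-method pattern (the Toy* classes are hypothetical stand-ins, not metaseq code):

    import torch.nn as nn


    class ToyEncoder(nn.Module):
        """Stand-in for metaseq's TransformerEncoder: the base class owns the stacking loop."""

        def __init__(self, num_layers):
            super().__init__()
            # Every layer is created through the factory method below, so subclasses
            # only need to override build_encoder_layer() to change the layer type.
            self.layers = nn.ModuleList(self.build_encoder_layer() for _ in range(num_layers))
            self.layer_norm = nn.LayerNorm(16)

        def build_encoder_layer(self):
            return nn.Linear(16, 16)


    class ToyParallelEncoder(ToyEncoder):
        """Stand-in for ModelParallelTransformerEncoder: swap the layer, optionally drop the norm."""

        def __init__(self, num_layers, no_final_layer_norm=False):
            super().__init__(num_layers)
            if no_final_layer_norm:
                self.layer_norm = None

        def build_encoder_layer(self):
            # Imagine a ColumnParallelLinear-backed transformer layer here.
            return nn.Linear(16, 16, bias=False)


    enc = ToyParallelEncoder(num_layers=2, no_final_layer_norm=True)
    assert all(layer.bias is None for layer in enc.layers) and enc.layer_norm is None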
metaseq/model_parallel/modules/transformer_encoder_layer.py (50 additions, 0 deletions)
@@ -0,0 +1,50 @@
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

try:
    from megatron.mpu import (
        ColumnParallelLinear,
        RowParallelLinear,
    )

    has_megatron_submodule = True
except (ImportError, ModuleNotFoundError):
    has_megatron_submodule = False

from metaseq.model_parallel.modules import ModelParallelMultiheadAttention
from metaseq.modules import TransformerEncoderLayer


class ModelParallelTransformerEncoderLayer(TransformerEncoderLayer):
    """Encoder layer block over multiple gpus.
    See "Megatron-LM: https://arxiv.org/pdf/1909.08053.pdf" for more details.
    """

    def build_fc1(self, input_dim, output_dim):
        if not has_megatron_submodule:
            raise ImportError(
                "\n\nPlease install megatron using the setup instructions!"
            )
        return ColumnParallelLinear(
            input_dim, output_dim, gather_output=False, skip_bias_add=True
        )

    def build_fc2(self, input_dim, output_dim):
        if not has_megatron_submodule:
            raise ImportError(
                "\n\nPlease install megatron using the setup instructions!"
            )
        return RowParallelLinear(
            input_dim, output_dim, input_is_parallel=True, skip_bias_add=True
        )

    def build_self_attention(self, embed_dim, args, **unused_kwargs):
        return ModelParallelMultiheadAttention(
            embed_dim,
            args.encoder_attention_heads,
            dropout=args.attention_dropout,
            self_attention=True,
        )
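This pairing follows the Megatron MLP layout: fc1 is column-parallel so each rank keeps only its shard of the hidden activation (gather_output=False), and fc2 is row-parallel so each rank produces a partial output that a single all-reduce combines (input_is_parallel=True), meaning the whole feed-forward block needs just one communication step in the forward pass. A single-process sketch of that arithmetic, using torch.chunk in place of Megatron's mpu layers (illustrative only; biases and the skip_bias_add detail are omitted):

    import torch

    torch.manual_seed(0)
    d_model, d_ffn, world_size = 8, 32, 4
    x = torch.randn(3, d_model)        # [tokens, d_model], replicated on every rank
    w1 = torch.randn(d_model, d_ffn)   # fc1 weight
    w2 = torch.randn(d_ffn, d_model)   # fc2 weight

    # Reference: the unsharded feed-forward block.
    reference = torch.relu(x @ w1) @ w2

    # Column-parallel fc1: each "rank" holds a slice of the output columns.
    w1_shards = torch.chunk(w1, world_size, dim=1)
    # Row-parallel fc2: each "rank" holds the matching slice of the input rows.
    w2_shards = torch.chunk(w2, world_size, dim=0)

    # The nonlinearity is applied to the still-sharded activation, and sum()
    # emulates the all-reduce that RowParallelLinear performs across ranks.
    partials = [torch.relu(x @ a) @ b for a, b in zip(w1_shards, w2_shards)]
    output = sum(partials)

    assert torch.allclose(output, reference, atol=1e-5)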