formatting fix (#2816)
Summary:
# Before submitting

- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?

## What does this PR do?
fix `black` failures
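
For context on the hunks below: `black` rewraps calls that exceed its default 88-character line length onto one argument per line, adds trailing commas to the multi-line calls and collections it explodes, and normalizes float literals such as `0.` to `0.0`. The import regrouping (standard library, then third-party, then `fairseq` modules) matches isort-style ordering, which `black` itself does not perform. A minimal before/after sketch, using a hypothetical `load_ensemble` helper and made-up values rather than the real fairseq API:

```python
# Hypothetical helper and values, used only to illustrate the formatting change.
def load_ensemble(paths, arg_overrides=None):
    return paths, arg_overrides


path, data_dir = "checkpoint.pt", "/tmp/data"

# Before: a bare `0.` literal and a call packed onto one over-long line.
score, n = 0., 0
ensemble = load_ensemble([path], arg_overrides={"data": data_dir, "config_yaml": "cfg_librispeech.yaml", "vocoder": "griffin_lim", "fp16": False})

# After: roughly what `black` produces. `0.` becomes `0.0`, and the call is
# exploded one argument per line, with trailing commas throughout.
score, n = 0.0, 0
ensemble = load_ensemble(
    [path],
    arg_overrides={
        "data": data_dir,
        "config_yaml": "cfg_librispeech.yaml",
        "vocoder": "griffin_lim",
        "fp16": False,
    },
)
```

Running `python -m black .` applies these rewrites locally; `black --check .` only reports the files it would reformat.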

## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.

## Did you have fun?
Make sure you had fun coding 🙃

Pull Request resolved: fairinternal/fairseq-py#2816

Reviewed By: alexeib

Differential Revision: D33172615

Pulled By: dianaml0

fbshipit-source-id: 36b141f42941670f1bfa981041d878042feb0428
Diana Liskovich authored and facebook-github-bot committed Dec 17, 2021
1 parent 1e4055d commit a540213
Showing 9 changed files with 60 additions and 41 deletions.
11 changes: 5 additions & 6 deletions fairseq/checkpoint_utils.py
@@ -7,15 +7,18 @@
import collections
import contextlib
import logging
import numpy as np
import os
import re
import time
import traceback
from collections import OrderedDict
from pathlib import Path
from typing import Any, Dict, Optional, Union

import numpy as np
import torch
from omegaconf import DictConfig, OmegaConf, open_dict

from fairseq.data import data_utils
from fairseq.dataclass.configs import CheckpointConfig
from fairseq.dataclass.utils import (
@@ -25,10 +25,6 @@
from fairseq.distributed.fully_sharded_data_parallel import FSDP, has_FSDP
from fairseq.file_io import PathManager
from fairseq.models import FairseqDecoder, FairseqEncoder
from omegaconf import DictConfig, open_dict, OmegaConf

from pathlib import Path


logger = logging.getLogger(__name__)

@@ -509,7 +508,7 @@ def load_model_ensemble_and_task_from_hf_hub(
_arg_overrides["data"] = cache_dir
return load_model_ensemble_and_task(
[p.as_posix() for p in Path(cache_dir).glob("*.pt")],
arg_overrides=_arg_overrides
arg_overrides=_arg_overrides,
)


10 changes: 6 additions & 4 deletions fairseq/models/hubert/hubert.py
@@ -4,13 +4,14 @@
# LICENSE file in the root directory of this source tree.

import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

import numpy as np

import torch
import torch.nn as nn
from dataclasses import dataclass, field
from omegaconf import II

from fairseq import utils
from fairseq.data.data_utils import compute_mask_indices
from fairseq.data.dictionary import Dictionary
@@ -25,7 +26,6 @@
HubertPretrainingConfig,
HubertPretrainingTask,
)
from omegaconf import II

logger = logging.getLogger(__name__)

@@ -205,9 +205,11 @@ class HubertConfig(FairseqDataclass):
)

checkpoint_activations: bool = field(
default=False, metadata={"help": "recompute activations and save memory for extra compute"}
default=False,
metadata={"help": "recompute activations and save memory for extra compute"},
)


@register_model("hubert", dataclass=HubertConfig)
class HubertModel(BaseFairseqModel):
def __init__(
1 change: 0 additions & 1 deletion fairseq/modules/layer_norm.py
@@ -21,7 +21,6 @@ def forward(self, x):
with torch.cuda.device(x.device):
return super().forward(x)


except ImportError:
has_fused_layernorm = False

6 changes: 3 additions & 3 deletions tests/speech/__init__.py
@@ -4,8 +4,8 @@
# LICENSE file in the root directory of this source tree.

import os
from pathlib import Path
import unittest
from pathlib import Path

import torch

@@ -31,7 +31,7 @@ def set_up_librispeech(self):
"spm_librispeech_unigram10000.model",
"spm_librispeech_unigram10000.txt",
"librispeech_test-other.tsv",
"librispeech_test-other.zip"
"librispeech_test-other.zip",
]
self.base_url = f"{S3_BASE_URL}/s2t/librispeech"
for filename in self.data_filenames:
@@ -47,7 +47,7 @@ def set_up_ljspeech(self):
"ljspeech_g2p_gcmvn_stats.npz",
"ljspeech_g2p.txt",
"ljspeech_test.tsv",
"ljspeech_test.zip"
"ljspeech_test.zip",
]
self.base_url = f"{S3_BASE_URL}/s2/ljspeech"
for filename in self.data_filenames:
17 changes: 10 additions & 7 deletions tests/speech/test_fastspeech2.py
@@ -8,8 +8,8 @@
import torch
from tqdm import tqdm

from fairseq.checkpoint_utils import load_model_ensemble_and_task
from fairseq import utils
from fairseq.checkpoint_utils import load_model_ensemble_and_task
from fairseq.tasks.text_to_speech import batch_mel_cepstral_distortion
from tests.speech import TestFairseqSpeech

@@ -25,12 +25,13 @@ def test_ljspeech_fastspeech2_checkpoint(self):
path = self.download(self.base_url, self.root, checkpoint_filename)

models, cfg, task = load_model_ensemble_and_task(
[path.as_posix()], arg_overrides={
[path.as_posix()],
arg_overrides={
"data": self.root.as_posix(),
"config_yaml": "cfg_ljspeech_g2p.yaml",
"vocoder": "griffin_lim",
"fp16": False
}
"fp16": False,
},
)
if self.use_cuda:
for model in models:
@@ -40,19 +41,21 @@ def test_ljspeech_fastspeech2_checkpoint(self):
task.load_dataset(test_split)
batch_iterator = task.get_batch_iterator(
dataset=task.dataset(test_split),
max_tokens=65_536, max_positions=4_096, num_workers=1
max_tokens=65_536,
max_positions=4_096,
num_workers=1,
).next_epoch_itr(shuffle=False)
progress = tqdm(batch_iterator, total=len(batch_iterator))
generator = task.build_generator(models, cfg)

mcd, n_samples = 0., 0
mcd, n_samples = 0.0, 0
for sample in progress:
sample = utils.move_to_cuda(sample) if self.use_cuda else sample
hypos = generator.generate(models[0], sample, has_targ=True)
rets = batch_mel_cepstral_distortion(
[hypo["targ_waveform"] for hypo in hypos],
[hypo["waveform"] for hypo in hypos],
sr=task.sr
sr=task.sr,
)
mcd += sum(d.item() for d, _ in rets)
n_samples += len(sample["id"].tolist())
37 changes: 25 additions & 12 deletions tests/speech/test_s2t_transformer.py
@@ -3,14 +3,14 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from argparse import Namespace
import unittest
from argparse import Namespace

import torch
from tqdm import tqdm

from fairseq.checkpoint_utils import load_model_ensemble_and_task
from fairseq import utils
from fairseq.checkpoint_utils import load_model_ensemble_and_task
from fairseq.scoring.wer import WerScorer
from tests.speech import TestFairseqSpeech

@@ -26,10 +26,11 @@ def test_librispeech_s2t_transformer_s_checkpoint(self):
path = self.download(self.base_url, self.root, checkpoint_filename)

models, cfg, task = load_model_ensemble_and_task(
[path.as_posix()], arg_overrides={
[path.as_posix()],
arg_overrides={
"data": self.root.as_posix(),
"config_yaml": "cfg_librispeech.yaml"
}
"config_yaml": "cfg_librispeech.yaml",
},
)
if self.use_cuda:
for model in models:
@@ -38,21 +39,33 @@ def test_librispeech_s2t_transformer_s_checkpoint(self):
test_split = "librispeech_test-other"
task.load_dataset(test_split)
batch_iterator = task.get_batch_iterator(
dataset=task.dataset(test_split),
max_tokens=65_536, max_positions=(4_096, 1_024), num_workers=1
).next_epoch_itr(shuffle=False)
dataset=task.dataset(test_split),
max_tokens=65_536,
max_positions=(4_096, 1_024),
num_workers=1,
).next_epoch_itr(shuffle=False)

scorer_args = {"wer_tokenizer": "none", "wer_lowercase": False,
"wer_remove_punct": False, "wer_char_level": False}
scorer_args = {
"wer_tokenizer": "none",
"wer_lowercase": False,
"wer_remove_punct": False,
"wer_char_level": False,
}
scorer = WerScorer(Namespace(**scorer_args))
progress = tqdm(enumerate(batch_iterator), total=len(batch_iterator))
for batch_idx, sample in progress:
sample = utils.move_to_cuda(sample) if self.use_cuda else sample
hypo = task.inference_step(generator, models, sample)
for i, sample_id in enumerate(sample["id"].tolist()):
tgt_tokens = utils.strip_pad(sample["target"][i, :], task.tgt_dict.pad()).int().cpu()
tgt_tokens = (
utils.strip_pad(sample["target"][i, :], task.tgt_dict.pad())
.int()
.cpu()
)
tgt_str = task.tgt_dict.string(tgt_tokens, "sentencepiece")
hypo_str = task.tgt_dict.string(hypo[i][0]["tokens"].int().cpu(), "sentencepiece")
hypo_str = task.tgt_dict.string(
hypo[i][0]["tokens"].int().cpu(), "sentencepiece"
)
if batch_idx == 0 and i < 3:
print(f"T-{sample_id} {tgt_str}")
print(f"H-{sample_id} {hypo_str}")
17 changes: 10 additions & 7 deletions tests/speech/test_tts_transformer.py
@@ -8,8 +8,8 @@
import torch
from tqdm import tqdm

from fairseq.checkpoint_utils import load_model_ensemble_and_task
from fairseq import utils
from fairseq.checkpoint_utils import load_model_ensemble_and_task
from fairseq.tasks.text_to_speech import batch_mel_cepstral_distortion
from tests.speech import TestFairseqSpeech

@@ -25,12 +25,13 @@ def test_ljspeech_tts_transformer_checkpoint(self):
path = self.download(self.base_url, self.root, checkpoint_filename)

models, cfg, task = load_model_ensemble_and_task(
[path.as_posix()], arg_overrides={
[path.as_posix()],
arg_overrides={
"data": self.root.as_posix(),
"config_yaml": "cfg_ljspeech_g2p.yaml",
"vocoder": "griffin_lim",
"fp16": False
}
"fp16": False,
},
)
if self.use_cuda:
for model in models:
@@ -40,19 +41,21 @@ def test_ljspeech_tts_transformer_checkpoint(self):
task.load_dataset(test_split)
batch_iterator = task.get_batch_iterator(
dataset=task.dataset(test_split),
max_tokens=65_536, max_positions=768, num_workers=1
max_tokens=65_536,
max_positions=768,
num_workers=1,
).next_epoch_itr(shuffle=False)
progress = tqdm(batch_iterator, total=len(batch_iterator))
generator = task.build_generator(models, cfg)

mcd, n_samples = 0., 0
mcd, n_samples = 0.0, 0
for sample in progress:
sample = utils.move_to_cuda(sample) if self.use_cuda else sample
hypos = generator.generate(models[0], sample, has_targ=True)
rets = batch_mel_cepstral_distortion(
[hypo["targ_waveform"] for hypo in hypos],
[hypo["waveform"] for hypo in hypos],
sr=task.sr
sr=task.sr,
)
mcd += sum(d.item() for d, _ in rets)
n_samples += len(sample["id"].tolist())
1 change: 1 addition & 0 deletions tests/test_hf_hub.py
@@ -7,6 +7,7 @@
import unittest

import torch

try:
import huggingface_hub
except ImportError:
1 change: 0 additions & 1 deletion tests/test_sequence_generator.py
@@ -562,7 +562,6 @@ def test_diverse_beam_search(self):
self.assertHypoScore(hypos[1][1], [0.7, 0.35, 0.9], [0, 2, 1], 0.5)



class TestTopPSamplingSearch(TestSequenceGeneratorBase):
def setUp(self):
# construct dummy dictionary
