Support multiple layer extraction
leo19941227 committed Jun 14, 2021
2 parents 6531904 + 7246614 commit 39b4087
Showing 59 changed files with 1,047 additions and 1,738 deletions.
8 changes: 7 additions & 1 deletion README.md
@@ -52,14 +52,20 @@ If you find this toolkit helpful to your research, please do consider to cite [o
* Install **sox** on your OS
* Install generally used packages for *pretrain*, *upstream* and *downstream*:

-```
+```bash
git clone https://github.com/s3prl/s3prl.git
cd s3prl/
pip install -r requirements.txt
cd ../

git clone https://github.com/pytorch/fairseq.git
cd fairseq/

+# The version used by the repo maintainer currently.
+# Please do not use the stable version 0.10.2, as it
+# contains known bugs in wav2vec2 inference and ASR decoding
+git checkout 8df9e3a4

pip install -e ./
cd ../
```
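
With both packages installed, the headline feature of this commit (extracting representations from multiple upstream layers) can be exercised roughly as below. This is a minimal sketch, not the repo's documented usage: it assumes the s3prl torch.hub entry point and an upstream output dict that exposes a "hidden_states" list with one tensor per layer, which may differ from the exact interface at this commit.

```python
import torch

# Minimal sketch (assumptions: s3prl's hubconf exposes 'wav2vec2', and the
# upstream returns a dict whose "hidden_states" entry is a list of per-layer
# features). Not guaranteed to match the exact interface at this commit.
upstream = torch.hub.load('s3prl/s3prl', 'wav2vec2')
upstream.eval()

# s3prl upstreams take a list of variable-length 16 kHz waveforms.
wavs = [torch.randn(16000 * 2), torch.randn(16000 * 3)]
with torch.no_grad():
    outputs = upstream(wavs)

hidden_states = outputs["hidden_states"]  # one (batch, time, dim) tensor per layer
print(len(hidden_states), hidden_states[-1].shape)
```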
3 changes: 3 additions & 0 deletions downstream/emotion/expert.py
@@ -41,7 +41,10 @@ def __init__(self, upstream_dim, downstream_expert, downstream_variant, expdir,
dataset = IEMOCAPDataset(DATA_ROOT, train_path, self.datarc['pre_load'])
trainlen = int((1 - self.datarc['valid_ratio']) * len(dataset))
lengths = [trainlen, len(dataset) - trainlen]

+torch.manual_seed(0)
self.train_dataset, self.dev_dataset = random_split(dataset, lengths)

self.test_dataset = IEMOCAPDataset(DATA_ROOT, test_path, self.datarc['pre_load'])

model_cls = eval(self.modelrc['select'])
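
For context: the added torch.manual_seed(0) pins the IEMOCAP train/dev random_split so every run sees the same partition. An alternative (illustrative only, not what this commit does) is to hand random_split a dedicated generator, which keeps the split reproducible without reseeding the global RNG:

```python
import torch
from torch.utils.data import TensorDataset, random_split

# Illustrative sketch with a toy dataset: a dedicated generator fixes the
# split without touching global RNG state (PyTorch >= 1.6 accepts `generator`).
dataset = TensorDataset(torch.arange(100))
trainlen = int(0.8 * len(dataset))
lengths = [trainlen, len(dataset) - trainlen]

split_a, _ = random_split(dataset, lengths, generator=torch.Generator().manual_seed(0))
split_b, _ = random_split(dataset, lengths, generator=torch.Generator().manual_seed(0))
assert split_a.indices == split_b.indices  # identical train subset every run
```

Either way, the point of the change is that the emotion downstream's dev split no longer drifts between runs.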
6 changes: 3 additions & 3 deletions downstream/example/dataset.py
@@ -6,7 +6,7 @@

SAMPLE_RATE = 16000
EXAMPLE_WAV_MIN_SEC = 5
-EXAMPLE_WAV_MAX_SEC = 15
+EXAMPLE_WAV_MAX_SEC = 20
EXAMPLE_DATASET_SIZE = 1000


@@ -15,8 +15,8 @@ def __init__(self, **kwargs):
self.class_num = 48

def __getitem__(self, idx):
-wav_sec = random.randint(EXAMPLE_WAV_MIN_SEC, EXAMPLE_WAV_MAX_SEC)
-wav = torch.randn(SAMPLE_RATE * wav_sec)
+samples = random.randint(EXAMPLE_WAV_MIN_SEC * SAMPLE_RATE, EXAMPLE_WAV_MAX_SEC * SAMPLE_RATE)
+wav = torch.randn(samples)
label = random.randint(0, self.class_num - 1)
return wav, label

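The example dataset change above draws the dummy waveform length directly in samples instead of whole seconds, so lengths are no longer multiples of SAMPLE_RATE, presumably to exercise padding and truncation paths more realistically. A small sketch of the difference, reusing the constants from the diff:

```python
import random
import torch

SAMPLE_RATE = 16000
EXAMPLE_WAV_MIN_SEC = 5
EXAMPLE_WAV_MAX_SEC = 20

# Old behaviour: lengths were exact multiples of SAMPLE_RATE (whole seconds).
old_len = SAMPLE_RATE * random.randint(EXAMPLE_WAV_MIN_SEC, EXAMPLE_WAV_MAX_SEC)
assert old_len % SAMPLE_RATE == 0

# New behaviour: any sample count between 5 s and 20 s of audio.
new_len = random.randint(EXAMPLE_WAV_MIN_SEC * SAMPLE_RATE, EXAMPLE_WAV_MAX_SEC * SAMPLE_RATE)
wav = torch.randn(new_len)
assert EXAMPLE_WAV_MIN_SEC * SAMPLE_RATE <= len(wav) <= EXAMPLE_WAV_MAX_SEC * SAMPLE_RATE
```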