Skip to content
This repository has been archived by the owner on Nov 11, 2023. It is now read-only.

Commit

Permalink
chore(preprocess): logger
Browse files Browse the repository at this point in the history
  • Loading branch information
HuanLinOTO committed Nov 11, 2023
1 parent 1c234be commit c797641
Show file tree
Hide file tree
Showing 15 changed files with 81 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,5 @@ trained/**/
configs/
filelists/*
filelists/val.txt
test_pipe.py
test_queue.py
3 changes: 1 addition & 2 deletions logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def format_level(str, length):
return str

def default_format(record):
    """Render a loguru *record* as one rich-markup log line.

    Layout: ``<timestamp> | <level> | <path>:<line> - <message>`` with rich
    color tags; the source path is shown relative to the current working
    directory. ``format_level`` pads the level name to a fixed width of 7.
    """
    # NOTE: the stray debug `print(record)` that used to sit here spammed
    # stdout on every log call — removed.
    return f"[green]{record['time'].strftime('%Y-%m-%d %H:%M:%S')}[/green] | [level]{format_level(record['level'].name,7)}[/level] | [cyan]{record['file'].path.replace(os.getcwd()+os.sep,'')}:{record['line']}[/cyan] - [level]{record['message']}[/level]\n"


Expand Down Expand Up @@ -74,4 +73,4 @@ def Progress():
TextColumn("[red]*Elapsed[/red]"),
TimeElapsedColumn(),
console=console,
)
)
50 changes: 34 additions & 16 deletions preprocess_hubert_f0.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import logging
from multiprocessing import Pipe
import os
import random
from concurrent.futures import ProcessPoolExecutor
Expand Down Expand Up @@ -103,30 +104,47 @@ def process_one(filename, hmodel, f0p, device, diff=False, mel_extractor=None):
np.save(aug_vol_path,aug_vol.to('cpu').numpy())


def process_batch(pipe, file_chunk, f0p, diff=False, mel_extractor=None, device="cpu"):
    """Worker entry point: preprocess one chunk of files.

    Args:
        pipe: writable multiprocessing Connection; ``1`` is sent after each
            finished file so the parent can drive a single shared progress bar.
        file_chunk: list of audio filenames assigned to this worker.
        f0p: f0 predictor name, forwarded to ``process_one``.
        diff: whether to also extract diffusion (mel) features.
        mel_extractor: mel extractor used when ``diff`` is True.
        device: fallback device; overridden per-rank when CUDA is available.
    """
    # _identity is () in the main process, (rank,) in a pool worker.
    rank = mp.current_process()._identity
    rank = rank[0] if len(rank) > 0 else 0
    if torch.cuda.is_available():
        # Spread workers round-robin over the visible GPUs.
        gpu_id = rank % torch.cuda.device_count()
        device = torch.device(f"cuda:{gpu_id}")
    # log=False keeps workers quiet so the parent's progress bar stays readable.
    hmodel = utils.get_speech_encoder(speech_encoder, device=device, log=False)
    for filename in file_chunk:
        process_one(filename, hmodel, f0p, device, diff, mel_extractor)
        pipe.send(1)  # one tick per file -> parent advances the progress bar

def parallel_process(filenames, num_processes, f0p, diff, mel_extractor, device):
    """Split *filenames* across worker processes under one shared progress bar.

    Each worker gets a contiguous slice of *filenames* and reports per-file
    completion through its own ``Pipe``. The parent drains every pipe with a
    non-blocking ``poll()`` instead of a bare ``recv()``: with uneven chunk
    sizes a worker whose (smaller) chunk is exhausted never sends again, and
    a blocking round-robin ``recv()`` on its pipe would deadlock before the
    bar finishes.
    """
    with logger.Progress() as progress:
        task = progress.add_task("Preprocessing", total=len(filenames))
        with ProcessPoolExecutor(max_workers=num_processes) as executor:
            tasks = []
            pipes = []
            for i in range(num_processes):
                # Contiguous, near-equal slices covering all of filenames.
                start = int(i * len(filenames) / num_processes)
                end = int((i + 1) * len(filenames) / num_processes)
                file_chunk = filenames[start:end]
                parent_conn, child_conn = Pipe()
                pipes.append((parent_conn, child_conn))
                tasks.append(executor.submit(
                    process_batch, child_conn, file_chunk, f0p, diff,
                    mel_extractor, device=device))
            while not progress.finished:
                for parent_conn, _child_conn in pipes:
                    # poll(timeout) blocks briefly at most; drain whatever
                    # this worker has reported so far, then move on.
                    while parent_conn.poll(0.01):
                        parent_conn.recv()
                        progress.advance(task)
                if all(t.done() for t in tasks):
                    # Surface worker crashes instead of spinning forever on
                    # ticks that will never arrive.
                    for t in tasks:
                        t.result()  # re-raises any worker exception
                    break
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--device', type=str, default=None)
Expand Down
21 changes: 0 additions & 21 deletions test.py

This file was deleted.

28 changes: 14 additions & 14 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,46 +110,46 @@ def get_f0_predictor(f0_predictor,hop_length,sampling_rate,**kargs):
raise Exception("Unknown f0 predictor")
return f0_predictor_object

def get_speech_encoder(speech_encoder, device=None, log=True, **kargs):
    """Instantiate the content (speech) encoder named *speech_encoder*.

    Args:
        speech_encoder: key identifying the encoder implementation
            (e.g. ``"vec768l12"``, ``"hubertsoft"``, ``"whisper-ppg"``).
        device: torch device handed to the encoder; ``None`` lets the
            encoder pick its own default.
        log: forwarded to the encoder constructor; ``False`` silences its
            load-time logging (used by preprocessing worker processes).
        **kargs: accepted for signature compatibility; currently unused.

    Returns:
        The constructed encoder object.

    Raises:
        Exception: if *speech_encoder* is not a known key.
    """
    import importlib

    # key -> (module, class). Imported lazily so only the selected
    # encoder's (heavy) dependencies are ever loaded.
    encoders = {
        "vec768l12": ("vencoder.ContentVec768L12", "ContentVec768L12"),
        "vec256l9": ("vencoder.ContentVec256L9", "ContentVec256L9"),
        "vec256l9-onnx": ("vencoder.ContentVec256L9_Onnx", "ContentVec256L9_Onnx"),
        "vec256l12-onnx": ("vencoder.ContentVec256L12_Onnx", "ContentVec256L12_Onnx"),
        "vec768l9-onnx": ("vencoder.ContentVec768L9_Onnx", "ContentVec768L9_Onnx"),
        "vec768l12-onnx": ("vencoder.ContentVec768L12_Onnx", "ContentVec768L12_Onnx"),
        "hubertsoft-onnx": ("vencoder.HubertSoft_Onnx", "HubertSoft_Onnx"),
        "hubertsoft": ("vencoder.HubertSoft", "HubertSoft"),
        "whisper-ppg": ("vencoder.WhisperPPG", "WhisperPPG"),
        "cnhubertlarge": ("vencoder.CNHubertLarge", "CNHubertLarge"),
        "dphubert": ("vencoder.DPHubert", "DPHubert"),
        "whisper-ppg-large": ("vencoder.WhisperPPGLarge", "WhisperPPGLarge"),
        "wavlmbase+": ("vencoder.WavLMBasePlus", "WavLMBasePlus"),
    }
    try:
        module_name, class_name = encoders[speech_encoder]
    except KeyError:
        # Same message/type the if/elif chain raised for unknown keys.
        raise Exception("Unknown speech encoder") from None
    encoder_cls = getattr(importlib.import_module(module_name), class_name)
    return encoder_cls(device=device, log=log)
Expand Down
5 changes: 3 additions & 2 deletions vencoder/CNHubertLarge.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class CNHubertLarge(SpeechEncoder):
def __init__(self, vec_path="pretrain/chinese-hubert-large-fairseq-ckpt.pt", device=None):
def __init__(self, vec_path="pretrain/chinese-hubert-large-fairseq-ckpt.pt", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
self.hidden_dim = 1024
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
[vec_path],
Expand Down
5 changes: 3 additions & 2 deletions vencoder/ContentVec256L12_Onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class ContentVec256L12_Onnx(SpeechEncoder):
def __init__(self, vec_path="pretrain/vec-256-layer-12.onnx", device=None):
def __init__(self, vec_path="pretrain/vec-256-layer-12.onnx", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
self.hidden_dim = 256
if device is None:
self.dev = torch.device("cpu")
Expand Down
5 changes: 3 additions & 2 deletions vencoder/ContentVec256L9.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class ContentVec256L9(SpeechEncoder):
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
[vec_path],
suffix="",
Expand Down
5 changes: 3 additions & 2 deletions vencoder/ContentVec256L9_Onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class ContentVec256L9_Onnx(SpeechEncoder):
def __init__(self, vec_path="pretrain/vec-256-layer-9.onnx", device=None):
def __init__(self, vec_path="pretrain/vec-256-layer-9.onnx", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
self.hidden_dim = 256
if device is None:
self.dev = torch.device("cpu")
Expand Down
5 changes: 3 additions & 2 deletions vencoder/ContentVec768L12.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class ContentVec768L12(SpeechEncoder):
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
self.hidden_dim = 768
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
[vec_path],
Expand Down
5 changes: 3 additions & 2 deletions vencoder/ContentVec768L12_Onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class ContentVec768L12_Onnx(SpeechEncoder):
def __init__(self, vec_path="pretrain/vec-768-layer-12.onnx", device=None):
def __init__(self, vec_path="pretrain/vec-768-layer-12.onnx", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
self.hidden_dim = 768
if device is None:
self.dev = torch.device("cpu")
Expand Down
5 changes: 3 additions & 2 deletions vencoder/DPHubert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class DPHubert(SpeechEncoder):
def __init__(self, vec_path="pretrain/DPHuBERT-sp0.75.pth", device=None):
def __init__(self, vec_path="pretrain/DPHuBERT-sp0.75.pth", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
if device is None:
self.dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
Expand Down
5 changes: 3 additions & 2 deletions vencoder/HubertSoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class HubertSoft(SpeechEncoder):
def __init__(self, vec_path="pretrain/hubert-soft-0d54a1f4.pt", device=None):
def __init__(self, vec_path="pretrain/hubert-soft-0d54a1f4.pt", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
hubert_soft = hubert_model.hubert_soft(vec_path)
if device is None:
self.dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Expand Down
5 changes: 3 additions & 2 deletions vencoder/HubertSoft_Onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class HubertSoft_Onnx(SpeechEncoder):
def __init__(self, vec_path="pretrain/hubert-soft.onnx", device=None):
def __init__(self, vec_path="pretrain/hubert-soft.onnx", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
self.hidden_dim = 256
if device is None:
self.dev = torch.device("cpu")
Expand Down
5 changes: 3 additions & 2 deletions vencoder/WavLMBasePlus.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@


class WavLMBasePlus(SpeechEncoder):
def __init__(self, vec_path="pretrain/WavLM-Base+.pt", device=None):
def __init__(self, vec_path="pretrain/WavLM-Base+.pt", device=None, log=True):
super().__init__()
import logger
logger.info("load model(s) from {}".format(vec_path))
if log:
logger.info("load model(s) from {}".format(vec_path))
checkpoint = torch.load(vec_path)
self.cfg = WavLMConfig(checkpoint['cfg'])
if device is None:
Expand Down

0 comments on commit c797641

Please sign in to comment.