First commit

lucataco · Nov 10, 2023 · ba40870 · ba40870
commit ba40870
Show file tree

Hide file tree

Showing 7 changed files with 76 additions and 0 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,17 @@
+# The .dockerignore file excludes files from the container build process.
+#
+# https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+# Exclude Git files
+.git
+.github
+.gitignore
+
+# Exclude Python cache files
+__pycache__
+.mypy_cache
+.pytest_cache
+.ruff_cache
+
+# Exclude Python virtual environment
+/venv
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+.cog
+TTS
diff --git a/README.md b/README.md
@@ -0,0 +1,13 @@
+# coqui /xtts-v2
+
+This is an implementation of [coqui/xtts-v2](https://github.com/coqui-ai/tts) as a Cog model. [Cog packages machine learning models as standard containers.](https://github.com/replicate/cog)
+
+Run predictions:
+
+    cog predict -i text="Hi there, I'm your new voice clone. Try your best to upload quality audio" -i speaker_wav=@female.wav
+
+## Example:
+
+"Hi there, I'm your new voice clone. Try your best to upload quality audio"
+
+[Listen to the example output (output.wav)](output.wav)
diff --git a/cog.yaml b/cog.yaml
@@ -0,0 +1,10 @@
+# Configuration for Cog ⚙️
+build:
+  gpu: true
+  cuda: "11.8"
+  python_version: "3.11"
+
+  python_packages:
+    - "git+https://github.com/coqui-ai/TTS.git"
+
+predict: "predict.py:Predictor"
diff --git a/female.wav b/female.wav
diff --git a/output.wav b/output.wav
diff --git a/predict.py b/predict.py
@@ -0,0 +1,33 @@
+# Prediction interface for Cog
+from cog import BasePredictor, Input, Path
+import os
+from TTS.api import TTS
+
+class Predictor(BasePredictor):
+    def setup(self) -> None:
+        """Load the model into memory to make running multiple predictions efficient"""
+        os.environ["COQUI_TOS_AGREED"] = "1"
+        self.model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to('cuda')
+
+    def predict(
+        self,
+        text: str = Input(
+            description="Text to synthesize",
+            default="Hi there, I'm your new voice clone. Try your best to upload quality audio"
+        ),
+        speaker_wav: Path = Input(description="Original speaker audio"),
+        language: str = Input(
+            description="Language",
+            choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn"],
+            default="en"
+        ),
+    ) -> Path:
+        """Run a single prediction on the model"""
+        path = self.model.tts_to_file(
+            text=text, 
+            file_path = "output.wav",
+            speaker_wav = speaker_wav,
+            language= language
+        )
+
+        return Path(path)