replicate

pollinations · Jun 13, 2023 · 53ce520 · 53ce520
1 parent ebdf5e1
commit 53ce520
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 30 deletions.
diff --git a/cog.yaml b/cog.yaml
@@ -1,35 +1,6 @@
-# # Configuration for Cog ⚙️
-# # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
-
-# build:
-#   gpu: true
-#   cuda: "11.6"
-#   python_version: "3.10"
-#   system_packages:
-#     - "ffmpeg"
-#   python_packages:
-#     - "torch==1.13.1"
-#     - "sentencepiece==0.1.97"
-#     - "accelerate==0.16.0"
-#     - "Pillow==9.5.0"
-#     - "attrs==22.2.0"
-#     - "opencv-python==4.7.0.72"
-#     - git+https://github.com/openai/CLIP.git
-#     - "tqdm==4.65.0"
-#     - "ftfy==6.1.1"
-#     - "blobfile==2.0.1"
-#     - "transformers==4.23.1"
-#     - "torchvision==0.14.1"
-#     - "omegaconf==2.3.0"
-#     - "pytorch_lightning==2.0.1"
-#     - "einops==0.6.0"
-
-# predict: "predict.py:Predictor"
-
-
 build:
   gpu: true
-  cuda: "11.6"
+  cuda: "11.8"
   python_version: "3.10"
   system_packages:
     - nvidia-docker2
@@ -51,3 +22,4 @@ build:
     - "einops==0.6.0"
   run:
     - pip install av einops flashy>=0.0.1 hydra-core>=1.1 hydra_colorlog julius num2words numpy sentencepiece spacy==3.5.2 torch>=2.0.0 torchaudio>=2.0.0 huggingface_hub tqdm transformers xformers demucs librosa gradio
+predict: "predict.py:Predictor"
diff --git a/predict.py b/predict.py
@@ -0,0 +1,50 @@
+# Prediction interface for Cog ⚙️
+# https://github.com/replicate/cog/blob/main/docs/python.md
+
+from cog import BasePredictor, Input, Path
+import torchaudio
+from audiocraft.models import MusicGen
+from audiocraft.data.audio import audio_write
+import soundfile as sf
+import time
+import uuid
+
+class Predictor(BasePredictor):
+    def setup(self):
+        """Load the model into memory to make running multiple predictions efficient"""
+        self.model = MusicGen.get_pretrained('large', device='cuda')
+        self.model.set_generation_params(duration=12)  # generate 12 seconds of audio per prediction.
+        # wav = model.generate_unconditional(4)    # generates 4 unconditional audio samples
+
+
+        self.model_melody = MusicGen.get_pretrained('melody', device='cuda')
+        self.model_melody.set_generation_params(duration=16)  # 16 seconds. NOTE(review): model_melody is never used by predict() below.
+        # wav = model.generate_unconditional(4)    # generates 4 unconditional audio samples
+
+    def predict(
+        self,
+        text: str = Input(description="Text prompt", default="Music to watch girls go by"),
+        # scale: float = Input(
+        #     description="Factor to scale image by", ge=0, le=10, default=1.5
+        # ),
+    ) -> Path:
+        """Run a single prediction on the model"""
+
+        start = time.time()
+        wav = self.model.generate([text], progress=True)  # one prompt in the list, so presumably a batch of one sample -- TODO confirm.
+        wav = wav[0]
+        end = time.time()
+        print(f"Generation took {end-start} seconds")
+
+        save_path = f"/tmp/{uuid.uuid4()}"
+
+        # Expected to save under {save_path}.wav (audio_write presumably appends the extension -- verify), using the "loudness" normalization strategy.
+        print(f"Saving {save_path}")
+        audio_write(f'{save_path}', wav.cpu(), self.model.sample_rate, strategy="loudness")
+
+
+        save_path = save_path + ".wav"
+        # audio_write was given the stem only; return the path with the ".wav" suffix added above
+        return Path(save_path)
+
+