In [None]:
# 🔹 Step 1: Install required libraries
!pip install git+https://github.com/suno-ai/bark.git
!pip install librosa soundfile

# 🔹 Step 2: Import libraries
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from google.colab import files
import torch
import librosa
import numpy as np
import soundfile as sf

# 🔹 Step 3: Make torch.load default to full unpickling for Bark checkpoints
# Recent PyTorch releases (2.6+) default `weights_only=True`, which rejects the
# pickled objects stored in Bark's checkpoints. Bark calls torch.load() without
# that argument, so only the *default* needs changing.
# SECURITY: weights_only=False can execute arbitrary code while unpickling —
# only load checkpoints from sources you trust.

# Re-running this cell must not wrap the wrapper again, so recover the pristine
# function from the marker attribute left behind by a previous run.
original_torch_load = getattr(torch.load, "_original_torch_load", torch.load)


def custom_torch_load(*args, **kwargs):
    """Call the real ``torch.load`` with ``weights_only`` defaulting to False.

    All positional and keyword arguments are forwarded unchanged. A caller
    that passes ``weights_only`` explicitly (e.g. ``True`` for an untrusted
    file) keeps the value it asked for.
    """
    kwargs.setdefault('weights_only', False)
    return original_torch_load(*args, **kwargs)


custom_torch_load._original_torch_load = original_torch_load
torch.load = custom_torch_load

# 🔹 Step 4: Load Bark Models (small models for speed)
preload_models(text_use_small=True, coarse_use_small=True, fine_use_small=True)


def preprocess_audio(audio_path, target_sr=24000):
    """Load an audio file as a peak-normalised mono signal at ``target_sr``.

    Args:
        audio_path: Path of an audio file readable by ``soundfile``.
        target_sr: Sample rate, in Hz, of the returned signal.

    Returns:
        1-D NumPy array whose largest absolute sample is scaled to just
        under 1.0.

    Raises:
        ValueError: If the file contains no samples.
    """
    audio, sr = sf.read(audio_path)
    if audio.size == 0:
        raise ValueError(f"Reference audio {audio_path!r} contains no samples.")
    # Downmix BEFORE resampling: soundfile returns multi-channel audio as
    # (frames, channels), while librosa.resample works along the last axis by
    # default — resampling first would stretch the channel axis, not time.
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
    # The 1e-6 keeps the division defined for an all-silent recording.
    return audio / (np.max(np.abs(audio)) + 1e-6)


def _save_and_download(audio_array, filename):
    """Write ``audio_array`` to ``filename`` as a WAV file and download it."""
    write_wav(filename, SAMPLE_RATE, audio_array)
    files.download(filename)


# 🔹 Step 5: Choose Mode (Text-only OR Reference Voice)
# Strip so stray whitespace around the answer is not treated as invalid input.
mode = input("Enter mode: '1' for text-to-voice (Week 1), '2' for reference voice + text (Week 2): ").strip()

if mode == '1':
    # Week 1: Just text to speech using default Bark voice
    text = input("Enter your text: ")
    audio_array = generate_audio(text)
    _save_and_download(audio_array, "bark_week1.wav")
    print("✅ Week 1 voice generated and downloaded.")

elif mode == '2':
    # Week 2: Upload reference audio and generate speech (simulated cloning)
    print("Upload your 30–60 sec reference WAV file (mono, 24kHz)...")
    uploaded = files.upload()

    if not uploaded:
        # The upload dialog was dismissed without choosing a file.
        print("❌ No file uploaded. Run again and choose a reference WAV file.")
    else:
        # Only the first uploaded file is used as the reference.
        audio_file = next(iter(uploaded))

        # Load and normalize reference audio.
        # NOTE: the result is not passed to Bark below — generation uses the
        # built-in "v2/en_speaker_9" preset, so the cloning is only simulated.
        reference_audio = preprocess_audio(audio_file)

        text = input("Enter the text you want the cloned voice to say: ")
        audio_array = generate_audio(text, history_prompt="v2/en_speaker_9")  # default voice
        _save_and_download(audio_array, "cloned_voice_week2.wav")
        print("✅ Week 2 simulated voice generated and downloaded.")

else:
    print("❌ Invalid input. Run again and enter '1' or '2'.")
