Step 0: Install libraries

In [None]:

!pip install -q gradio soundfile numpy scipy sqlalchemy python-dotenv pydub librosa requests
# Optional heavy libs if you want HF/transformers locally (GPU required)
# !pip install -q transformers accelerate diffusers "huggingface_hub>=0.10.0" replicate openai sentence-transformers

Step 1: Imports and Setup

All imports are gathered at the top.
A logger is set up here (the code below uses `logger`, but it was never defined).
Constants (STORAGE_FOLDER, etc.) are defined.

In [None]:
import os
import logging
import shutil
import time
import random
import uuid
import sqlite3
import numpy as np
import gradio as gr
import soundfile as sf
from pathlib import Path

# Logging: configure the root handler at INFO level, then take this
# module's logger, which the functions below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Storage layout: one root folder holding a "sounds" sub-folder for audio
# files and the SQLite file that records every creation.
STORAGE_FOLDER = Path("creative_audio_storage")
SOUND_FOLDER = STORAGE_FOLDER.joinpath("sounds")
DB_FILE = STORAGE_FOLDER.joinpath("records.db")
STORAGE_FOLDER.mkdir(exist_ok=True, parents=True)
SOUND_FOLDER.mkdir(exist_ok=True, parents=True)

# Creation history: open the database with the same-thread check disabled,
# keep a module-level cursor, and make sure the "creations" table exists.
db_conn = sqlite3.connect(DB_FILE, check_same_thread=False)
db_cursor = db_conn.cursor()
db_cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS creations (
        creation_id TEXT PRIMARY KEY,
        type TEXT,
        description TEXT,
        reference_audio TEXT,
        output_file TEXT,
        source TEXT,
        timestamp REAL
    )
"""
)
db_conn.commit()





Step 2: Procedural Music Function

The create_background_tune function is kept the same; only its indentation was fixed.
Added the missing np and random usage (both are already imported).

In [None]:
# Improved procedural music creation function
def create_background_tune(length_seconds: int = 25, tempo: int = 110, random_seed: int = None) -> str:
    """
    Generate a procedural ambient tune and write it to a WAV file in SOUND_FOLDER.

    The clip is divided into four equal sections, one per chord. Each chord is
    a sum of slightly detuned sine waves shaped by an attack/decay/sustain/
    release envelope. Short noise bursts are added on a random subset of
    beats, the mix is convolved with a decaying exponential kernel, and the
    result is peak-normalized to 0.75.

    Args:
        length_seconds: Clip duration in seconds; must yield at least one sample.
        tempo: Beats per minute of the percussion grid; must be positive and
            low enough that one beat spans at least one sample.
        random_seed: If given, seeds the global `random` and `np.random`
            generators so detune, beat skips and noise are repeatable.

    Returns:
        Path of the written WAV file, as a string.

    Raises:
        ValueError: If `length_seconds` or `tempo` is out of range.
    """
    sample_rate = 22050  # Lowered for lighter files
    burst_samples = 100  # Length of one percussion noise hit

    # Validate up front: an empty signal or a zero-sample beat would otherwise
    # fail later with an unrelated-looking error from numpy/range.
    total_samples = int(sample_rate * length_seconds)
    if total_samples < 1:
        raise ValueError("length_seconds must be positive")
    if tempo <= 0:
        raise ValueError("tempo must be positive")
    beat_interval = 60 / tempo
    beat_samples = int(beat_interval * sample_rate)
    if beat_samples < 1:
        raise ValueError("tempo is too high for the sample rate")

    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    time_array = np.linspace(0, length_seconds, total_samples, endpoint=False)
    # Chord progressions with variations
    chord_bases = [
        [261.63, 329.63, 392.00, 523.25],  # Extended C major
        [293.66, 349.23, 440.00, 587.32],  # D minor 7th-ish
        [329.63, 392.00, 493.88, 659.25],  # E minor add9
        [349.23, 440.00, 523.25, 698.46],  # F major 7th
    ]
    tune = np.zeros_like(time_array)
    section_length = total_samples // len(chord_bases)
    for idx, chords in enumerate(chord_bases):
        section_start = idx * section_length
        # The last section absorbs the samples left over by integer division.
        section_end = section_start + section_length if idx < len(chord_bases) - 1 else total_samples
        section_time = time_array[section_start:section_end]
        wave = np.zeros_like(section_time)
        for freq in chords:
            variation = random.uniform(-2.0, 2.0)  # Slight detune for richness
            wave += 0.25 * np.sin(2 * np.pi * (freq + variation) * section_time)
        # ADSR-like envelope per section: 10% attack, 10% decay, 60% sustain,
        # and whatever remains as release so the pieces sum to the section size.
        attack = int(len(section_time) * 0.1)
        decay = int(len(section_time) * 0.1)
        sustain = int(len(section_time) * 0.6)
        release = len(section_time) - (attack + decay + sustain)
        envelope = np.concatenate([
            np.linspace(0, 1, attack),
            np.linspace(1, 0.7, decay),
            np.ones(sustain) * 0.7,
            np.linspace(0.7, 0, release)
        ])
        tune[section_start:section_end] += wave * envelope

    # Light percussion layer: a soft noise hit on roughly 30% of the beats.
    for beat_pos in range(0, total_samples, beat_samples):
        if random.random() > 0.7:  # Random skips for variation
            # Clip the hit at the end of the clip; a fixed 100-sample burst
            # would not fit a shorter slice and would raise a shape error.
            hit_len = min(burst_samples, total_samples - beat_pos)
            tune[beat_pos:beat_pos + hit_len] += 0.1 * np.random.normal(0, 1, hit_len)

    # Simple echo-like tail: convolve with a 600-tap decaying exponential;
    # mode="same" keeps the output the same length as the input.
    delay_kernel = np.exp(-np.linspace(0, 1.5, 600))
    tune = np.convolve(tune, delay_kernel, mode="same")

    # Peak-normalize (the epsilon guards against an all-zero signal) and save.
    tune = tune / (np.max(np.abs(tune)) + 1e-10) * 0.75
    # The uuid suffix keeps two clips created within the same second from
    # overwriting each other.
    file_path = SOUND_FOLDER / f"tune_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
    sf.write(str(file_path), tune.astype(np.float32), sample_rate)
    return str(file_path)






Step 3: Procedural Voice Function

The create_synthetic_vocal function is kept the same; only its indentation was fixed.

In [None]:
# Upgraded procedural voice synthesis
def create_synthetic_vocal(text_input: str, vocal_seed: int = None) -> str:
    """
    Synthesize a simple melodic "vocal" line from text and write it to a WAV
    file in SOUND_FOLDER.

    Every whitespace-separated word gets one equal-length time slot. The
    slot's pitch depends only on the word's position in the text (not on its
    letters) and is rendered as three stacked sine partials under a raised-sine
    swell. A slow amplitude modulation and a small amount of noise are applied
    before peak-normalizing to 0.8.

    Args:
        text_input: Text whose word count drives the number of notes and the
            clip length (3.0 s plus 0.15 s per word).
        vocal_seed: If given, seeds the global `random` and `np.random`
            generators so partial detune and noise are repeatable.

    Returns:
        Path of the written WAV file, as a string.

    Raises:
        ValueError: If `text_input` is empty or contains only whitespace.
    """
    words = text_input.split() if text_input else []
    if not words:
        # Without any word the track would hold only the noise floor, which
        # the normalization below would amplify to near full scale.
        raise ValueError("text_input must contain at least one word")

    if vocal_seed is not None:
        random.seed(vocal_seed)
        np.random.seed(vocal_seed)

    sample_rate = 22050
    num_words = len(words)
    total_length = 3.0 + num_words * 0.15  # Adjusted for natural pacing
    time_array = np.linspace(0, total_length, int(sample_rate * total_length), endpoint=False)
    vocal_track = np.zeros_like(time_array)
    segment_duration = len(time_array) // num_words

    for word_idx in range(num_words):
        base_pitch = 200 + (word_idx % 10) * 25  # Pitch cycles every 10 words
        segment_start = word_idx * segment_duration
        segment_end = min(len(time_array), (word_idx + 1) * segment_duration)
        segment_length = segment_end - segment_start
        if segment_length <= 0:
            continue
        segment_time = np.linspace(0, segment_length / sample_rate, segment_length, endpoint=False)
        # Three partials approximating a vowel-like timbre: the fundamental
        # plus two higher, randomly detuned components.
        formant1 = np.sin(2 * np.pi * base_pitch * segment_time)
        formant2 = 0.6 * np.sin(2 * np.pi * (base_pitch * 1.5 + random.uniform(-10, 10)) * segment_time)
        formant3 = 0.3 * np.sin(2 * np.pi * (base_pitch * 2.2 + random.uniform(-15, 15)) * segment_time)
        combined = formant1 + formant2 + formant3
        # Raised-sine swell so each word fades in and out smoothly.
        envelope = np.sin(np.pi * np.linspace(0, 1, segment_length)) ** 2
        vocal_track[segment_start:segment_end] += combined * envelope * 0.6

    # Global 5 Hz modulation of the amplitude by +/-2% (this scales loudness;
    # it does not bend the pitch).
    vibrato = 1 + 0.02 * np.sin(2 * np.pi * 5 * time_array)
    vocal_track *= vibrato

    # Add subtle noise for realism, then peak-normalize (the epsilon guards
    # against division by zero).
    vocal_track += 0.005 * np.random.randn(len(vocal_track))
    vocal_track = vocal_track / (np.max(np.abs(vocal_track)) + 1e-10) * 0.8

    # The uuid suffix keeps two clips created within the same second from
    # overwriting each other.
    file_path = SOUND_FOLDER / f"vocal_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
    sf.write(str(file_path), vocal_track.astype(np.float32), sample_rate)
    return str(file_path)








Step 4: Placeholder Functions for Advanced Generation


In [None]:
# Placeholder for advanced music generation (e.g., via API)
def advanced_tune_generation(desc: str, length: int = 15) -> str:
    """
    Stand-in for an external music-generation backend.

    No external service is called yet: the request is logged and handed to
    create_background_tune. `desc` is accepted but currently unused.

    Args:
        desc: Text description of the desired tune.
        length: Clip length in seconds, forwarded as `length_seconds`.

    Returns:
        Whatever create_background_tune returns (the output file path).
    """
    # A real integration would call the external API at this point.
    logger.info("Using fallback procedural for advanced tune")
    tune_path = create_background_tune(length_seconds=length)
    return tune_path

# Placeholder for advanced voice generation
def advanced_vocal_generation(text: str, ref_audio: str = None) -> str:
    """
    Stand-in for an external voice-synthesis backend.

    No external service is called yet: the request is logged and handed to
    create_synthetic_vocal. `ref_audio` is accepted but currently unused.

    Args:
        text: Text or lyrics to synthesize.
        ref_audio: Optional path of a reference clip.

    Returns:
        Whatever create_synthetic_vocal returns (the output file path).
    """
    logger.info("Using fallback procedural for advanced vocal")
    vocal_path = create_synthetic_vocal(text)
    return vocal_path





Step 5: DB Logging Function

The INSERT uses parameterized SQL placeholders (`?`) rather than string formatting.

In [None]:
# Record creation in DB
def log_creation(entry_type: str, desc: str, output: str, source: str = "local", ref: str = None) -> str:
    """
    Insert one row describing a generated file into the `creations` table.

    Args:
        entry_type: Kind of creation, stored in the `type` column.
        desc: Description or text the creation was generated from.
        output: Path of the generated file.
        source: Label of the generation method; defaults to "local".
        ref: Optional path of the reference audio, stored as NULL when None.

    Returns:
        The newly generated UUID string used as `creation_id`.
    """
    entry_id = str(uuid.uuid4())
    row = (entry_id, entry_type, desc, ref, output, source, time.time())
    # Using the connection as a context manager commits on success and rolls
    # back if the INSERT raises, so a failure never leaves a transaction open.
    # Connection.execute creates a fresh cursor per call instead of reusing
    # the shared module-level cursor.
    with db_conn:
        db_conn.execute(
            "INSERT INTO creations (creation_id, type, description, reference_audio, output_file, source, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?)",
            row,
        )
    return entry_id

Step 6: UI Handler Functions

Added type checks for ref_upload.

In [None]:
# UI handler for tune generation
def handle_tune_creation(desc, length, advanced_mode):
    """
    Create a tune for the UI and record it in the database.

    Args:
        desc: Description text; passed to the advanced generator and logged.
        length: Requested length in seconds (converted with int()).
        advanced_mode: Truthy to use advanced_tune_generation, otherwise
            create_background_tune.

    Returns:
        A (status message, file path) pair; the path is None when any step
        raised an exception.
    """
    try:
        seconds = int(length)
        if not advanced_mode:
            src = "local"
            file = create_background_tune(length_seconds=seconds)
        else:
            src = "advanced"
            file = advanced_tune_generation(desc, length=seconds)
        entry_id = log_creation("tune", desc, file, src)
    except Exception as err:
        # Report the failure in the status box instead of raising into the UI.
        logger.error(f"Tune creation failed: {err}")
        return f"Failed to create tune: {str(err)}", None
    return f"Tune created successfully (ID: {entry_id}) with {src} method", file

# UI handler for vocal generation
def _save_reference_audio(ref_upload):
    """Store an uploaded reference clip under SOUND_FOLDER; return its Path, or None if nothing usable was given."""
    if ref_upload is None:
        return None
    # Timestamp plus a uuid fragment so uploads within the same second do not
    # overwrite each other.
    stamp = f"{int(time.time())}_{uuid.uuid4().hex[:8]}"
    if isinstance(ref_upload, tuple) and len(ref_upload) == 2:  # (sr, data)
        sr, data = ref_upload
        ref_path = SOUND_FOLDER / f"ref_{stamp}.wav"
        sf.write(str(ref_path), np.array(data), sr)
        return ref_path
    if isinstance(ref_upload, str) and os.path.exists(ref_upload):
        ref_path = SOUND_FOLDER / f"ref_copy_{stamp}.wav"
        shutil.copy(ref_upload, ref_path)
        return ref_path
    # Anything else is skipped, but say so rather than dropping it silently.
    logger.warning("Ignoring reference audio that is neither an (sr, data) tuple nor an existing file path")
    return None


def handle_vocal_creation(text, ref_upload, advanced_mode):
    """
    Create a vocal clip for the UI and record it in the database.

    Args:
        text: Text or lyrics to synthesize.
        ref_upload: Optional reference audio, either an (sr, data) tuple or a
            path to an existing file; other values are ignored.
        advanced_mode: Truthy to use advanced_vocal_generation, otherwise
            create_synthetic_vocal.

    Returns:
        A (status message, file path) pair; the path is None when any step
        raised an exception.
    """
    try:
        ref_path = _save_reference_audio(ref_upload)
        ref_str = str(ref_path) if ref_path else None
        if advanced_mode:
            # The parameter is named `ref_audio`; passing `ref_path=` raised
            # a TypeError on every advanced-mode request.
            file = advanced_vocal_generation(text, ref_audio=ref_str)
            src = "advanced"
        else:
            file = create_synthetic_vocal(text)
            src = "local"
        entry_id = log_creation("vocal", text, file, src, ref_str)
        return f"Vocal created successfully (ID: {entry_id}) with {src} method", file
    except Exception as err:
        # Report the failure in the status box instead of raising into the UI.
        logger.error(f"Vocal creation failed: {err}")
        return f"Failed to create vocal: {str(err)}", None







Step 7: Gradio Interface Function and Main Block

Added a theme to the Gradio interface and fixed the `__name__ == "__main__"` check.

In [18]:
# Construct and start the interface
def start_interface(public_share: bool = True, open_browser: bool = False):
    """
    Build the two-tab Gradio app (tune creator and vocal creator) and launch it.

    Args:
        public_share: Forwarded to `launch(share=...)`.
        open_browser: Forwarded to `launch(inbrowser=...)`.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as app:
        gr.Markdown("# Creative Audio Tool - Enhanced Edition")

        # Tab 1: inputs and outputs for procedural/advanced tune creation.
        with gr.Tab("Tune Creator"):
            desc_box = gr.Textbox(
                lines=4,
                label="Describe the tune (e.g., upbeat synthwave with drums)",
                value="Relaxing piano melody with soft strings",
            )
            length_slider = gr.Slider(10, 90, value=30, step=5, label="Length in seconds")
            advanced_tune_box = gr.Checkbox(
                label="Enable advanced generation (if configured)",
                value=False,
            )
            tune_button = gr.Button("Create Tune")
            tune_result = gr.Textbox(label="Result")
            tune_player = gr.Audio(label="Your Tune")

        # Tab 2: inputs and outputs for vocal creation.
        with gr.Tab("Vocal Creator"):
            lyrics_box = gr.Textbox(
                lines=4,
                label="Text or lyrics for vocal",
                value="Welcome to the future of sound creation.",
            )
            reference_audio = gr.Audio(type="numpy", label="Optional reference audio for style")
            advanced_vocal_box = gr.Checkbox(
                label="Enable advanced vocal synthesis (if configured)",
                value=False,
            )
            vocal_button = gr.Button("Create Vocal")
            vocal_result = gr.Textbox(label="Result")
            vocal_player = gr.Audio(label="Your Vocal")

        # Wire each button to its handler; every handler returns a
        # (status text, audio file) pair matching the two outputs.
        tune_button.click(
            fn=handle_tune_creation,
            inputs=[desc_box, length_slider, advanced_tune_box],
            outputs=[tune_result, tune_player],
        )

        vocal_button.click(
            fn=handle_vocal_creation,
            inputs=[lyrics_box, reference_audio, advanced_vocal_box],
            outputs=[vocal_result, vocal_player],
        )

    app.launch(share=public_share, inbrowser=open_browser)
    print("Interface is running. Check for the Gradio URL if in a notebook.")

# Entry point: when this code runs as the main module, start the UI with
# share=True and without opening a browser window.
if __name__ == "__main__":
    start_interface(public_share=True, open_browser=False)