## git clone project and install independencies

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

# cd to current directory
%cd /content/drive/MyDrive/riffusion_project/

!rm -rf Training-Free-StyleID

# clone project
!git clone --depth 1 https://github.com/michaelku1/Training-Free-StyleID.git
# install environment
!curl -L https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -o miniconda.sh
!chmod +x miniconda.sh
!sh miniconda.sh -b -p /content/miniconda
!/content/miniconda/bin/pip install -r requirements.txt
!/content/miniconda/bin/pip install --upgrade ipython ipykernel

%cd ./Training-Free-StyleID
# mitigate ModuleNotFoundError: No module named 'dacite'
! pip install dacite flask_cors argh pyngrok

Mounted at /content/drive/
/content/drive/MyDrive/riffusion_project
Cloning into 'Training-Free-StyleID'...
remote: Enumerating objects: 141, done.[K
remote: Counting objects: 100% (141/141), done.[K
remote: Compressing objects: 100% (131/131), done.[K
remote: Total 141 (delta 18), reused 102 (delta 5), pack-reused 0 (from 0)[K
Receiving objects: 100% (141/141), 195.09 KiB | 5.74 MiB/s, done.
Resolving deltas: 100% (18/18), done.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 73.0M  100 73.0M    0     0  8680k      0  0:00:08  0:00:08 --:--:-- 8679k
PREFIX=/content/miniconda
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ done
Solving environment: / - \ done

## Package Plan ##

  environment location: /content/miniconda

  added / updated specs:
    - _libgcc_mutex==0.1=main
    - _openmp_mutex==4.5=1_gnu
    - brotlipy==0.7.0=

## run server

In [2]:
"""
Flask server that serves the riffusion model as an API.
"""

import dataclasses
import io
import json
import logging
import time
import typing as T
from pathlib import Path

import dacite
import flask
import PIL
import torch
from flask_cors import CORS
from pyngrok import ngrok

# Fix CUDA linear algebra backend to avoid cusolver errors
torch.backends.cuda.preferred_linalg_library('magma')

# NOTE original riffusion pipeline
from riffusion.riffusion_pipeline import RiffusionPipeline
from riffusion.datatypes import InferenceInput, InferenceOutput

from riffusion.spectrogram_image_converter import SpectrogramImageConverter
from riffusion.spectrogram_params import SpectrogramParams

from riffusion.util import base64_util

# from flask_ngrok import run_with_ngrok
NGROK_AUTH_TOKEN = "32MmrpMI4sZN558sIugyRuhDgDg_5AdY64F9xihYgNZZfyHJL"

# Flask app with CORS
app = flask.Flask(__name__)
CORS(app)

# Create a logger object
logger = logging.getLogger("my_server")
logger.setLevel(logging.DEBUG)

# Log at the INFO level to both stdout and disk
logging.basicConfig(level=logging.INFO)
logging.getLogger().addHandler(logging.FileHandler("server.log"))

# Create a file handler to write logs to a file
file_handler = logging.FileHandler("server.log")
file_handler.setLevel(logging.DEBUG)

# set format
formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')

# initalise file handler
file_handler.setFormatter(formatter)

# initalise console handler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)

# Add handlers to the logger
logger.addHandler(file_handler)

# Global variable for the model pipeline
PIPELINE: T.Optional[RiffusionPipeline] = None

# set auth token for free n_grok usage
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

  \( - literal character '('
  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):




In [3]:
def compute_request(
    inputs: InferenceInput,
    pipeline: RiffusionPipeline,
) -> T.Union[str, T.Tuple[str, int]]:
    """
    Does all the heavy lifting of the request.

    Args:
        inputs: The input dataclass
        pipeline: The riffusion model pipeline
    """

    # Load the seed image by ID
    init_image_path = Path(f"{inputs.seed_image_path}.png")

    print("######################### input image path: ", init_image_path)

    if not init_image_path.is_file():
        return f"Invalid seed image: {inputs.seed_image_path}", 400
    init_image = PIL.Image.open(str(init_image_path)).convert("RGB")

    # Load the mask image by ID
    mask_image: T.Optional[PIL.Image.Image] = None

    # NOTE pass mask image here
    # mask_image = PIL.Image.open("...png").convert("RGB")
    if inputs.mask_image_path:
        mask_image_path = Path(f"{inputs.mask_image_path}.png")
        if not mask_image_path.is_file():
            return f"Invalid mask image: {inputs.mask_image_path}", 400
        mask_image = PIL.Image.open(str(mask_image_path)).convert("RGB")

    print("inputs:", inputs)
    print("init_image", init_image)
    print("mask_image", mask_image)

    # Execute the model to get the spectrogram image
    image = pipeline.riffuse(
        inputs,
        init_image=init_image,
        mask_image=mask_image,
    )

    # TODO(hayk): Change the frequency range to [20, 20k] once the model is retrained
    params = SpectrogramParams(
        min_frequency=0,
        max_frequency=10000,
    )

    # Reconstruct audio from the image
    # TODO(hayk): It may help performance a bit to cache this object
    # Use CPU for audio processing to avoid CUDA solver issues
    converter = SpectrogramImageConverter(params=params, device="cpu")

    # NOTE 轉回 audio signal
    segment = converter.audio_from_spectrogram_image(
        image,
        apply_filters=True,
    )

    # Export audio to MP3 bytes
    mp3_bytes = io.BytesIO()
    segment.export(mp3_bytes, format="mp3")
    mp3_bytes.seek(0)

    # Export image to JPEG bytes
    image_bytes = io.BytesIO()
    image.save(image_bytes, exif=image.getexif(), format="JPEG")
    image_bytes.seek(0)

    # Assemble the output dataclass
    output = InferenceOutput(
        image="data:image/jpeg;base64," + base64_util.encode(image_bytes),
        audio="data:audio/mpeg;base64," + base64_util.encode(mp3_bytes),
        duration_s=segment.duration_seconds,
    )

    # release memory
    import gc
    del image, mask_image, init_image  # delete big tensors
    gc.collect()
    torch.cuda.empty_cache()  # free cached memory
    torch.cuda.ipc_collect()  # (optional) reclaim inter-process memory

    output_name = f"{''.join(inputs.seed_image_path.split('/')[-2:])}_to_{''.join(inputs.mask_image_path.split('/')[-2:])}"

    with open(f"{inputs.output_path}/{output_name}.json", "w") as f:
        json.dump(dataclasses.asdict(output), f, indent=2, ensure_ascii=False)

    print("output json path:", output_name, flush=True)

    return output

In [4]:
# Where built-in seed images are stored
# import traceback
# def run_app_background(*args, **kwargs):
#     try:
#         # Your existing Flask + ngrok code
#         global PIPELINE

#         import logging, sys
#         logging.basicConfig(
#             level=logging.DEBUG,
#             format="%(asctime)s [%(levelname)s] %(message)s",
#             handlers=[logging.StreamHandler(sys.stdout)]
#         )
#         app.logger.setLevel(logging.DEBUG)

#         app.logger.info("Loading RiffusionPipeline...")
#         PIPELINE = RiffusionPipeline.load_checkpoint(
#             checkpoint=kwargs.get("checkpoint", "riffusion/riffusion-model-v1"),
#             use_traced_unet=not kwargs.get("no_traced_unet", False),
#             device=kwargs.get("device", "cuda")
#         )
#         app.logger.info("Pipeline loaded successfully!")

#         public_url = ngrok.connect(kwargs.get("port", 5000))
#         print(f" * ngrok tunnel URL: {public_url}", flush=True)

#         app.logger.info(f"Starting Flask server on port {kwargs.get('port', 5000)}...")
#         app.run(port=kwargs.get("port", 5000), debug=kwargs.get("debug", True), use_reloader=False)

#     except Exception:
#         print("Exception in background thread:", flush=True)
#         traceback.print_exc()

def run_app(
    *,
    checkpoint: str = "riffusion/riffusion-model-v1",
    no_traced_unet: bool = False,
    device: str = "cuda",
    port: int = 5000,
    debug: bool = False,
):
    """
    Run a Flask API that serves the given riffusion model checkpoint
    and exposes it via ngrok.
    """
    global PIPELINE

    # Initialize the model
    PIPELINE = RiffusionPipeline.load_checkpoint(
        checkpoint=checkpoint,
        use_traced_unet=not no_traced_unet,
        device=device,
    )

    # Set debug mode
    app.debug = debug

    # Start ngrok tunnel
    public_url = ngrok.connect(port)
    print(f" * ngrok tunnel URL: {public_url}", flush=True)

    # Start Flask server
    app.run(port=port)


@app.route("/run_inference/", methods=["POST"])
def run_inference():
    """
    Execute the riffusion model as an API.

    Inputs:
        Serialized JSON of the InferenceInput dataclass

    Returns:
        Serialized JSON of the InferenceOutput dataclass
    """
    start_time = time.time()

    # Parse the payload as JSON
    json_data = json.loads(flask.request.data)

    # Log the request
    logging.info(json_data)

    # Parse an InferenceInput dataclass from the payload
    try:
        inputs = dacite.from_dict(InferenceInput, json_data)
    except dacite.exceptions.WrongTypeError as exception:
        logging.info(json_data)
        return str(exception), 400
    except dacite.exceptions.MissingValueError as exception:
        logging.info(json_data)
        return str(exception), 400

    # NOTE
    response = compute_request(
        inputs=inputs,
        pipeline=PIPELINE,
    )

    # Log the total time
    logging.info(f"Request took {time.time() - start_time:.2f} s")

    return response


# @app.route("/run_inference/", methods=["POST"])
# def run_inference():
#     """
#     Execute the riffusion model as an API.

#     Inputs:
#         Serialized JSON of the InferenceInput dataclass

#     Returns:
#         Serialized JSON of the InferenceOutput dataclass
#     """
#     start_time = time.time()

#     # Parse the payload as JSON
#     json_data = json.loads(flask.request.data)

#     # Log the request
#     logging.info(json_data)

#     # Parse an InferenceInput dataclass from the payload
#     try:
#         inputs = dacite.from_dict(InferenceInput, json_data)
#     except dacite.exceptions.WrongTypeError as exception:
#         logging.info(json_data)
#         return str(exception), 400
#     except dacite.exceptions.MissingValueError as exception:
#         logging.info(json_data)
#         return str(exception), 400

#     # NOTE
#     response = compute_request(
#         inputs=inputs,
#         pipeline=PIPELINE,
#     )

#     # Log the total time
#     logging.info(f"Request took {time.time() - start_time:.2f} s")

#     return response

def start_server():
  run_app()

In [5]:
# set to background thread
import threading
import time
threading.Thread(target=start_server, daemon=True).start()

# Give server time to start
time.sleep(5) # may need longer startup

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

An error occurred while trying to fetch riffusion/riffusion-model-v1: riffusion/riffusion-model-v1 does not appear to have a file named diffusion_pytorch_model.safetensors.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


vae/diffusion_pytorch_model.bin:   0%|          | 0.00/335M [00:00<?, ?B/s]

In [7]:
# check if the server is running (keep checking, if this does not show anything then it means model is still uploading)
!lsof -i:5000

COMMAND  PID USER   FD   TYPE DEVICE SIZE/OFF NODE NAME
python3 1254 root   79u  IPv4 105096      0t0  TCP localhost:5000 (LISTEN)


## simple experiment setup

In [None]:
# run inference
CUDA_DEVICE=1
START_SEED=42
END_SEED=123

DENOISING=0.2
GUIDANCE=0.2
ALPHA=0
STEPS=100
OUTPUT_PATH = "/content/drive/MyDrive/Training-Free-StyleID/results/audio"
# SEED_IMAGE_PATH="/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/accordian123/1"
MASK_IMAGE_PATH="/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1"
SEED_IMAGE_PATH = "/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2"

# Run curl command
# !CUDA_VISIBLE_DEVICES="$CUDA_DEVICE" curl -X POST http://127.0.0.1:5000/run_inference/ -H "Content-Type: application/json" -d '{"start":{"prompt":"","seed":'"$START_SEED"',"denoising":'"$DENOISING"',"guidance":'"$GUIDANCE"'},"num_inference_steps":'"$STEPS"',"seed_image_path":"'"$SEED_IMAGE_PATH"'","mask_image_path":"'"$MASK_IMAGE_PATH"'","alpha":'"$ALPHA"',"end":{"prompt":"","seed":'"$END_SEED"',"denoising":'"$DENOISING"',"guidance":'"$GUIDANCE"', "output_path": '"$OUTPUT_PATH"'}}'

In [None]:
# ignore TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
import requests
import json
import os

data = {
    "start": {"prompt": "", "seed": START_SEED, "denoising": DENOISING, "guidance": GUIDANCE},
    "num_inference_steps": STEPS,
    "seed_image_path": SEED_IMAGE_PATH,
    "mask_image_path": MASK_IMAGE_PATH,
    "alpha": ALPHA,
    "end": {"prompt": "", "seed": END_SEED, "denoising": DENOISING, "guidance": GUIDANCE},
    "output_path": OUTPUT_PATH,
}

try:
    response = requests.post("http://127.0.0.1:5000/run_inference/", json=data)
    logger.info(f"Response status code: {response.status_code}")
    logger.info(f"Response text: {response.text[:500]}")  # limit output to first 500 chars
except Exception as e:
    logger.error(f"Request failed: {e}")

ERROR:my_server:Request failed: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /run_inference/ (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7809ffb21d30>: Failed to establish a new connection: [Errno 111] Connection refused'))


## multstyle simple experiment setup


In [8]:
# run inference
CUDA_DEVICE=1
START_SEED=42
END_SEED=123

DENOISING=0.2
GUIDANCE=0.2
ALPHA=0
STEPS=100
OUTPUT_PATH = "/content/drive/MyDrive/Training-Free-StyleID/results/audio"
# SEED_IMAGE_PATH="/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/accordian123/1"
SEED_IMAGE_PATH = "/content/drive/MyDrive/riffusion_project/spectrogram_images/AudioDI_DI_1/1"
MASK_IMAGE_PATH = ["/content/drive/MyDrive/riffusion_project/spectrogram_images/Tone_Chopper_DI_1/1", "/content/drive/MyDrive/riffusion_project/spectrogram_images/Tone_Dark Soul_DI_1/1",]

# Run curl command
# !CUDA_VISIBLE_DEVICES="$CUDA_DEVICE" curl -X POST http://127.0.0.1:5000/run_inference/ -H "Content-Type: application/json" -d '{"start":{"prompt":"","seed":'"$START_SEED"',"denoising":'"$DENOISING"',"guidance":'"$GUIDANCE"'},"num_inference_steps":'"$STEPS"',"seed_image_path":"'"$SEED_IMAGE_PATH"'","mask_image_path":"'"$MASK_IMAGE_PATH"'","alpha":'"$ALPHA"',"end":{"prompt":"","seed":'"$END_SEED"',"denoising":'"$DENOISING"',"guidance":'"$GUIDANCE"', "output_path": '"$OUTPUT_PATH"'}}'

In [9]:
# ignore TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
import requests
import json
import os

data = {
    "start": {"prompt": "", "seed": START_SEED, "denoising": DENOISING, "guidance": GUIDANCE},
    "num_inference_steps": STEPS,
    "seed_image_path": SEED_IMAGE_PATH,
    "mask_image_path": MASK_IMAGE_PATH,
    "alpha": ALPHA,
    "end": {"prompt": "", "seed": END_SEED, "denoising": DENOISING, "guidance": GUIDANCE},
    "output_path": OUTPUT_PATH,
}

try:
    response = requests.post("http://127.0.0.1:5000/run_inference/", json=data)
    logger.info(f"Response status code: {response.status_code}")
    logger.info(f"Response text: {response.text[:500]}")  # limit output to first 500 chars
except Exception as e:
    logger.error(f"Request failed: {e}")

######################### input image path:  /content/drive/MyDrive/riffusion_project/spectrogram_images/AudioDI_DI_1/1.png


INFO:werkzeug:127.0.0.1 - - [16/Sep/2025 11:51:49] "[31m[1mPOST /run_inference/ HTTP/1.1[0m" 400 -
INFO:my_server:Response status code: 400
INFO:my_server:Response text: Invalid mask image: ['/content/drive/MyDrive/riffusion_project/spectrogram_images/Tone_Chopper_DI_1/1', '/content/drive/MyDrive/riffusion_project/spectrogram_images/Tone_Dark Soul_DI_1/1']


In [None]:
# base64 decode

#!/usr/bin/env python3
"""
Script to decode JSON response from riffusion API and extract audio as WAV file.
"""

import json
import base64
import io
from pathlib import Path

def decode_audio_from_json(json_file_path, output_wav_path):
    """
    Decode audio from JSON response and save as WAV file.

    Args:
        json_file_path: Path to the JSON file containing the API response
        output_wav_path: Path where to save the WAV file
    """
    # Read the JSON file
    with open(json_file_path, 'r') as f:
        data = json.load(f)

    # Extract the audio data (base64 encoded)
    audio_base64 = data.get('audio', '')

    if not audio_base64:
        print("No audio data found in JSON response")
        return

    # Remove the data URL prefix if present
    if audio_base64.startswith('data:audio/mpeg;base64,'):
        audio_base64 = audio_base64.replace('data:audio/mpeg;base64,', '')

    # Decode base64 to binary
    try:
        audio_binary = base64.b64decode(audio_base64)
        print(f"Successfully decoded {len(audio_binary)} bytes of audio data")

        # Save as WAV file
        with open(output_wav_path, 'wb') as f:
            f.write(audio_binary)

        print(f"Audio saved as: {output_wav_path}")

        # Also save the spectrogram image if present
        image_base64 = data.get('image', '')
        if image_base64:
            if image_base64.startswith('data:image/jpeg;base64,'):
                image_base64 = image_base64.replace('data:image/jpeg;base64,', '')

            image_binary = base64.b64decode(image_base64)

            # NOTE save the generated output audio spectrogram image
            # image_path = output_wav_path.replace('.wav', '_spectrogram.jpg')

            # with open(image_path, 'wb') as f:
            #     f.write(image_binary)

            # print(f"Spectrogram saved as: {image_path}")

        # Print duration
        duration = data.get('duration_s', 0)
        print(f"Audio duration: {duration:.2f} seconds")

    except Exception as e:
        print(f"Error decoding audio: {e}")

    import argparse

In [None]:
# Decode the response.json file
JSON_PATH = None
OUTPUT_PATH = None
decode_audio_from_json(JSON_PATH, OUTPUT_PATH)

## batch experiment with varying egdb tones

In [9]:
!bash ./batch_run.sh DI_1 10

Starting batch inference runs with EGDB_DI_1 images...
Running all clean vs chopper combinations:
Running inference: clean1 vs chopper1
Seed image: /home/mku666/riffusion-hobby/results/riffusion_seed_mask_images/EGDB_DI_1/clean/1
Mask image: /home/mku666/riffusion-hobby/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1
----------------------------------------
curl: (7) Failed to connect to 127.0.0.1 port 3013 after 0 ms: Connection refused
Request failed for: clean1 vs chopper1



Running inference: clean1 vs chopper2
Seed image: /home/mku666/riffusion-hobby/results/riffusion_seed_mask_images/EGDB_DI_1/clean/1
Mask image: /home/mku666/riffusion-hobby/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/2
----------------------------------------
curl: (7) Failed to connect to 127.0.0.1 port 3013 after 0 ms: Connection refused
Request failed for: clean1 vs chopper2



Running inference: clean1 vs chopper3
Seed image: /home/mku666/riffusion-hobby/results/riffusion_seed_mask_images/EG

In [None]:
# !zip -r results.zip /content/results_folder

In [None]:
# !zip -r results.zip /content/results_folder
# from google.colab import files
# files.download("results.zip")

## batch experiment with varying denoising and guidance scales

In [None]:
import os

# denoising strength and guidance scale
DENOISING = [0.2, 0.3, 0.4, 0.5]
GUIDANCE = [0.2, 0.3, 0.4, 0.5]
# ALPHA = [0.2, 0.3, 0.4, 0.5]

import itertools
import requests
import json
from copy import deepcopy

# Your other constants (replace with actual values)
START_SEED=42
END_SEED=123
STEPS = 50
MASK_IMAGE_PATH="/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1"
SEED_IMAGE_PATH = "/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2"
ALPHA = 0.5
OUTPUT_PATH = "/content/drive/MyDrive/Training-Free-StyleID/results/audio"
API_ENDPOINT = "http://127.0.0.1:5000/run_inference/"  # Replace with your actual endpoint

# Base data template
base_data = {
    "start": {"prompt": "", "seed": START_SEED},
    "num_inference_steps": STEPS,
    "seed_image_path": SEED_IMAGE_PATH,
    "mask_image_path": MASK_IMAGE_PATH,
    "alpha": ALPHA,
    "end": {"prompt": "", "seed": END_SEED},
    "output_path": OUTPUT_PATH,
}

def safe_json_parse(response):
    """Safely parse JSON response with error handling."""
    try:
        return response.json()
    except json.JSONDecodeError:
        # Return the raw text if JSON parsing fails
        return {"raw_response": response.text, "content_type": response.headers.get('content-type', 'unknown')}

def generate_parameter_combinations():
    """Generate all combinations of denoising and guidance parameters."""
    combinations = []
    for start_denoising, start_guidance, end_denoising, end_guidance in itertools.product(
        DENOISING, GUIDANCE, DENOISING, GUIDANCE
    ):
        combo = {
            'start_denoising': start_denoising,
            'start_guidance': start_guidance,
            'end_denoising': end_denoising,
            'end_guidance': end_guidance
        }
        combinations.append(combo)
    return combinations

def create_request_data(combo, experiment_id):
    """Create request data for a specific parameter combination."""
    data = deepcopy(base_data)

    # Add the parameter combinations to start and end
    data['start']['denoising'] = combo['start_denoising']
    data['start']['guidance'] = combo['start_guidance']
    data['end']['denoising'] = combo['end_denoising']
    data['end']['guidance'] = combo['end_guidance']

    # Optionally modify output path to include experiment details
    data['output_path'] = f"{OUTPUT_PATH}/exp_{experiment_id}_sd{combo['start_denoising']}_sg{combo['start_guidance']}_ed{combo['end_denoising']}_eg{combo['end_guidance']}"

    return data

# deprecated
# def save_results(results, filename="./experiment_results.json"):
#     """Save experiment results to a JSON file."""
#     with open(filename, 'w') as f:
#         json.dump(results, f, indent=2)
#     print(f"Results saved to {filename}")

# Alternative: If you want the same denoising/guidance for both start and end
def generate_simple_combinations():
    """Generate combinations where start and end use the same parameters."""
    combinations = []
    for denoising, guidance in itertools.product(DENOISING, GUIDANCE):
        combo = {
            'denoising': denoising,
            'guidance': guidance
        }
        combinations.append(combo)
    return combinations

def create_simple_request_data(combo, experiment_id):
    """Create request data with same parameters for start and end."""
    data = deepcopy(base_data)

    # Use same parameters for both start and end
    data['start']['denoising'] = combo['denoising']
    data['start']['guidance'] = combo['guidance']
    data['end']['denoising'] = combo['denoising']
    data['end']['guidance'] = combo['guidance']

    output_path = f"{OUTPUT_PATH}/exp_{experiment_id}_d{combo['denoising']}_g{combo['guidance']}"
    data['output_path'] = output_path

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    return data

def test_api_endpoint():
    """Test the API endpoint before running experiments."""
    print(f"Testing API endpoint: {API_ENDPOINT}")

    try:
        # Try a simple GET request first
        response = requests.get(API_ENDPOINT, timeout=10)
        print(f"GET response: {response.status_code}")
        print(f"Content-Type: {response.headers.get('content-type', 'unknown')}")
        print(f"Response preview: {response.text[:200]}...")

        # Try a POST with minimal data
        test_data = {"test": "connection"}
        response = requests.post(
            API_ENDPOINT,
            json=test_data,
            headers={'Content-Type': 'application/json'},
            timeout=10
        )
        print(f"POST response: {response.status_code}")
        print(f"Content-Type: {response.headers.get('content-type', 'unknown')}")
        print(f"Response preview: {response.text[:200]}...")

    except requests.exceptions.RequestException as e:
        print(f"API endpoint test failed: {e}")
        return False

    return True

def load_experiments(json_file_path):
    """Load experiments from JSON file"""
    try:
        with open(json_file_path, 'r') as file:
            experiments = json.load(file)
        return experiments
    except FileNotFoundError:
        logger.error(f"JSON file not found: {json_file_path}")
        return None
    except json.JSONDecodeError as e:
        logger.error(f"Error decoding JSON: {e}")
        return None

def run_experiment(experiment):
    """Run a single experiment"""
    experiment_id = experiment.get('experiment_id')
    parameters = experiment.get('parameters', {})

    # Extract denoising and guidance from the experiment parameters
    denoising = parameters.get('denoising', 0.2)  # Default fallback
    guidance = parameters.get('guidance', 0.2)    # Default fallback

    # Prepare the data payload
    data = {
        "start": {
            "prompt": "",
            "seed": START_SEED,
            "denoising": denoising,
            "guidance": guidance
        },
        "num_inference_steps": STEPS,
        "seed_image_path": SEED_IMAGE_PATH,
        "mask_image_path": MASK_IMAGE_PATH,
        "alpha": ALPHA,
        "end": {
            "prompt": "",
            "seed": END_SEED,
            "denoising": denoising,
            "guidance": guidance
        },
        "output_path": f"{OUTPUT_PATH}/experiment_{experiment_id}",  # Unique output path per experiment
    }

    logger.info(f"Running experiment {experiment_id} with denoising={denoising}, guidance={guidance}")

    try:
        response = requests.post("http://127.0.0.1:5000/run_inference/", json=data)
        logger.info(f"Experiment {experiment_id} - Response status code: {response.status_code}")
        logger.info(f"Experiment {experiment_id} - Response text: {response.text[:500]}")

        # Return experiment results
        return {
            "experiment_id": experiment_id,
            "parameters": parameters,
            "status_code": response.status_code,
            "response": response.text[:500],
            "success": response.status_code == 200
        }

    except requests.exceptions.RequestException as e:
        logger.error(f"Experiment {experiment_id} - Request failed: {e}")
        return {
            "experiment_id": experiment_id,
            "parameters": parameters,
            "error": str(e),
            "success": False
        }

In [None]:
# generate experiemental configs
combinations = generate_simple_combinations()
print(f"generating {len(combinations)} simple experiments...")

generating 16 simple experiments...


In [None]:
"""
run batch request
"""

# can safely ignore TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.


# list of json results to be saved
results = []

for i, combo in enumerate(combinations):
    print(f"Experiment {i+1}/{len(combinations)}: denoising={combo['denoising']}, guidance={combo['guidance']}")

    request_data = create_simple_request_data(combo, i+1)

    try:
        response = requests.post(
            API_ENDPOINT,
            json=request_data,
            headers={'Content-Type': 'application/json'},
        )

        if response.status_code == 200:
            result = {
                'experiment_id': i+1,
                'parameters': combo,
                'status': 'success',
                'response': response.json()
            }
            print(f"✓ Experiment {i+1} completed successfully")
        else:
            result = {
                'experiment_id': i+1,
                'parameters': combo,
                'status': 'error',
                'error': f"HTTP {response.status_code}: {response.text}"
            }
            print(f"✗ Experiment {i+1} failed: HTTP {response.status_code}")

    except requests.exceptions.RequestException as e:
        result = {
            'experiment_id': i+1,
            'parameters': combo,
            'status': 'error',
            'error': str(e)
        }
        print(f"✗ Experiment {i+1} failed: {e}")

    results.append(result)

Experiment 1/16: denoising=0.2, guidance=0.2
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.2, guidance=0.2), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.2, guidance=0.2), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_1_d0.2_g0.2')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05E7657DA0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59DE50>


  0%|          | 0/10 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:24:47] "[35m[1mPOST /run_inference/ 

✗ Experiment 1 failed: HTTP 500
Experiment 2/16: denoising=0.2, guidance=0.3
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.2, guidance=0.3), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.2, guidance=0.3), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_2_d0.2_g0.3')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59E630>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59F620>


  0%|          | 0/10 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:25:44] "[35m[1mPOST /run_inference/ 

✗ Experiment 2 failed: HTTP 500
Experiment 3/16: denoising=0.2, guidance=0.4
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.2, guidance=0.4), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.2, guidance=0.4), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_3_d0.2_g0.4')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59E030>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CB151BE0>


  0%|          | 0/10 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:26:44] "[35m[1mPOST /run_inference/ 

✗ Experiment 3 failed: HTTP 500
Experiment 4/16: denoising=0.2, guidance=0.5
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.2, guidance=0.5), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.2, guidance=0.5), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_4_d0.2_g0.5')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59E6F0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59EDB0>


  0%|          | 0/10 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:27:43] "[35m[1mPOST /run_inference/ 

✗ Experiment 4 failed: HTTP 500
Experiment 5/16: denoising=0.3, guidance=0.2
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.3, guidance=0.2), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.3, guidance=0.2), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_5_d0.3_g0.2')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA59EAE0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D060E75BD10>


  0%|          | 0/15 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:28:46] "[35m[1mPOST /run_inference/ 

✗ Experiment 5 failed: HTTP 500
Experiment 6/16: denoising=0.3, guidance=0.3
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.3, guidance=0.3), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.3, guidance=0.3), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_6_d0.3_g0.3')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA346630>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3450D0>


  0%|          | 0/15 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:29:48] "[35m[1mPOST /run_inference/ 

✗ Experiment 6 failed: HTTP 500
Experiment 7/16: denoising=0.3, guidance=0.4
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.3, guidance=0.4), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.3, guidance=0.4), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_7_d0.3_g0.4')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA347F80>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA347AA0>


  0%|          | 0/15 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:30:50] "[35m[1mPOST /run_inference/ 

✗ Experiment 7 failed: HTTP 500
Experiment 8/16: denoising=0.3, guidance=0.5
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.3, guidance=0.5), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.3, guidance=0.5), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_8_d0.3_g0.5')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA344290>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3476B0>


  0%|          | 0/15 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:31:52] "[35m[1mPOST /run_inference/ 

✗ Experiment 8 failed: HTTP 500
Experiment 9/16: denoising=0.4, guidance=0.2
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.4, guidance=0.2), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.4, guidance=0.2), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_9_d0.4_g0.2')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3A45C0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3A5490>


  0%|          | 0/20 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:32:58] "[35m[1mPOST /run_inference/ 

✗ Experiment 9 failed: HTTP 500
Experiment 10/16: denoising=0.4, guidance=0.3
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.4, guidance=0.3), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.4, guidance=0.3), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_10_d0.4_g0.3')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA345EE0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA344AD0>


  0%|          | 0/20 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:34:04] "[35m[1mPOST /run_inference/ 

✗ Experiment 10 failed: HTTP 500
Experiment 11/16: denoising=0.4, guidance=0.4
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.4, guidance=0.4), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.4, guidance=0.4), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_11_d0.4_g0.4')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05E6E064E0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05E6E06E40>


  0%|          | 0/20 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:35:10] "[35m[1mPOST /run_inference/ 

✗ Experiment 11 failed: HTTP 500
Experiment 12/16: denoising=0.4, guidance=0.5
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.4, guidance=0.5), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.4, guidance=0.5), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_12_d0.4_g0.5')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3466F0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA346360>


  0%|          | 0/20 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:36:17] "[35m[1mPOST /run_inference/ 

✗ Experiment 12 failed: HTTP 500
Experiment 13/16: denoising=0.5, guidance=0.2
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.5, guidance=0.2), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.5, guidance=0.2), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_13_d0.5_g0.2')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA310BF0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3109E0>


  0%|          | 0/25 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:37:29] "[35m[1mPOST /run_inference/ 

✗ Experiment 13 failed: HTTP 500
Experiment 14/16: denoising=0.5, guidance=0.3
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.5, guidance=0.3), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.5, guidance=0.3), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_14_d0.5_g0.3')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3465A0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA346C90>


  0%|          | 0/25 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:38:47] "[35m[1mPOST /run_inference/ 

✗ Experiment 14 failed: HTTP 500
Experiment 15/16: denoising=0.5, guidance=0.4
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.5, guidance=0.4), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.5, guidance=0.4), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_15_d0.5_g0.4')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA345550>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05F6286AE0>


  0%|          | 0/25 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:39:59] "[35m[1mPOST /run_inference/ 

✗ Experiment 15 failed: HTTP 500
Experiment 16/16: denoising=0.5, guidance=0.5
######################### input image path:  /content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2.png
inputs: InferenceInput(start=PromptInput(prompt='', seed=42, negative_prompt=None, denoising=0.5, guidance=0.5), end=PromptInput(prompt='', seed=123, negative_prompt=None, denoising=0.5, guidance=0.5), alpha=0.5, num_inference_steps=50, seed_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/clean/2', mask_image_path='/content/drive/MyDrive/Training-Free-StyleID/results/riffusion_seed_mask_images/EGDB_DI_1/chopper/1', output_path='/content/drive/MyDrive/Training-Free-StyleID/results/audio/exp_16_d0.5_g0.5')
init_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA3465A0>
mask_image <PIL.Image.Image image mode=RGB size=1816x512 at 0x7D05CA344A40>


  0%|          | 0/25 [00:00<?, ?it/s]

output json path: clean2_to_chopper1


ERROR:__main__:Exception on /run_inference/ [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 920, in full_dispatch_request
    return self.finalize_request(rv)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 939, in finalize_request
    response = self.make_response(rv)
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/flask/app.py", line 1249, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a InferenceOutput.
INFO:werkzeug:127.0.0.1 - - [08/Sep/2025 15:41:09] "[35m[1mPOST /run_inference/ 

✗ Experiment 16 failed: HTTP 500


In [None]:
# deprecated
# save_results(results)

## batch decode

In [None]:
import json
import base64
import io
import logging
from pathlib import Path

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def decode_audio_from_json(json_file_path, output_wav_path):
    """
    Decode audio from JSON response and save as WAV file.

    Args:
        json_file_path: Path to the JSON file containing the API response
        output_wav_path: Path where to save the WAV file

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        # Read the JSON file
        with open(json_file_path, 'r') as f:
            data = json.load(f)

        # Extract the audio data (base64 encoded)
        audio_base64 = data.get('audio', '')

        if not audio_base64:
            logger.warning(f"No audio data found in {json_file_path}")
            return False

        # Remove the data URL prefix if present
        if audio_base64.startswith('data:audio/mpeg;base64,'):
            audio_base64 = audio_base64.replace('data:audio/mpeg;base64,', '')
        elif audio_base64.startswith('data:audio/wav;base64,'):
            audio_base64 = audio_base64.replace('data:audio/wav;base64,', '')

        # Decode base64 to binary
        audio_binary = base64.b64decode(audio_base64)
        logger.info(f"Successfully decoded {len(audio_binary)} bytes of audio data from {json_file_path}")

        # Create output directory if it doesn't exist
        output_path = Path(output_wav_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Save as WAV file
        with open(output_wav_path, 'wb') as f:
            f.write(audio_binary)

        logger.info(f"Audio saved as: {output_wav_path}")

        # Also save the spectrogram image if present (optional)
        image_base64 = data.get('image', '')
        if image_base64:
            if image_base64.startswith('data:image/jpeg;base64,'):
                image_base64 = image_base64.replace('data:image/jpeg;base64,', '')
            elif image_base64.startswith('data:image/png;base64,'):
                image_base64 = image_base64.replace('data:image/png;base64,', '')

            image_binary = base64.b64decode(image_base64)

            # Uncomment if you want to save spectrograms
            # image_path = output_wav_path.replace('.wav', '_spectrogram.jpg')
            # with open(image_path, 'wb') as f:
            #     f.write(image_binary)
            # logger.info(f"Spectrogram saved as: {image_path}")

        # Print duration if available
        duration = data.get('duration_s', 0)
        if duration > 0:
            logger.info(f"Audio duration: {duration:.2f} seconds")

        return True

    except json.JSONDecodeError as e:
        logger.error(f"Error reading JSON file {json_file_path}: {e}")
        return False
    except base64.binascii.Error as e:
        logger.error(f"Error decoding base64 data from {json_file_path}: {e}")
        return False
    except Exception as e:
        logger.error(f"Error processing {json_file_path}: {e}")
        return False

def batch_decode_audio_from_directory(directory_path, recursive=True):
    """
    Process all JSON files in a directory (and optionally its subdirectories) and decode audio from each one.

    Args:
        directory_path: Path to the directory containing JSON files
        recursive: If True, search subdirectories recursively (default: True)
    """
    directory = Path(directory_path)

    if not directory.exists():
        logger.error(f"Directory does not exist: {directory_path}")
        return

    if not directory.is_dir():
        logger.error(f"Path is not a directory: {directory_path}")
        return

    # Find all JSON files in the directory (and subdirectories if recursive)
    if recursive:
        json_files = list(directory.rglob("*.json"))  # rglob for recursive search
        logger.info(f"Recursively searching for JSON files in: {directory_path}")
    else:
        json_files = list(directory.glob("*.json"))   # glob for current directory only
        logger.info(f"Searching for JSON files in: {directory_path}")

    if not json_files:
        logger.warning(f"No JSON files found in directory: {directory_path}")
        return

    logger.info(f"Found {len(json_files)} JSON files to process")

    # Group files by subdirectory for better logging
    subdirs = {}
    for json_file in json_files:
        relative_path = json_file.relative_to(directory)
        subdir = str(relative_path.parent) if relative_path.parent != Path('.') else 'root'
        if subdir not in subdirs:
            subdirs[subdir] = []
        subdirs[subdir].append(json_file)

    # Log the distribution of files across subdirectories
    for subdir, files in subdirs.items():
        logger.info(f"  {subdir}: {len(files)} JSON file(s)")

    successful_conversions = 0
    failed_conversions = 0

    # Process each JSON file
    for json_file in json_files:
        # Get relative path for better logging
        relative_path = json_file.relative_to(directory)
        logger.info(f"Processing: {relative_path}")

        # Generate output WAV path (same location as JSON file, but with .wav extension)
        wav_file = json_file.with_suffix('.wav')

        # Skip if WAV file already exists (optional - remove this check if you want to overwrite)
        if wav_file.exists():
            logger.info(f"WAV file already exists, skipping: {wav_file.relative_to(directory)}")
            continue

        # Decode the audio
        success = decode_audio_from_json(json_file, wav_file)

        if success:
            successful_conversions += 1
        else:
            failed_conversions += 1

        logger.info("-" * 50)  # Separator for readability

    # Summary
    logger.info(f"Batch processing completed!")
    logger.info(f"Successfully converted: {successful_conversions} files")
    logger.info(f"Failed conversions: {failed_conversions} files")
    logger.info(f"Total processed: {len(json_files)} files")

def batch_decode_audio_from_directory_non_recursive(directory_path):
    """
    Convenience function to process only the specified directory without recursion.

    Args:
        directory_path: Path to the directory containing JSON files
    """
    batch_decode_audio_from_directory(directory_path, recursive=False)

In [None]:
# Process all JSON files recursively in subdirectories
DIR_PATH = "/content/drive/MyDrive/Training-Free-StyleID/results/audio/"
batch_decode_audio_from_directory(DIR_PATH)