# idée
=> get score from 1/f, then modulate blended audio using other features

In [None]:
import os
import cv2
import numpy as np
from pydub import AudioSegment
from pydub.effects import low_pass_filter
from skimage.metrics import structural_similarity as ssim

# 1. Extract Features
def extract_image_features(image_path):
    img_bgr = cv2.imread(image_path)
    if img_bgr is None:
        raise ValueError(f"Could not load image: {image_path}")
    img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    img_gray = img_gray.astype(np.float32) / 255.0

    # HSV color features
    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    hsv_mean = np.mean(img_hsv, axis=(0, 1)) / 255.0
    hsv_std = np.std(img_hsv, axis=(0, 1)) / 255.0
    hue_mean = hsv_mean[0] * (180 / 255) / 180  # Normalize to [0,1]

    # Edge density
    edges = cv2.Canny((img_gray * 255).astype(np.uint8), 100, 200)
    edge_density = np.sum(edges > 0) / edges.size

    # 1/f slope
    f_transform = np.fft.fft2(img_gray)
    f_shift = np.fft.fftshift(f_transform)
    power_spectrum = np.abs(f_shift) ** 2
    h, w = power_spectrum.shape
    y, x = np.indices((h, w))
    center = (h // 2, w // 2)
    r = np.sqrt((x - center[1]) ** 2 + (y - center[0]) ** 2).astype(np.int32)
    radial_sum = np.bincount(r.ravel(), power_spectrum.ravel())
    radial_count = np.bincount(r.ravel())
    radial_profile = radial_sum / (radial_count + 1e-8)
    freqs = np.arange(len(radial_profile))
    mask = freqs > 1
    if np.any(mask):
        log_freqs = np.log(freqs[mask])
        log_power = np.log(radial_profile[mask])
        slope, _ = np.polyfit(log_freqs, log_power, 1)
    else:
        slope = 0.0

    # Symmetry
    left = img_gray[:, :w // 2]
    right = np.fliplr(img_gray[:, w - w // 2:])
    min_width = min(left.shape[1], right.shape[1])
    symmetry_score = ssim(left[:, :min_width], right[:, :min_width])

    return {
        "color_hue": hue_mean,
        "contrast": np.std(img_gray),
        "edge_density": edge_density,
        "slope": slope,
        "symmetry": symmetry_score
    }


def blend_audio_from_slope(nature_audio_path, techno_audio_path, slope, output_path, nature_image_path, techno_image_path):
    from pydub import AudioSegment
    import numpy as np

    # 1. Extract slopes from reference images
    slope_nature = extract_image_features(nature_image_path)["slope"]
    slope_techno = extract_image_features(techno_image_path)["slope"]

    # 2. Order slopes
    min_slope = min(slope_nature, slope_techno)
    max_slope = max(slope_nature, slope_techno)

    if slope == slope_nature:
        print("Slope is equal to the nature reference slope.")
        AudioSegment.from_file(nature_audio_path).export(output_path, format="wav")
        return output_path
    elif slope == slope_techno:
        print("Slope is equal to the techno reference slope.")
        AudioSegment.from_file(techno_audio_path).export(output_path, format="wav")
        return output_path

    # 3. Blend ratio calculation
    if slope <= min_slope:
        nature_weight, techno_weight = (1.0, 0.0) if slope_nature > slope_techno else (0.0, 1.0)
    elif slope >= max_slope:
        nature_weight, techno_weight = (1.0, 0.0) if slope_nature < slope_techno else (0.0, 1.0)
    else:
        blend_ratio = (slope - slope_techno) / (slope_nature - slope_techno)
        blend_ratio = np.clip(blend_ratio, 0.0, 1.0)
        nature_weight = blend_ratio
        techno_weight = 1.0 - blend_ratio

    print(f"Slope: {slope:.3f} | Blend → Nature: {nature_weight:.2f}, Techno: {techno_weight:.2f}")

    # 4. Load, normalize, and align audio
    TARGET_FRAME_RATE = 44100 # Standard frame rate for audio processing

    audio_nature = AudioSegment.from_file(nature_audio_path).set_channels(1).set_frame_rate(TARGET_FRAME_RATE)
    audio_techno = AudioSegment.from_file(techno_audio_path).set_channels(1).set_frame_rate(TARGET_FRAME_RATE)

    duration_ms = min(len(audio_nature), len(audio_techno))
    print(f"Aligning audio to {duration_ms} ms")

    audio_nature = audio_nature[:duration_ms]
    audio_techno = audio_techno[:duration_ms]

    # 5. Convert to numpy arrays
    def to_mono_np(audio):
        return np.array(audio.get_array_of_samples()).astype(np.float32)

    nature_samples = to_mono_np(audio_nature)
    techno_samples = to_mono_np(audio_techno)

    min_len = min(len(nature_samples), len(techno_samples))
    nature_samples = nature_samples[:min_len]
    techno_samples = techno_samples[:min_len]

    # 6. Blend
    blended = nature_weight * nature_samples + techno_weight * techno_samples
    blended = np.clip(blended, -32768, 32767).astype(np.int16)

    # 7. Export
    blended_audio = AudioSegment(
        blended.tobytes(),
        frame_rate=TARGET_FRAME_RATE,
        sample_width=2,
        channels=1
    )
    blended_audio.export(output_path, format="wav")
    return output_path


# 3. Modulate Audio
def modulate_audio_from_color_features(audio_path, features, output_path):
    sound = AudioSegment.from_file(audio_path)

    # 1. Speed modulation (based on hue)
    hue = features.get("color_hue", 0.001)
    hue_factor = 10  # Increase impact
    speed_factor = 0.8 + hue * hue_factor  # Now can range more widely
    new_frame_rate = int(sound.frame_rate * speed_factor)
    sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    # Do not reset with set_frame_rate — keeps the speed effect

    # 2. Volume modulation (based on contrast)
    contrast = features.get("contrast", 0.5)
    contrast_factor = 50  # Was 10, increase to 50 for stronger effect, as first results were too weak
    volume_change = (contrast - 0.5) * contrast_factor
    sound += volume_change

    # 3. Low-pass filter cutoff (based on edge density)
    edge_density = features.get("edge_density", 0.5)
    cutoff = int(20000 - edge_density * 18000)  # Before: 10k - 8k range
    sound = sound.low_pass_filter(cutoff)

    sound.export(output_path, format="wav")
    return output_path


def strongly_modulate_audio_from_features(audio_path, features, output_path):
    sound = AudioSegment.from_file(audio_path)

    # 1. Strong speed modulation based on hue
    hue = features.get("color_hue", 0.001)
    hue_factor = 65  # Boosted impact
    speed_factor = 0.7 + hue * hue_factor  # More dynamic speed range
    new_frame_rate = int(sound.frame_rate * speed_factor)
    sound = sound._spawn(sound.raw_data, overrides={"frame_rate": new_frame_rate})
    # Do NOT reset frame rate, this preserves speed effect

    # 2. Strong volume modulation based on contrast
    contrast = features.get("contrast", 0.5)
    volume_change = (contrast - 0.5) * 70  # Larger volume shifts
    sound += volume_change

    # 3. Strong low-pass filtering based on edge density
    edge_density = features.get("edge_density", 0.5)
    cutoff = int(20000 - edge_density * 18000)  # Wider range
    sound = low_pass_filter(sound, cutoff)
    sound += 40  # Increase overall volume by 40 dB, as it was too low

    # Export final sound
    sound.export(output_path, format="wav")
    return output_path



# 4. Run Full Pipeline
def process_image_to_sound(image_name, image_pathway, audio_pathway, nature_image, technosphere_image, nature_audio, technosphere_audio):
    image_path = os.path.join(image_pathway, image_name + ".jpg")
    features = extract_image_features(image_path)

    # Output paths
    blended_path = os.path.join(audio_pathway, f"{image_name}_blended.wav")
    final_path = os.path.join(audio_pathway, f"{image_name}_final.wav")

    # Blend based on slope
    blend_audio_from_slope(
    os.path.join(audio_pathway, nature_audio),
    os.path.join(audio_pathway, technosphere_audio),
    features["slope"],
    blended_path,
    os.path.join(image_pathway, nature_image + ".jpg"),
    os.path.join(image_pathway, technosphere_image + ".jpg")
    )

    # Modulate based on perceptual features
    strongly_modulate_audio_from_features(blended_path, features, final_path)
    return features, final_path

In [None]:
image_pathway = "Final_photos/"
audio_pathway = "Final_audios/"
nature_audio = "baseline_nat_final.wav"
technosphere_audio = "baseline_tech_final.wav"

# Process all images in the image_pathway folder
lowest_blend_score = None
lowest_blend_image = None

highest_blend_score = None
highest_blend_image = None

for fname in os.listdir(image_pathway):
    if fname.lower().endswith(".jpg"):
        image_name = os.path.splitext(fname)[0]
        features, output = process_image_to_sound(
            image_name=image_name,
            image_pathway=image_pathway,
            audio_pathway=audio_pathway,
            nature_image="baseline_nat",
            technosphere_image="baseline_tech",
            nature_audio=nature_audio,
            technosphere_audio=technosphere_audio
        )
        print(f"Processed {fname} -> {output}")
        print(f"Features for {image_name}: {features}")

        blend_score = features["slope"]
        if lowest_blend_score is None or blend_score < lowest_blend_score:
            lowest_blend_score = blend_score
            lowest_blend_image = image_name
        if highest_blend_score is None or blend_score > highest_blend_score:
            highest_blend_score = blend_score
            highest_blend_image = image_name

print(f"Image with lowest blended score: {lowest_blend_image} (score: {lowest_blend_score})")
print(f"Image with highest blended score: {highest_blend_image} (score: {highest_blend_score})")


Slope: -2.974 | Blend → Nature: 0.36, Techno: 0.64
Aligning audio to 12567 ms
Processed pref_mila_technosphere.jpg -> Final_audios/pref_mila_technosphere_final.wav
Features for pref_mila_technosphere: {'color_hue': 0.0015496181481681726, 'contrast': 0.22279277, 'edge_density': 0.11875569661458334, 'slope': -2.9744744818450077, 'symmetry': 0.35728172065375696}
Slope: -3.372 | Blend → Nature: 0.20, Techno: 0.80
Aligning audio to 12567 ms
Processed photo_18_2025-04-30_17-02-28.jpg -> Final_audios/photo_18_2025-04-30_17-02-28_final.wav
Features for photo_18_2025-04-30_17-02-28: {'color_hue': 0.0007906176656274932, 'contrast': 0.24137089, 'edge_density': 0.18087426918621785, 'slope': -3.3718236595466773, 'symmetry': 0.3642963088803445}
Slope: -2.656 | Blend → Nature: 0.49, Techno: 0.51
Aligning audio to 12567 ms
Processed photo_25_2025-04-30_17-02-28.jpg -> Final_audios/photo_25_2025-04-30_17-02-28_final.wav
Features for photo_25_2025-04-30_17-02-28: {'color_hue': 0.0009128873835584072, 'co

In [37]:
# Collect all image slopes and names
image_slopes = []
for fname in os.listdir(image_pathway):
    if fname.lower().endswith(".jpg"):
        image_name = os.path.splitext(fname)[0]
        features = extract_image_features(os.path.join(image_pathway, fname))
        image_slopes.append((features["slope"], image_name))

# Sort by slope
image_slopes_sorted = sorted(image_slopes, key=lambda x: x[0])

print("Order of image slopes (lowest to highest):")
for slope, name in image_slopes_sorted:
    print(f"{name}: {slope:.4f}")


Order of image slopes (lowest to highest):
baseline_tech: -3.8727
photo_18_2025-04-30_17-02-28: -3.3718
photo_7_2025-04-30_17-02-28: -3.2918
pref_mila_technosphere: -2.9745
photo_24_2025-04-30_17-02-28: -2.8176
photo_25_2025-04-30_17-02-28: -2.6562
photo_73_2025-04-30_17-02-28: -1.9705
baseline_nat: -1.4078


## Sound Mapping Philosophy

Translate these image features into acoustic expressions that support your conceptual axis:

| **Feature**             | **Natural Value → Sound Mapping**                                |
|-------------------------|-------------------------------------------------------------------|
| 1/f Slope ≈ -2          | First audio leans toward nature baseline                          |
| Green Hue (low HSV)     | Smooth timbres, analog warmth                                     |
| Soft Edges              | Gentle envelopes, low-pass filtering                              |
| Low Contrast            | Subtle volume variation, less percussive attack                   |
