# Image Sonification



## Convert Grayscale Image to Audio via Griffin-Lim Spectrogram Reconstruction
This script converts a grayscale image into a synthetic audio waveform by treating the image as a magnitude spectrogram, then reconstructing the audio using the Griffin-Lim algorithm. It:

* Loads the image in grayscale and resizes it to the spectrogram dimensions: `513` frequency bins (`n_fft // 2 + 1` for `n_fft = 1024`) × `128` time frames

* Normalizes and vertically flips the image to match spectrogram orientation

* Converts the image to a dB-scaled spectrogram and back to linear amplitude, which rescales it relative to its peak and limits the dynamic range

* Applies the Griffin-Lim algorithm to reconstruct audio from the spectrogram

* Saves the output as a `.wav` file

The process is useful for low-level image sonification and for evaluating whether image features can be retained in the audio domain.

In [8]:
import numpy as np
import cv2
import librosa
import soundfile as sf

def image_to_spectrogram_audio(
    image_path: str,
    output_audio: str = "output.wav",
    sr: int = 22050,
    *,
    n_fft: int = 1024,
    hop_length: int = 512,
    n_frames: int = 128,
    n_iter: int = 32,
) -> None:
    """Sonify a grayscale image by treating it as a magnitude spectrogram.

    The image is resized to ``(n_fft // 2 + 1)`` rows (frequency bins) by
    ``n_frames`` columns (time frames), flipped so that the bottom image row
    becomes the lowest frequency bin, and inverted to a waveform with
    Griffin-Lim. The waveform is written to ``output_audio``.

    With the default settings the result is identical to the previous
    hard-coded configuration (513 x 128 spectrogram, 32 iterations).

    Args:
        image_path: Path of the image to load (read as grayscale).
        output_audio: Destination audio file path.
        sr: Sample rate written to the output file. It only affects playback
            speed/pitch; the spectrogram itself is independent of it.
        n_fft: FFT size assumed for the spectrogram; fixes the number of
            frequency bins at ``n_fft // 2 + 1``.
        hop_length: Hop between successive frames, in samples.
        n_frames: Number of spectrogram time frames (resized image width).
        n_iter: Number of Griffin-Lim iterations.

    Raises:
        ValueError: If the image cannot be read, or if ``n_fft``,
            ``hop_length``, ``n_frames`` or ``n_iter`` is out of range.

    Note:
        librosa's Griffin-Lim starts from random phases by default, so
        repeated runs are not guaranteed to be sample-identical.
    """
    if n_fft < 2 or hop_length < 1 or n_frames < 1 or n_iter < 1:
        raise ValueError(
            "n_fft must be >= 2 and hop_length, n_frames, n_iter must be >= 1."
        )

    # Load image in grayscale; cv2.imread returns None instead of raising
    # when the file is missing or cannot be decoded.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError("Image not found or invalid path.")

    # Derive the bin count from n_fft so the spectrogram shape can never
    # disagree with the n_fft handed to Griffin-Lim below.
    n_bins = n_fft // 2 + 1

    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (n_frames, n_bins))

    # Normalize to [0, 1] and flip vertically (spectrograms use
    # bottom-to-top frequency, images are stored top-to-bottom).
    image_normalized = np.flipud(image.astype(np.float32) / 255.0)

    # Round-trip through dB (epsilon avoids log(0)). With ref=np.max and
    # librosa's default top_db this rescales the magnitudes relative to the
    # peak and floors everything more than top_db below it.
    spectrogram_db = librosa.amplitude_to_db(image_normalized + 1e-7, ref=np.max)
    spectrogram_linear = librosa.db_to_amplitude(spectrogram_db)

    # Reconstruct audio; the window spans the full FFT frame.
    audio = librosa.griffinlim(
        spectrogram_linear,
        n_iter=n_iter,
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=n_fft,
    )

    # Save as WAV file
    sf.write(output_audio, audio, sr)
    print(f"Audio saved to {output_audio}")

# Example usage: sonify one car image from the local dataset into car.wav
image_to_spectrogram_audio(
    image_path=r"D:\Documents\Iquisitionis\103\vehicle-type-recognition\dataset\car\Image_15.jpg",
    output_audio="car.wav",
)

Audio saved to car.wav
