In [None]:
# MAT 201A Spring 2017
# HW 2
# Sihwa Park
# sihwapark@mat.ucsb.edu
# 
# This notebook loads an image and interprets each column of pixel data 
# as the magnitude spectrum of the FFT bins in a Short-Time Fourier Transform (STFT).
# It then produces audio from these STFT frames by taking the IFT of each in succession.
# My solution treats every image as a grayscale image,
# and darker pixel values are mapped onto higher values in the magnitude spectrum.

In [None]:
%pylab inline

from __future__ import print_function
from __future__ import division

from ipywidgets import widgets, interact
from IPython.display import Audio

### Module for the Inverse Fourier Transform (IFT) of an image
This module transforms an image into sound through an IFT. It scans each column of the image, maps it onto the magnitude spectrum values of the FFT bins of one STFT frame, and returns the whole magnitude spectrum together with the audio data obtained from the IFT.

In [None]:
def image_to_audio_by_ift(img, gain=8):
    """Convert an image to audio by treating each pixel column as one STFT frame.

    Every column of the image is read top to bottom as the magnitude spectrum
    of one frame (darker pixels give larger magnitudes), zero-padded up to
    ``2**k + 1`` FFT bins, given zero phase, and turned into time-domain
    samples with an inverse real FFT. The frames are concatenated in column
    order without overlap.

    Parameters
    ----------
    img : array_like
        Either a 2-D grayscale image ``(rows, columns)`` or a 3-D image
        ``(rows, columns, channels)``; for 3-D input only channel 0 is used.
        Floating-point pixels are used as they are (values are expected to lie
        in [0, 1]); integer pixels are first scaled by the maximum value of
        their dtype so that ``1 - pixel`` cannot wrap around.
    gain : float, optional
        Factor applied to the samples of every frame. Defaults to 8.

    Returns
    -------
    spectrum : list of numpy.ndarray
        One magnitude array of length ``fftbin_size`` per image column.
    audio : numpy.ndarray
        1-D array of ``2 * (fftbin_size - 1) * columns`` samples.

    Raises
    ------
    ValueError
        If ``img`` is neither 2-D nor 3-D.

    Notes
    -----
    The row size, FFT bin count and the expected/actual audio lengths are
    printed as a side effect.
    """
    img = np.asarray(img)
    if img.ndim == 2:
        gray = img
    elif img.ndim == 3:
        gray = img[:, :, 0]
    else:
        raise ValueError('img must be a 2-D or 3-D array, got %d dimension(s)' % img.ndim)

    # Unsigned integer pixels would wrap around in ``1 - pixel``; scale them to [0, 1].
    if np.issubdtype(gray.dtype, np.integer):
        gray = gray.astype(np.float64) / np.iinfo(gray.dtype).max

    row_size, column_count = gray.shape

    # Smallest power of two (at least 2) that covers all rows, plus one bin so
    # that the inverse real FFT yields 2 * (fftbin_size - 1) samples per frame.
    fftbin_size = 2
    while fftbin_size < row_size:
        fftbin_size *= 2
    fftbin_size += 1
    print('Row size: %d, fftbin size: %d'% (row_size, fftbin_size))

    # it is the expected length of audio samples after an IFT
    frame_length = (fftbin_size - 1) * 2
    expected_sample_length = frame_length * column_count
    print('Expected audio length: %s'% (expected_sample_length))

    # One row per image column; bins beyond row_size stay zero (zero padding).
    magnitudes = np.zeros((column_count, fftbin_size))
    magnitudes[:, :row_size] = (1 - gray).T

    # The phase of every bin is zero, so each frame's spectrum is purely real
    # and all frames can be inverted with a single call.
    frames = np.fft.irfft(magnitudes, n=frame_length, axis=1) * gain
    audio = frames.ravel()

    spectrum = [np.abs(frame_magnitude) for frame_magnitude in magnitudes]

    print('Actual audio length: %s '% (audio.size)) # the audio sample length

    return spectrum, audio

### First image - gradient triangle

In [None]:
# Load the first test image and preview its first channel (the one that is
# sonified) as a grayscale picture with a value scale.
img = imread('media/triangle_rgb.png')
imshow(img[:, :, 0], cmap='gray')
colorbar();

In [None]:
# Show the array dimensions; the first axis (row count) is what
# image_to_audio_by_ift uses to choose its FFT bin count.
img.shape

In [None]:
# Sonify the current image: one STFT frame per pixel column.
spectrum, audio = image_to_audio_by_ift(img)
# Transpose so FFT bins run along the vertical axis and time along the horizontal one.
spectrum = array(spectrum).T

fig = gcf()
fig.set_size_inches(13, 9)

# Top panel: the magnitude spectrum of every frame.
ax_spectrum = subplot(2, 1, 1)
ax_spectrum.imshow(spectrum, aspect='auto')
ax_spectrum.set_title('The magnitude spectrum')
ax_spectrum.set_xlabel('Time (The column index of an image)')
ax_spectrum.set_ylabel('FFT bin (Rows + zero-padded array)')

# Bottom panel: the concatenated time-domain signal.
ax_audio = subplot(2, 1, 2)
ax_audio.plot(audio)
ax_audio.set_title('The audio signal')
ax_audio.set_xlabel('Sample')
ax_audio.set_ylabel('Amplitude')

fig.tight_layout()

In [None]:

# Render an audio player for the synthesized signal; `rate` is the sample rate (44100 samples per second).
Audio(data=audio, rate=44100)

The images below are the examples used in Yeo's paper.

### Second image - gradient square

In [None]:
# Load the second test image.
img = imread('media/sawtooth.png')
# imshow ignores `cmap` for RGB(A) input, which would make the colorbar
# unrelated to the picture. Preview the single channel that is sonified
# instead (a 2-D grayscale image is shown as is).
imshow(img if img.ndim == 2 else img[:, :, 0], cmap=cm.gray)
colorbar();

In [None]:
# Sonify the current image: one STFT frame per pixel column.
spectrum, audio = image_to_audio_by_ift(img)
# Transpose so FFT bins run along the vertical axis and time along the horizontal one.
spectrum = array(spectrum).T

fig = gcf()
fig.set_size_inches(13, 9)

# Top panel: the magnitude spectrum of every frame.
ax_spectrum = subplot(2, 1, 1)
ax_spectrum.imshow(spectrum, aspect='auto')
ax_spectrum.set_title('The magnitude spectrum')
ax_spectrum.set_xlabel('Time (The column index of an image)')
ax_spectrum.set_ylabel('FFT bin (Rows + zero-padded array)')

# Bottom panel: the concatenated time-domain signal.
ax_audio = subplot(2, 1, 2)
ax_audio.plot(audio)
ax_audio.set_title('The audio signal')
ax_audio.set_xlabel('Sample')
ax_audio.set_ylabel('Amplitude')

fig.tight_layout()

In [None]:
# Render an audio player for the synthesized signal; `rate` is the sample rate (44100 samples per second).
Audio(data=audio, rate=44100)

### Third image - texture image

In [None]:
# Load the third test image.
img = imread('media/texture_comparison_3.png')
# imshow ignores `cmap` for RGB(A) input, which would make the colorbar
# unrelated to the picture. Preview the single channel that is sonified
# instead (a 2-D grayscale image is shown as is).
imshow(img if img.ndim == 2 else img[:, :, 0], cmap=cm.gray)
colorbar();

In [None]:
# Sonify the current image: one STFT frame per pixel column.
spectrum, audio = image_to_audio_by_ift(img)
# Transpose so FFT bins run along the vertical axis and time along the horizontal one.
spectrum = array(spectrum).T

fig = gcf()
fig.set_size_inches(13, 9)

# Top panel: the magnitude spectrum of every frame.
ax_spectrum = subplot(2, 1, 1)
ax_spectrum.imshow(spectrum, aspect='auto')
ax_spectrum.set_title('The magnitude spectrum')
ax_spectrum.set_xlabel('Time (The column index of an image)')
ax_spectrum.set_ylabel('FFT bin (Rows + zero-padded array)')

# Bottom panel: the concatenated time-domain signal.
ax_audio = subplot(2, 1, 2)
ax_audio.plot(audio)
ax_audio.set_title('The audio signal')
ax_audio.set_xlabel('Sample')
ax_audio.set_ylabel('Amplitude')

fig.tight_layout()

In [None]:
# Render an audio player for the synthesized signal; `rate` is the sample rate (44100 samples per second).
Audio(data=audio, rate=44100)

### Fourth image - frequency modulation visualization

In [None]:
# Load the fourth test image.
img = imread('media/FM.220.5_2_1_2.png')
# imshow ignores `cmap` for RGB(A) input, which would make the colorbar
# unrelated to the picture. Preview the single channel that is sonified
# instead (a 2-D grayscale image is shown as is).
imshow(img if img.ndim == 2 else img[:, :, 0], cmap=cm.gray)
colorbar();

In [None]:
# Sonify the current image: one STFT frame per pixel column.
spectrum, audio = image_to_audio_by_ift(img)
# Transpose so FFT bins run along the vertical axis and time along the horizontal one.
spectrum = array(spectrum).T

fig = gcf()
fig.set_size_inches(13, 9)

# Top panel: the magnitude spectrum of every frame.
ax_spectrum = subplot(2, 1, 1)
ax_spectrum.imshow(spectrum, aspect='auto')
ax_spectrum.set_title('The magnitude spectrum')
ax_spectrum.set_xlabel('Time (The column index of an image)')
ax_spectrum.set_ylabel('FFT bin (Rows + zero-padded array)')

# Bottom panel: the concatenated time-domain signal.
ax_audio = subplot(2, 1, 2)
ax_audio.plot(audio)
ax_audio.set_title('The audio signal')
ax_audio.set_xlabel('Sample')
ax_audio.set_ylabel('Amplitude')

fig.tight_layout()

In [None]:
# Render an audio player for the synthesized signal; `rate` is the sample rate (44100 samples per second).
Audio(data=audio, rate=44100)