In [None]:
%pylab inline
rcParams['figure.figsize'] = (20, 4) #wide graphs by default
from ipywidgets import widgets, interact
from IPython.display import Audio
from scipy.io import wavfile

## Read in an image

In [None]:
# Load the source picture from disk and preview it.
i = imread('media/spider.png')
imshow(i);  # trailing ';' keeps the AxesImage repr out of the cell output

## Convert to greyscale

In [None]:
# Show the raw array layout before touching it.
print(i.shape)

# Split out the first three colour planes (any further plane is ignored).
R, G, B = (i[:, :, channel] for channel in range(3))

# Weighted sum of the planes using the usual luma weights
# (0.299 red, 0.587 green, 0.114 blue).
grey = (R * 299. / 1000) + (G * 587. / 1000) + (B * 114. / 1000)
grey.shape

In [None]:
# Summary statistics of the left-most pixel column only.
first_column = grey[:, 0]
stats = (first_column.min(), first_column.mean(), median(first_column), first_column.max())
print("min:%f mean:%f median:%f max:%f" % stats)

In [None]:
# Invert the intensities, then reverse the row order so that row 0 holds
# what used to be the last row of the picture.
grey = (256 - grey)[::-1]
imshow(grey, cmap='gray');

In [None]:
# Resize the picture to 1023 rows x 1433 columns and convert it to float64.
#
# `scipy.misc.imresize` was deprecated in SciPy 1.0 and removed in SciPy 1.3,
# so on a current install the import fails. Keep using it when it is
# available (identical results on old environments) and otherwise fall back
# to `scipy.ndimage.zoom`.
try:
    from scipy.misc import imresize
except ImportError:
    from scipy import ndimage

    # imresize stretched its input to the 0..255 byte range before resizing;
    # reproduce that so the following cells see the same value range.
    lo, hi = float(grey.min()), float(grey.max())
    span = (hi - lo) or 1.0  # flat image: avoid dividing by zero
    stretched = (grey - lo) * (255.0 / span)

    # zoom() rounds input_size * factor to get the output size, so these
    # factors yield exactly (1023, 1433).
    factors = (1023.0 / grey.shape[0], 1433.0 / grey.shape[1])

    # order=1 is linear interpolation, matching imresize's default 'bilinear'.
    # NOTE: values are close to, but not bit-identical with, imresize's output
    # (no uint8 rounding, different edge handling).
    grey = ndimage.zoom(stretched, factors, order=1).astype(float64)
else:
    grey = imresize(grey, (1023, 1433)).astype(float64)
grey.shape

In [None]:
# Inspect the element type of `grey` (the resize cell ends with .astype(float64)).
grey.dtype

In [None]:
# Largest pixel value after the resize.
grey.max()

## Set DC to 0
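We "stack" a row of zeros onto the bottom of the image, which represents the first FFT bin, the DC bin. With this image, resized above to 1023 rows, a second row of zeros is stacked as well, bringing the height to 1025 = 2048/2 + 1: the number of frequency bins that a 2048-sample real inverse FFT expects.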

In [None]:
# Pad the 1023 image rows to 1025 rows, the number of frequency bins that
# irfft turns into a 2048-sample frame (2 * (1025 - 1) = 2048).
#
# irfft along axis 0 treats row 0 as the DC bin and the last row as the
# Nyquist bin. Previously both zero rows were appended at the end, which left
# image data in the DC bin; put one zero row at each end instead so that DC
# really is 0. The resulting shape is unchanged.
dc_row = zeros_like(grey[:1])        # becomes bin 0 (DC)
nyquist_row = zeros_like(grey[-1:])  # becomes the last bin (Nyquist)
grey = vstack((dc_row, grey, nyquist_row))
shape(grey)

In [None]:
# Inspect the element type of `grey` again after the zero rows were stacked on.
grey.dtype

In [None]:
# Divide by 256 to bring the pixel values down to roughly the 0..1 range.
grey = grey / 256.0

## Scale by the size of the FFT

In [None]:
# Multiply by the FFT size (2048); the inverse FFT divides by the frame length.
grey = grey * 2048.0

In [None]:
# Largest magnitude after scaling.
amax(grey)

In [None]:
# (rows, columns) of the magnitude array.
grey.shape

## Randomize phases

In [None]:
# Draw one phase per bin uniformly from [-pi, pi) and attach it to the
# magnitudes held in `grey`.
phases = uniform(-pi, pi, shape(grey))
mystft = grey * exp(-1j * phases)

## ...or don't

In [None]:
# mystft = grey * exp(-1j * zeros_like(grey))

In [None]:
# (rows, columns) of the complex spectrum array.
mystft.shape

## Take the inverse FFT

In [None]:
# axis=0 tells irfft to treat every column of `mystft` as one block of FFT
# data, so each column is transformed independently.
imystft = fft.irfft(mystft, axis=0)
shape(imystft)

## Build the sound

In [None]:
# Lay the frames end to end: column k of `imystft` becomes samples
# [k * frame_len, (k + 1) * frame_len) of the signal.
#
# The frame length and frame count are read from `imystft` instead of being
# repeated as literals, and the buffer is real-valued from the start (irfft
# already returns real data), so no complex allocation / real() round trip.
frame_len, n_frames = imystft.shape

# Keep the 10000 trailing zero samples the signal has always had.
data = zeros(frame_len * n_frames + 10000)

# Transposing puts one frame per row; ravel() then concatenates the rows.
data[:frame_len * n_frames] = imystft.T.ravel()

# Normalise so the largest absolute sample is 1.
sound = data / amax(abs(data))

In [None]:
# Embed an audio player for the signal at 44.1 kHz.
Audio(sound, rate=44100)

In [None]:
# Save as 16-bit PCM at 44.1 kHz.
# `sound` is normalised to a peak of exactly +/-1, so scaling by 2**15 would
# produce +32768 for a positive peak, which does not fit in int16 (max 32767).
# Scale by 2**15 - 1 so the full range maps onto [-32767, 32767].
wavfile.write('result/spider.wav', 44100, (sound * (2**15 - 1)).astype(int16))

In [None]:
# Average sample value of the signal.
sound.mean()

In [None]:
# Read the file back and plot its spectrogram.
# Use the sample rate stored in the file rather than repeating the literal,
# so the frequency axis always matches what was actually written.
sampleRate, sample = wavfile.read('result/spider.wav')
specgram(sample, Fs=sampleRate, NFFT=460);

In [None]:
# Plot the waveform held in `sound`.
plot(sound)

In [None]:
# here's an alternative that takes the inverse FFT inside the loop, one column at a time
#sound = zeros(2048 * 1433 + 10000) * 1j
#for column in range(1433):
#    start = column * 2048
#    sound[start:start+2048] = fft.irfft(mystft[:,column])

## (Optional) Use windows and overlapping

In [None]:
# One 2048-point window, plotted for inspection.
window = hamming(2048)
#window = bartlett(2048)
plot(window)
shape(window)

In [None]:
# Rebuild the spectrum, this time with every phase set to zero, and take the
# inverse FFT of each column again.
zero_phase = zeros_like(grey)
mystft = grey * exp(-1j * zero_phase)
imystft = fft.irfft(mystft, axis=0)

## Build the sound

In [None]:
# Overlap-add: each windowed frame starts half a frame after the previous one
# and is summed into the output buffer.
#
# Sizes come from `imystft` instead of repeated literals, frame 0 is handled
# by the same loop as every other frame, and the buffer is real-valued from
# the start (irfft output and the window are both real).
frame_len, n_frames = imystft.shape
hop = frame_len // 2  # 50% overlap

# Same length as before: half a frame per column plus 10000 spare samples,
# which also leaves room for the tail of the last frame.
data = zeros(hop * n_frames + 10000)
for column in range(n_frames):
    start = column * hop
    data[start:start + frame_len] += window * imystft[:, column]

# Normalise so the largest absolute sample is 1.
sound = data / amax(abs(data))

In [None]:
# Waveform of the overlap-added signal; ';' hides the Line2D repr.
plot(sound);

In [None]:
# Embed an audio player for the windowed signal at 44.1 kHz.
Audio(sound, rate=44100)

In [None]:
# Save as 16-bit PCM at 44.1 kHz.
# `sound` is normalised to a peak of exactly +/-1, so scaling by 2**15 would
# produce +32768 for a positive peak, which does not fit in int16 (max 32767).
# Scale by 2**15 - 1 so the full range maps onto [-32767, 32767].
wavfile.write('result/spider_windowed.wav', 44100, (sound * (2**15 - 1)).astype(int16))

In [None]:
# Read the windowed file back and plot its spectrogram.
# Use the sample rate stored in the file rather than repeating the literal,
# so the frequency axis always matches what was actually written.
sampleRate, sample = wavfile.read('result/spider_windowed.wav')
specgram(sample, Fs=sampleRate, NFFT=460);

Now go watch [this video](https://www.youtube.com/watch?v=M9xMuPWAZW8&feature=youtu.be&t=300) of the spectrogram of Aphex Twin's track named...

$$\Delta M_i^{-1} = -\alpha \sum_{n=1}^N D_i[n] \left[ \sum_{j \in C[i]} F_{ji}[n-1]+Fext_i[n^{-1}]\right]$$