# Music to Graphic Art Transformation Tests

This notebooks deals with some ideas starting with Fourier transforms, wave representations and Spectrograms



In [1]:
# general imports
import numpy as np
import pandas as pd
import scipy as sp
from scipy import signal

In [2]:
# audio reading and writing
from scipy.io.wavfile import read as wavread
import pydub
import wave

In [3]:
# use some things to normalize
from torch import FloatTensor
from torch.nn.functional import normalize as torchnorm

# vecs = np.random.rand(3, 16, 16, 16)
# norm_vecs = normalize(FloatTensor(vecs), dim=0, eps=1e-16).numpy()

In [4]:
#graphical libraries
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
import colorcet as cc
from bokeh import palettes

import matplotlib.pyplot as plt

%matplotlib inline

Audio was already extracted from yuotube videos for tests with the following commands:


    ffmpeg -i my_video.mp4 -c copy -map 0:a output_audio.mp4

    ffmpeg -i my_video.mp4 output_audio.wav



In [5]:
output_notebook()

In [6]:
# load sound wave
feelgood = wave.open("/home/leo/Music/lucaStragnoli-FeelGoodINC.wav")

In [7]:
wparams = feelgood.getparams()

In [8]:
wparams

_wave_params(nchannels=2, sampwidth=2, framerate=44100, nframes=8580096, comptype='NONE', compname='not compressed')

In [9]:
tsec = float(wparams.nframes)/wparams.framerate

In [10]:
tsec / 60

3.2426666666666666

In [11]:
# load soundwave to numpy (to do nice graphs we need it)
npwavread = wavread("/home/leo/Music/lucaStragnoli-FeelGoodINC.wav")

In [12]:
npwavread[1].shape

(8580096, 2)

In [13]:
fgfft = np.fft.fft(npwavread[1],axis=-1)

In [14]:
fgfft.real

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       ...,
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [15]:
#plt.plot(freq, fgfft.real, freq, fgfft.imag)
# plt.plot(fgfft.real)
# plt.show()

In [16]:
f, t, Sxx = signal.spectrogram(npwavread[1])

  .format(nperseg, input_length))


In [17]:
f.shape, t.shape, Sxx.shape

((2,), (1,), (8580096, 2, 1))

In [18]:
# plt.pcolormesh(t, f, Sxx[:1])
# plt.ylabel('Frequency [Hz]')
# plt.xlabel('Time [sec]')
# plt.show()

In [19]:
npleft, npright = npwavread[1].transpose()

In [20]:
npright.shape

(8580096,)

In [21]:
np.max(npwavread[1])

32767

In [22]:
norm_vecs = torchnorm(FloatTensor(npwavread[1]), eps=1e-16).numpy()

In [23]:
np.min(norm_vecs)

-1.0

In [24]:
SAMPLES = 128

In [25]:
resampled = sp.signal.resample(npwavread[1], SAMPLES)

In [26]:
resampled = np.transpose(resampled)

In [27]:
resampled.shape, resampled.max(), resampled.min()

((2, 128), 14.016053694302132, -16.708223775666923)

In [28]:
resampled[:,:10]

array([[-1.02882238,  1.28023031, -3.0595801 , -1.86459215,  0.94286881,
         5.36942226,  1.30012673, -3.65477935,  2.30575476,  1.78652749],
       [-0.1406882 ,  5.26977948, -5.37238399,  3.4152935 , -5.12355997,
        -2.35701064, -3.78216841,  4.89446312, -3.53016807,  1.55912549]])

In [29]:
np.transpose(resampled).shape

(128, 2)

In [30]:
x = np.abs(np.transpose(resampled[0]))

In [31]:
y = np.linspace(0,1, SAMPLES)

In [32]:
y.shape, x.shape

((128,), (128,))

In [33]:
# p = figure(title="linear", x_axis_label='time', y_axis_label='volume')

In [34]:
# p.line(np.transpose(resampled[0]), y, line_width=2)

In [35]:
# show(p)

In [36]:
p1 = figure(title="linear", x_axis_label='time', y_axis_label='volume')

In [37]:
y.shape

(128,)

In [38]:
# p1.line(y, np.transpose(resampled)[0], line_width=2)
p1.line(y, x, line_width=2)

In [39]:
show(p1)

In [40]:
p2 = figure(title="hexbin", x_axis_label='time', y_axis_label='volume')
p2.hexbin(x, y, size=0.5, hover_color="pink", hover_alpha=0.8)

(GlyphRenderer(id='1139', ...),      q  r  counts
 0    0  0       7
 1    1 -1      20
 2    1  0       9
 3    2 -1      19
 4    2  0       7
 5    3 -1      14
 6    3  0       5
 7    4 -1       6
 8    4  0       2
 9    5 -1       4
 10   5  0       2
 11   6 -1       5
 12   6  0       5
 13   7 -1       2
 14   7  0       3
 15   8 -1       2
 16   9 -1       3
 17   9  0       4
 18  10 -1       1
 19  10  0       2
 20  11 -1       2
 21  12  0       1
 22  14 -1       1
 23  19 -1       2)

In [41]:
show(p2)

In [42]:
radii = np.random.random(size=SAMPLES) * 1.5
colors = [
    "#%02x%02x%02x" % (int(r), int(g), 150) for r, g in zip(50+2*x, 10+2*y)
]

In [43]:
p3 = figure(title="scatter", x_axis_label='time', y_axis_label='volume')

p3.scatter(x, y, radius=radii,
          fill_color=colors, fill_alpha=0.6,
          line_color=None)

In [44]:
show(p3)

In [52]:
p4 = figure(title="vbar", x_axis_label='time', y_axis_label='volume')

In [53]:
p4.vbar(y, width=0.2, top=x)

In [54]:
show(p4)

In [48]:

p5 = figure(title = "circles")

p5.circle(resampled[0], resampled[1],
         color=colors, fill_alpha=0.2, size=10)

show(p5)