In [None]:
%pylab inline
rcParams['figure.figsize'] = (10, 4) #wide graphs by default
from __future__ import print_function
from __future__ import division
from IPython.display import display, HTML, Audio

# Variance and correlation

![http://imgs.xkcd.com/comics/random_number.png](http://imgs.xkcd.com/comics/random_number.png)

![http://imgs.xkcd.com/comics/sports.png](http://imgs.xkcd.com/comics/sports.png)

![http://imgs.xkcd.com/comics/cell_phones.png](http://imgs.xkcd.com/comics/cell_phones.png)

[http://xkcd.com/882/](http://xkcd.com/882/)

## Variance and covariance

$$\operatorname{Var}(X) =\sigma^2 =\int (x-\mu)^2  f(x)  dx =\int x^2  f(x)  dx - \mu^2$$

$$\operatorname{Var}(X) = \sum_{i=1}^n p_i\cdot(x_i - \mu)^2 = \sum_{i=1}^n (p_i\cdot x_i^2) - \mu^2$$

http://en.wikipedia.org/wiki/Variance

For equally likely values:

$$\operatorname{Var}(X) = \frac{1}{n} \sum_{i=1}^n (x_i - \mu)^2$$


$\mu$ is the mean.

(Also see [Mean squared error](https://en.wikipedia.org/wiki/Mean_squared_error) and [Least squares](https://en.wikipedia.org/wiki/Least_squares))

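In the discrete case the probability $p_i$ plays the role of the density $f(x)$, and the integral over $x$ becomes a sum over the index $i$.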

In [None]:
# Draw 10000 samples from a normal distribution with standard deviation 2.0
samps = normal(scale=2.0, size= 10000)
samps.var()
# Sample variance; it should come out close to scale**2 = 4.0.
# (Variance is defined for any distribution; only these samples are Gaussian.)

In [None]:
# Standard deviation and its square (the square is the variance)
samps.std(), samps.std()**2

In [None]:
# Two independent uniform [0, 1) sequences of 100 samples each
a, b = rand(100), rand(100)
# cov returns the 2x2 covariance matrix laid out as
#   [[cov(a, a), cov(a, b)],
#    [cov(b, a), cov(b, b)]]
cov(a, b)

In [None]:
# Same experiment with the values scaled up by a factor of 100
a, b = rand(100) * 100, rand(100) * 100
cov(a, b)

In [None]:
# Scaled experiment again, now with 100000 samples per sequence
a, b = rand(100000) * 100, rand(100000) * 100
cov(a, b)

In [None]:
# The variances correspond to the diagonal entries (aa, bb) of the covariance matrix.
# Note: var() normalizes by N by default while cov() normalizes by N - 1,
# so the two agree only approximately.
var(a), var(b)

In [None]:
# Two independent uniform noise sequences
a, b = rand(100), rand(100)
# Seven full sine cycles spread over the same 100 samples
d = sin(linspace(0, 2 * pi * 7, 100))
# Add the same sine to both noise sequences and plot the results
plot(a + d)
plot(b + d);

In [None]:
# a + d and b + d share the sine component d, which contributes to the
# off-diagonal (cross) terms of the covariance matrix.
cov(a + d, b + d)

In [None]:

# Three independent uniform noise sequences, drawn in the order a, b, c
a, b, c = (rand(100) for _ in range(3))
# 2 * c is added to both inputs, so the two sums are related through it.
# What to look for: the off-diagonal entries (covariance) are close in size
# to the diagonal entries (variances).
cov(a + 2 * c, b + 2 * c)

In [None]:
# Same shared-component experiment with 100000 samples per sequence
a, b, c = (rand(100000) for _ in range(3))
# Note: covariance ignores the order of the samples; it is not a
# time-domain measure.
cov(a + 2 * c, b + 2 * c)

In [None]:
# a and b were last drawn with 100000 samples, while the sine d has only 100,
# so a + d could not be broadcast. Re-draw both at d's length first.
a = rand(len(d))
b = rand(len(d))
# First signal shifted by +2, second signal scaled by -2
plot(a + d + 2)
plot(-2 * (b + d))
# An additive offset does not change the covariance; the factor -2 scales it
# and flips its sign.
cov(a + d + 2, -2 * (b + d))

In [None]:
# Noise plus a three-times-larger sine, and the negated counterpart built from b
plot(a + d * 3)
plot(-(b + d * 3));

In [None]:
# The second signal is negated, so the shared component d * 3 now contributes
# with a negative sign to the off-diagonal terms.
cov(a + (d * 3), -(b + (d * 3)))

## Correlation and autocorrelation

$$(f \star g)(\tau)\ \stackrel{\mathrm{def}}{=} \int_{-\infty}^{\infty} f^*(t)\ g(t+\tau)\,dt$$

$$ (f \star g)(n) = \frac{1}{N} \sum_{i=1}^N f(i) \cdot g(i + n)$$


[cross correlation](https://en.wikipedia.org/wiki/Cross-correlation)
[autocorrelation](https://en.wikipedia.org/wiki/Autocorrelation)

Unlike covariance, cross-correlation depends on the order of the samples: time matters.

In [None]:
# Two sines with 100 cycles over 44100 samples; the second one starts
# 0.1 radians later in phase.
sig1, sig2 = (
    sin(linspace(phase, phase + 100 * 2 * pi, 44100)) for phase in (0, 0.1)
)
plot(sig1)
plot(sig2)
# Zoom in on the first 1000 samples
xlim(0, 1000)

In [None]:
# Plot the cross-correlation of the two sines for lags -500..500 as a line
# (usevlines=False). `lags` holds the lag values, `c` the correlation at each lag.
lags, c, lines, line = xcorr(sig1, sig2, maxlags=500, usevlines=False)
grid()

In [None]:
# Same cross-correlation restricted to lags -50..50
lags, c, lines, line = xcorr(sig1, sig2, maxlags=50, usevlines=False)
grid()

In [None]:
# One value per lag: 2 * maxlags + 1 entries
len(c)

In [None]:
# Correlation values against their array index (not against the lag)
plot(c);

In [None]:
# Array index of the largest cross-correlation value
argmax(c)

In [None]:
# Convert the index of the peak into the corresponding lag (in samples)
cc_index = lags[argmax(c)]
print(cc_index)

In [None]:
# Correlation against lag, with the peak lag marked in red
plot(lags, c)
vlines(cc_index, ymin=0.5, ymax=1.09, color='r', lw=2)
text(cc_index + 1, 1.05, 'CC peak', color='r')
grid()

In [None]:
# Sample rate and sine frequency (100 cycles over 44100 samples)
sr, f = 44100, 100.0
# Period in number of samples
Ps = sr / f
Ps

In [None]:
# Lag (in samples) of the cross-correlation peak
cc_index

CC can be used to determine pitch and periodicity.

In [None]:
# Peak lag expressed as a fraction of one period
cc_index / Ps

In [None]:
# Peak lag converted to a phase angle in radians
2 * pi * cc_index / Ps

In [None]:
# Phase angle of the neighbouring lag (one sample further), for comparison:
# the lag can only be resolved in whole samples.
2 * pi * (cc_index + 1) / Ps

Two different noise samples produce low values of cross-correlation:

In [None]:
# Two independent uniform noise signals centred on zero (range [-0.5, 0.5))
noise1, noise2 = (random.random(44100) - 0.5 for _ in range(2))

In [None]:
# Cross-correlate the two noise signals over lags -500..500
lags, c, lines, line = xcorr(noise1, noise2, maxlags=500, usevlines=False);
grid()

And no clear patterns in the CC function:

In [None]:
# Two independent Gaussian noise signals (default mean and scale)
noise1, noise2 = (normal(size=44100) for _ in range(2))

In [None]:
# Cross-correlate the two noise signals over lags -500..500
lags, c, lines, line = xcorr(noise1, noise2, maxlags=500, usevlines=False);
grid()

Cross-correlation can reveal relationships that are hard to see:

In [None]:
# Uniform noise rescaled to the range [-1, 1)
a = 2 * rand(44100) - 1
plot(a);

In [None]:
# Low-amplitude sine (0.3) with 200 cycles over the 44100 samples
b = 0.3 * sin(linspace(0, 2 * pi * 200, 44100))
# The sine is hard to spot once it is added to the noise
plot(a + b);

In [None]:
c = (rand(44100) * 2) - 1
d = sin(linspace(0.7, 0.7 + (2 * pi * 200), 44100)) * 0.3
plot(c + d)
pass

In [None]:
plot(c + d)
xlim(0, 1000)

In [None]:
lags, c, lines, line = xcorr(a + b, c + d, maxlags=500)
pass

In [None]:
# c covers lags -500..500, so index 500 is lag 0. Search lags 0..99 for the
# peak and turn the result back into an index into c / lags.
m = argmax(c[500:600]) + 500

In [None]:
# Lag (in samples) at which the peak was found
lags[m]

In [None]:
# Sample rate and sine frequency (200 cycles over 44100 samples)
sr, f = 44100, 200.0
# Period in number of samples
Ps = sr / f
Ps

In [None]:
# Peak lag expressed as a fraction of one period
lags[m] / Ps

In [None]:
# Peak lag converted to a phase angle in radians
(lags[m] / Ps) * 2 * pi

# Autocorrelation

In [None]:
# Autocorrelation of the noisy sine a + b for lags -500..500
lags, c, lines, line = acorr(a + b, maxlags=500);

In [None]:
# Plot the positive lags against their real lag values: c[501] belongs to
# lag 1, so plotting c[501:] on its own shifted the curve by one sample
# relative to the marker below.
plot(lags[501:], c[501:])
# Period of the sine in samples (44100 / 200 = 220.5)
vlines(220.5, -0.15, 0.15)

CC for Inter-aural time delay (or stereo relationships) using running cross-correlation

In [None]:
%pylab inline

In [None]:
from scipy.io import wavfile

In [None]:
# Read the sample rate and the audio samples. Note that this replaces the
# value of sr set by hand in earlier cells with the file's sample rate.
sr, in_sig = wavfile.read('media/cars.wav')

In [None]:
# Keep only the first 318000 rows (all columns). The 2-D indexing assumes a
# multi-channel file - TODO confirm cars.wav is stereo.
in_sig = in_sig[:318000, :]

In [None]:
# Running cross-correlation between channel 0 and channel 1:
# one correlation function per analysis window.
win_size = 1024  # window length in samples
hop = 256        # step between consecutive windows in samples

cc_lags = 0.001 * sr # 1 ms max lag
print(cc_lags)


rccf = []
# Start index of every window that fits completely inside the signal
win_start = arange(0, in_sig.shape[0] - win_size, hop)
for start in win_start:
    win = in_sig[start: start + win_size].astype(float)
    # Derive the lag range from the file's sample rate instead of hard-coding
    # 44, which is only right for 44100 Hz; xcorr needs an integer lag count.
    # NOTE(review): xcorr also draws every window's correlation on the current
    # axes, so this cell produces a heavily overplotted figure.
    lags, c, lines, line = xcorr(win[:,0], win[:,1], maxlags=int(cc_lags))
    rccf.append(c)


In [None]:
# Running cross-correlation as an image: one column per window, one row per lag
imshow(array(rccf).T, aspect='auto')

In [None]:
# Waveform of the excerpt (a 2-D array is drawn as one line per column)
plot(in_sig)

CC for tempo and rhythm estimation:

In [None]:
# Load the excerpt; sr is replaced by this file's sample rate.
sr, in_sig = wavfile.read('media/superstition.wav')
# Only the last expression of the cell is displayed, so this value is not shown.
shape(in_sig)
# Every second element of the flattened array. For a two-column (stereo)
# array that would be the first channel - TODO confirm the file is stereo.
shape(in_sig.flat[0::2])

In [None]:
# Audio player for every second flattened sample (presumably the first
# channel of a stereo file - verify)
Audio(data = in_sig.flat[0::2], rate = sr)

In [None]:
# Waveform of every second flattened sample (presumably the first channel - verify)
plot(in_sig.flat[0::2])

In [None]:
# Array dimensions of the loaded audio
in_sig.shape

In [None]:
# Autocorrelation of the first 100000 samples of column 0 (converted to
# float first), for lags up to +/-50000 samples
lags,c, lines, line = acorr(in_sig[:100000,0].astype(double), maxlags= 50000);

## Two-dimensional cross-correlation

![sidereusnuncius_crawford_dbyn2008.jpg](media/sidereusnuncius_crawford_dbyn2008.jpg)

In [None]:
# Load the image and display it; the last line shows the array's shape.
i = imread('media/galileo.png')
imshow(i)
shape(i)

In [None]:
# Plot the fourth channel (index 3), transposed - presumably the alpha
# channel of an RGBA image; verify.
plot(i[:,:,3].T);

In [None]:
# Dimensions of the loaded image array
i.shape

In [None]:
# Element type of the image array
i.dtype

In [None]:
# Collapse the image to 2-D: drop the last channel, add up the remaining
# ones along the channel axis and divide by 3.
i = i[:, :, :-1].sum(axis=2) / 3.0
imshow(i)
colorbar()

In [None]:
# After the reduction over the channel axis the array is 2-D
i.shape

In [None]:
# The 2-D image rendered with the default colormap
imshow(i)
colorbar()

In [None]:
# The same data rendered with the grayscale colormap
imshow(i, cmap='gray')
colorbar()

In [None]:
# Binarize: values above 0.9 become 0, all others become 1.
# NOTE(review): this overwrites i, so running the cell a second time inverts
# the image instead of reproducing it.
i = where(i > 0.9, 0, 1)
imshow(i, cmap='gray')
colorbar()

In [None]:
# Load the second, smaller pattern image (correlated against i further down)
# and display it
o = imread('media/o.png')
imshow(o)
colorbar()

In [None]:
# Sum over the last axis, divide by 3, then binarize with the same rule as
# the main image: values above 0.9 become 0, all others become 1.
# NOTE(review): unlike the main image, no channel is dropped before summing -
# confirm o.png has exactly three channels.
o = o.astype(float).sum(axis=-1)/3
o = where(o > 0.9, 0, 1)
imshow(o, cmap=cm.gray, interpolation='nearest')

In [None]:
from scipy.signal import correlate2d
# 2-D cross-correlation of the binarized image with the binarized template
# (default mode, i.e. the full correlation output)
cc = correlate2d(i, o)
imshow(cc)
colorbar()
# Make the figure taller so the map is easier to read
gcf().set_figheight(8)

In [None]:
# The same correlation map in grayscale
imshow(cc, cmap=cm.gray)
colorbar()
gcf().set_figheight(8)

In [None]:
# Keep only the positions whose correlation exceeds 200 (shown as 1)
imshow(where(cc > 200, 1, 0), interpolation='nearest', cmap=cm.gray)
colorbar()
gcf().set_figheight(8)

In [None]:
# Stricter threshold: only positions whose correlation exceeds 300
imshow(where(cc > 300, 1, 0), interpolation='nearest', cmap=cm.gray)
colorbar()
gcf().set_figheight(8)

In [None]:
# maximum_filter is part of the public scipy.ndimage namespace; the old
# scipy.ndimage.filters module is deprecated.
from scipy.ndimage import maximum_filter

# Replace every value by the maximum of its 10x10 neighbourhood so that the
# correlation peaks become easier to see
imshow(maximum_filter(cc, (10,10)))
gcf().set_figheight(8)

In [None]:
# Left: correlation map after a 50x50 maximum filter
subplot(1, 2, 1)
imshow(maximum_filter(cc, size=(50, 50)))
# Right: the binarized image, for comparison
subplot(1, 2, 2)
imshow(i, cmap='gray')
gcf().set_figheight(8)

In [None]:
# Each element becomes the maximum of cc within its 50x50 neighbourhood
mf = maximum_filter(cc, (50,50))

In [None]:
# Flat (1-D) index of the first maximum of the filtered map
argmax(mf)

In [None]:
# Convert the flat index into (row, column) coordinates
unravel_index(argmax(mf), mf.shape)

In [None]:
# Flat (1-D) index of the first maximum of the unfiltered correlation map
argmax(cc)

In [None]:
# (row, column) position of the strongest match in the correlation map
unravel_index(argmax(cc), cc.shape)

By: Andrés Cabrera mantaraya36@gmail.com
For MAT course MAT 201A at UCSB

This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/

![http://i.creativecommons.org/l/by-nc-sa/3.0/88x31.png](http://i.creativecommons.org/l/by-nc-sa/3.0/88x31.png)