Principal Component Analysis
---------------------



In [4]:
import numpy
import scipy

from time import sleep

from noise import Noise

from ipywidgets import interact

from liwc import liwcify

import bokeh.io
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.resources import INLINE
from bokeh.models import Range1d

In [5]:
# Configure bokeh to render into the notebook using INLINE resources.
output_notebook(resources=INLINE)
# Workaround for https://github.com/bokeh/bokeh/issues/4987: without the
# short pause and the manually set flag below, `Run All` messes things up.
sleep(1)
bokeh.io._nb_loaded=True  # private bokeh flag, forced by hand as part of the workaround

In [6]:
# Generate brownian noise and resample it into a 2-D array.  Below, each
# column is used as one sample (observation) and each row as one position
# along the horizontal plotting axis.
data = Noise(duration=0.1).brownian().resample_array(n_cols=6000)
# Rescale so that the largest entry becomes 1.0.
data = data.astype(float) / data.max()
# One x coordinate per row of `data`.
x = numpy.arange(data.shape[0])

# Covariance between the rows of `data`, estimated across its columns.
# NOTE(review): this assumes the data is already mean-centered, so no mean
# is subtracted here -- confirm against Noise.resample_array.
# The unbiased estimate divides by (number of observations - 1).  The
# observations are the columns, so the count is data.shape[1], not
# data.shape[0].
n_observations = data.shape[1]
cov = data @ data.T / (n_observations - 1)

# Take the covariance matrix to a fractional power.
# This lets us take smaller steps.
# numpy.linalg.svd returns the third factor already transposed, so `V` can
# be multiplied in directly without a further transpose.
U, s, V = numpy.linalg.svd(cov)
# Scaling the columns of U by s ** 0.1 equals U @ diag(s ** 0.1) without
# materialising a dense diagonal matrix.
cov_pow = (U * s ** 0.1) @ V

In [7]:
# Start from a single sample -- the first column of the data -- and record
# its Euclidean length so that every iterate can be rescaled to match it.
sample = data[:, 0]
mag = numpy.dot(sample.T, sample) ** 0.5

# Repeatedly applying the (fractional-power) covariance matrix to the
# sample makes it approach the predominant harmonic structure in the
# data -- A.K.A. the power iteration algorithm.  Every intermediate
# vector is kept, beginning with the untouched starting sample.
smoothed_samples = [sample]
for _ in range(9):
    projected = numpy.dot(cov_pow, smoothed_samples[-1])
    length = numpy.dot(projected.T, projected) ** 0.5
    # Rescale to the magnitude of the starting sample.
    sample = mag * projected / length
    smoothed_samples.append(sample)

In [8]:
fig = figure(title="Brownian Noise", plot_height=300, plot_width=600)

# Draw every recorded iterate on one figure.  The line colour is an
# (R, G, B, alpha) tuple computed from the iterate's position in the list:
# red and green rise together, blue falls, and alpha grows up to 1.0 for
# the last line.
selected_samples = smoothed_samples
n_selected = len(selected_samples)
for step, curve in enumerate(selected_samples):
    warm = min(32 * step, 255)               # shared red and green level
    blue = max(200 - 16 * step, 0)
    alpha = (step + 2.0) / (n_selected + 1)
    ln = fig.line(x, curve, line_width=1, color=(warm, warm, blue, alpha))
_ = show(fig)

In [9]:
fig = figure(title="Brownian Noise", plot_height=300, plot_width=600)

# Set both axis ranges explicitly: x spans the length of one sample,
# y spans -1.1 to 1.1.
x_bounds = Range1d(0, len(selected_samples[0]))
y_bounds = Range1d(-1.1, 1.1)
fig.set(x_range=x_bounds, y_range=y_bounds)

# Draw the first entry of selected_samples; `ln` keeps the returned line so
# that its data source can be modified afterwards.
i = 0
s = selected_samples[i]
ln = fig.line(x, s, line_width=1, color=(64, 64, 168, 0.6))

show(fig)

def update(i=0):
    """Replace the plotted line's y data with ``selected_samples[i]``.

    After swapping the 'y' column of the line's data source, calls
    ``push_notebook()``.
    """
    ln.data_source.data['y'] = selected_samples[i]
    push_notebook()

In [10]:
# Let `i` range over every valid index into selected_samples:
# (min, max, step) = (0, last index, 1).
index_spec = (0, len(selected_samples) - 1, 1)
interact(update, i=index_spec)

<function __main__.update>

In [2]:
# NOTE(review): 1342 and 'VERB' are unexplained here -- presumably a text
# identifier and a LIWC category name; confirm against liwc.liwcify.
# This rebinds the name `data`, which the noise/PCA cells also use.
data = liwcify(1342, 'VERB')

In [3]:
# Bare expression: the notebook echoes the value of `data` as the cell output.
data

[52,
 74,
 71,
 59,
 50,
 60,
 50,
 57,
 60,
 55,
 66,
 56,
 66,
 57,
 48,
 53,
 51,
 80,
 67,
 49,
 56,
 62,
 44,
 63,
 74,
 60,
 56,
 65,
 60,
 58,
 53,
 54,
 66,
 62,
 66,
 64,
 68,
 54,
 63,
 49,
 50,
 36,
 50,
 52,
 58,
 49,
 61,
 57,
 54,
 52,
 46,
 61,
 48,
 44,
 53,
 71,
 65,
 55,
 55,
 45,
 47,
 59,
 62,
 43,
 47,
 52,
 76,
 61,
 62,
 64,
 60,
 44,
 58,
 67,
 69,
 54,
 65,
 55,
 60,
 54,
 55,
 64,
 53,
 73,
 70,
 63,
 55,
 67,
 40,
 85,
 70,
 67,
 73,
 60,
 56,
 68,
 50,
 56,
 57,
 55,
 51,
 52,
 71,
 52,
 55,
 53,
 57,
 63,
 68,
 62,
 71,
 51,
 52,
 62,
 68,
 63,
 55,
 63,
 47,
 61,
 43,
 54,
 57,
 58,
 63,
 53,
 59,
 44,
 53,
 54,
 68,
 70,
 49,
 62,
 54,
 67,
 69,
 63,
 63,
 71,
 67,
 75,
 51,
 73,
 50,
 43,
 47,
 61,
 54,
 56,
 48,
 58,
 56,
 51,
 54,
 40,
 56,
 55,
 68,
 41,
 57,
 53,
 54,
 56,
 51,
 46,
 54,
 64,
 68,
 67,
 80,
 64,
 52,
 65,
 66,
 66,
 59,
 75,
 53,
 69,
 85,
 65,
 55,
 64,
 64,
 70,
 64,
 81,
 71,
 78,
 70,
 63,
 55,
 68,
 63,
 51,
 78,
 81,
 73,
 72,
