# Dimensionality reduction

In [2]:
from matplotlib import pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd

import math
from functools import reduce

In [3]:
# Toy data set: 1000 samples of two independent zero-mean normal features.
# x has standard deviation 1 and y has standard deviation 5.
df_vert = pd.DataFrame()
df_vert['x'] = np.random.normal(0, 1, 1000)
df_vert['y'] = np.random.normal(0, 5, 1000)

# x and y are passed by keyword because seaborn >= 0.12 no longer accepts
# them positionally. NOTE: scatterplot itself was added in seaborn 0.9;
# older installs raise "module 'seaborn' has no attribute 'scatterplot'".
sns.scatterplot(x=df_vert['x'], y=df_vert['y'])
# Same scale on both axes so the difference in spread is visible.
plt.axis('equal')
plt.show()

df_vert.hist()

AttributeError: module 'seaborn' has no attribute 'scatterplot'

In [None]:
# Toy data set: points scattered around the line y = 2x.
df_lin = pd.DataFrame()

x_start = 10
x_stop = 200
n_points = 100

x = np.linspace(x_start, x_stop, n_points)
# Gaussian noise with standard deviation 50 is added to the exact line.
y = 2 * x + np.random.normal(0, 50, n_points)

df_lin['x'] = x
df_lin['y'] = y

# x and y are passed by keyword because seaborn >= 0.12 no longer accepts
# them positionally (scatterplot requires seaborn >= 0.9).
sns.scatterplot(x=df_lin['x'], y=df_lin['y'])

# Center each column on zero by subtracting its mean, then plot again.
df_lin['x'] = df_lin['x'] - df_lin['x'].mean()
df_lin['y'] = df_lin['y'] - df_lin['y'].mean()
sns.scatterplot(x=df_lin['x'], y=df_lin['y'])


In [4]:
def vector_add(a, b):
    """Add two vectors element by element and return the result as a list.

    Elements are paired with ``zip``, so the result is as long as the
    shorter of the two inputs.
    """
    summed = []
    for left, right in zip(a, b):
        summed.append(left + right)
    return summed


def vector_sum(vectors):
    """Fold ``vector_add`` over ``vectors`` and return the accumulated vector.

    No initial value is given to ``reduce``, so an empty ``vectors`` raises
    ``TypeError`` and a single vector is returned as-is.
    """
    total = reduce(vector_add, vectors)
    return total


def dot(a, b):
    """Return the dot product of ``a`` and ``b``.

    Elements are paired with ``zip``; two empty vectors give ``0``.
    """
    total = 0
    for left, right in zip(a, b):
        total = total + left * right
    return total


def scalar_multiply(c, v):
    """Return a new list holding every element of ``v`` multiplied by ``c``."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled


def make_matrix(num_rows, num_cols, entry_fn):
    """Build a ``num_rows`` x ``num_cols`` list-of-lists matrix.

    The entry at row ``i``, column ``j`` is ``entry_fn(i, j)``. Entries are
    generated row by row, left to right.
    """
    matrix = []
    for row_index in range(num_rows):
        row = []
        for col_index in range(num_cols):
            row.append(entry_fn(row_index, col_index))
        matrix.append(row)
    return matrix


def direction(w):
    """Return ``w`` divided by its Euclidean length, as a new list.

    For plain Python numbers a vector whose length is zero raises
    ``ZeroDivisionError``; an empty ``w`` yields an empty list.
    """
    squared_total = 0
    for component in w:
        squared_total = squared_total + component ** 2
    magnitude = math.sqrt(squared_total)
    return [component / magnitude for component in w]


def directional_variance_i(x_i, w):
    """Return the squared length of the projection of ``x_i`` onto ``w``.

    ``w`` is normalised with ``direction`` first, so only its direction
    matters.
    """
    unit_w = direction(w)
    projection_length = dot(x_i, unit_w)
    return projection_length ** 2


def directional_variance(X, w):
    """Sum ``directional_variance_i(x_i, w)`` over every row ``x_i`` of ``X``.

    An empty ``X`` gives ``0``.
    """
    total = 0
    for x_i in X:
        total = total + directional_variance_i(x_i, w)
    return total


def directional_variance_gradient_i(x_i, w):
    """Return row ``x_i``'s contribution to the directional-variance gradient.

    The j-th entry is ``2 * (x_i . direction(w)) * x_i[j]``, so the result
    is a list with the same length as ``x_i``.
    """
    projection_length = dot(x_i, direction(w))
    # The comprehension must iterate over x_i; without the `for` clause the
    # expression is a membership test on an undefined name.
    return [2 * projection_length * x_ij for x_ij in x_i]

    
def directional_variance_gradient(X, w):
    """Sum the per-row gradients of every row of ``X`` with ``vector_sum``.

    Each row's gradient comes from ``directional_variance_gradient_i``.
    """
    per_row_gradients = (
        directional_variance_gradient_i(row, w) for row in X
    )
    return vector_sum(per_row_gradients)


def _remove_projection(X, w):
    """Return a copy of ``X`` with each row's projection onto ``w`` removed.

    Uses ``project`` from this file; like ``project``, it expects ``w`` to
    already be a unit vector.
    """
    return [
        [x_ij - p_ij for x_ij, p_ij in zip(x_i, project(x_i, w))]
        for x_i in X
    ]


def principal_component_analysis(X, num_components):
    """Return the first ``num_components`` principal components of ``X``.

    Each component is found with ``first_principal_component``. Its
    projection is then subtracted from every row before the next component
    is searched for. The caller's ``X`` is not modified, because only the
    local name is rebound.

    Returns a list of ``num_components`` direction vectors, in the order
    they were found.
    """
    components = []
    for _ in range(num_components):
        component = first_principal_component(X)
        components.append(component)
        # Strip the direction just found so the next pass works on what is
        # left of the data.
        X = _remove_projection(X, component)
    return components

In [5]:
def first_principal_component(X):
    """Return the unit vector along which ``X`` has the largest variance.

    ``directional_variance`` and its gradient, both bound to ``X``, are
    handed to ``maximize_batch`` with an all-ones starting guess. The result
    is normalised with ``direction``.

    NOTE(review): ``maximize_batch`` is not defined in the visible part of
    this file. It is presumably a gradient-ascent helper that takes
    ``(target_fn, gradient_fn, theta_0)`` -- confirm it is in scope before
    running.
    """
    # Only `reduce` is imported from functools at the top of the file.
    from functools import partial

    # One starting weight per feature (column) of X.
    guess = [1 for _ in X[0]]
    unscaled_maximizer = maximize_batch(
        partial(directional_variance, X),
        partial(directional_variance_gradient, X),
        guess
    )
    return direction(unscaled_maximizer)


def project(v, w):
    """Return ``w`` scaled by ``dot(v, w)``.

    ``w`` is not normalised here, so the result is the projection of ``v``
    onto ``w`` only when ``w`` is a unit vector.
    """
    return scalar_multiply(dot(v, w), w)

In [9]:
def entry_fn(x, y):
    """Return twice ``x``; ``y`` is accepted but ignored."""
    doubled = 2 * x
    return doubled


# Demo: a 10 x 2 matrix filled by entry_fn, printed to check its contents.
A = make_matrix(10, 2, entry_fn)
print(A)



[[0, 0], [2, 2], [4, 4], [6, 6], [8, 8], [10, 10], [12, 12], [14, 14], [16, 16], [18, 18]]


In [74]:
# Demo: element-wise sum of two 3-vectors via vector_sum.
a, b = [1, 2, 3], [4, 5, 6]

display(vector_sum([a, b]))

[5, 7, 9]