In [None]:

%%capture
import import_ipynb
from principal_component_analysis import *


In [None]:

# read longley.csv and pull the two columns of interest out as arrays
df = read_csv('longley.csv')
x, y = (df[column].to_numpy() for column in ('Population', 'Employed'))


In [None]:

# center both samples at zero
x, y = x - mean(x), y - mean(y)

# variances of the centered samples (divide by len, not len - 1)
varx, vary = sum(x**2)/len(x), sum(y**2)/len(y)

# rescale by the standard deviations
x, y = x/sqrt(varx), y/sqrt(vary)


In [None]:

from scipy.linalg import pinv
	
# polynomial function - degree d-1
def poly(t, x, y, d):
	"""Evaluate at t the degree d-1 polynomial fitted to the points (x, y).

	The samples x are lifted to the rows x**(d-1), ..., x**1, x**0, and the
	coefficients are obtained as y times the pseudo-inverse of that matrix.

	t : scalar or array at which the fitted polynomial is evaluated
	x, y : 1-d arrays of equal length holding the data points
	d : number of coefficients (polynomial degree plus one)
	"""
	# exponents in decreasing order: d-1, d-2, ..., 0
	exponents = range(d-1, -1, -1)
	lifted = vstack([ x**k for k in exponents ])
	# one coefficient per exponent, in the same decreasing order
	weights = dot(y, pinv(lifted))
	terms = [ t**k * c for k, c in zip(exponents, weights) ]
	return sum(terms, axis = 0)


In [None]:

# extreme values of the data along each axis
xmin, xmax = amin(x), amax(x)
ymin, ymax = amin(y), amax(y)

# close previous figures
close()

figure(figsize = (12,12))
# six subplots laid out on a 3 x 2 grid
rows, cols = 3, 2

# t interval
t = arange(xmin, xmax, .01)

# d runs over 3, 5, ..., 13, so the plotted degrees d-1 are 2, 4, ..., 12
for i, d in enumerate(range(3, 14, 2)):
	subplot(rows, cols, i+1)
	scatter(x, y, s = 5)
	# red marker at the origin
	scatter(0, 0, s = 7, c = 'r')
	fitted = poly(t, x, y, d)
	plot(t, fitted, c = 'b', lw = .5)
	title(f'degree = {d-1}')
	grid()

show()


In [None]:

from scipy.special import expit as sigma
from numpy.random import default_rng as rng

# one-feature samples stored as an (N, 1) column, so every row is one sample
dataset = array([
	0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 1.75, 2.0, 2.25, 2.5,
	2.75, 3.0, 3.25, 3.5, 4.0, 4.25, 4.5, 4.75, 5.0, 5.5,
]).reshape(-1, 1)
N = dataset.shape[0]
# one 0/1 label per sample, in the same order as the rows of dataset
labels = array([
	0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
	1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
])

# w = (m, b)

def q(x,w):
	"""Return sigma(m.x + b) for the sample x, where w packs (m, b).

	x : 1-d array holding one sample
	w : 1-d array whose last entry is b and whose leading entries are m
	"""
	slope = w[:-1]
	intercept = w[-1]
	activation = dot(slope, x) + intercept
	return sigma(activation)

def gradient(dataset, labels, w):
	"""Average over the samples of (q(x,w) - p) * (x, 1).

	dataset : iterable of samples x (one 1-d array per sample)
	labels : iterable of targets p, paired with the samples in order
	w : current parameters (m, b), as expected by q

	Returns a 1-d array with the same length as w.
	"""
	contributions = []
	for sample, target in zip(dataset, labels):
		residual = q(sample, w) - target
		# append a constant 1 so the last component corresponds to b
		augmented = hstack([sample, 1])
		contributions.append(residual * augmented)
	return mean(array(contributions), axis = 0)

def logreg(dataset, labels, seed = None, max_iter = None):
	"""Fit w = (m, b) by fixed-step gradient descent and return it.

	Starting from a random w, repeats w -= t*g, where g is
	gradient(dataset, labels, w), until allclose(g, 0) holds. The starting
	point, the final point, the final gradient and the number of iterations
	are printed.

	Parameters
	----------
	dataset : 2-d array-like, one sample per row
	labels : sequence with one target per row of dataset
	seed : optional seed forwarded to default_rng for the starting point;
		None (the default) draws a different starting point on every call
	max_iter : optional cap on the number of descent steps; None (the
		default) iterates until the gradient passes the allclose test

	Returns
	-------
	w : 1-d array (m, b) reached when the loop stops
	"""
	samples = array(dataset, dtype = float)
	# number of features
	d = samples.shape[1]
	# L = 1 + |mu|^2 + trace(Q), with mu the sample mean and Q the biased
	# covariance of the features, which equals 1 + the mean squared norm of
	# the samples. It is computed directly because cov(dataset, bias = True)
	# with NumPy's default rowvar=True treats every row (sample) as a
	# variable and returns an N x N matrix whose trace is not the feature
	# variance (it is 0 for single-feature data).
	L = 1 + (samples**2).sum(axis = 1).mean()
	t = 1/L # short-step learning rate
	w = rng(seed).random(d+1) # initial m,b
	g = gradient(dataset, labels, w)
	num_iter = 0
	print(f'starting m,b: {w}')
	while not allclose(g,0) and (max_iter is None or num_iter < max_iter):
		w -=  t * g
		g = gradient(dataset, labels, w)
		num_iter += 1
	print(f'minimizer m,b: {w}')
	print(f'gradient at minimizer: {g}')
	print(f'num iter: {num_iter}')
	# hand the fitted parameters back so callers can use them
	return w

# run the gradient descent on the sample data defined above; logreg prints the
# starting point, the minimizer, the final gradient and the iteration count
logreg(dataset,labels)


In [None]:

%matplotlib ipympl

mnist = read_csv('mnist.csv').to_numpy()
# column 0 holds the labels, the remaining columns hold the samples
labels, dataset = mnist[:, 0], mnist[:, 1:]

# reduce the samples to n components
n = 3
engine = PCA(n_components = n)
reduced = engine.fit_transform(dataset)

# 3d axes to draw on
ax = axes(projection = '3d')
# one color per label value; the tuple index is the label
colors = (
	'blue', 'green', 'black', 'brown', 'gray',
	'cyan', 'turquoise', 'orange', 'orchid', 'red',
)

from scipy.spatial import ConvexHull

# draw, for each label i, the facets of the convex hull of its reduced points
for i, color in enumerate(colors):
	mask = labels == i
	points = reduced[mask]
	hull = ConvexHull(points)
	for simplex in hull.simplices:
		# simplex indexes the vertices of one facet within points
		vertices = points[simplex]
		ax.plot(*vertices.T, '-', c = color)

ax.axis('equal')
ax.axis('off')
show()
