### 2021-2 Machine Learning (01)
## Homework #3-2: PCA
---
Copyright (c) Code Designed by Prof. Jaehyeong Sim 

Department of Computer Science and Engineering

Ewha Womans University

## Prepare input data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

# Number of cluster centers used to generate the synthetic data.
K = 5
# Number of features per sample. The rest of the notebook names the columns
# x1, x2, x3 and draws 3-D scatter plots, so the data must be 3-dimensional.
D = 3

# Generate 20 samples around K centers; random_state is fixed so every run
# produces the same points. t receives the second value returned by
# make_blobs (the per-sample cluster labels, per the scikit-learn docs).
X, t = make_blobs(n_samples = 20, n_features=D, centers=K, random_state=3, cluster_std=1)
# Number of samples.
N = len(X)

## Visualize the data in 3D space

In [None]:
# Give each of the three feature columns a name so the plot code can refer
# to them by label.
df = pd.DataFrame(X, columns=["x1", "x2", "x3"])

# Start from an empty figure and add the samples as small black markers.
fig = go.Figure()
fig.add_trace(
  go.Scatter3d(
    x=df["x1"],
    y=df["x2"],
    z=df["x3"],
    mode='markers',
    marker_size=2,
    marker_color="black",
  )
)
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.show()

## Get Xc and Visualize It

In [None]:
# Per-feature mean of the samples (averaged over rows).
Xbar = np.mean(X, axis=0)
print(Xbar)
# Centered data: every sample expressed relative to the mean.
Xc = X - Xbar

# Plot the samples (black) together with their mean (orange).
fig = go.Figure()
fig.add_trace(
  go.Scatter3d(
    x=df["x1"],
    y=df["x2"],
    z=df["x3"],
    mode='markers',
    marker_size=2,
    marker_color="black",
    showlegend=False,
  )
)
fig.add_trace(
  go.Scatter3d(
    x=[Xbar[0]],
    y=[Xbar[1]],
    z=[Xbar[2]],
    mode='markers',
    marker_size=3,
    marker_color="orange",
    showlegend=False,
  )
)
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.show()

## Get Covariance Matrix

In [None]:
# Covariance between the feature columns of the centered data.
# rowvar=False tells np.cov that each column (not each row) is a variable,
# which is equivalent to passing the transposed matrix.
cov_Xc = np.cov(Xc, rowvar=False)
print(cov_Xc)

## Get Eigenvectors and Eigenvalues

In [None]:
# The covariance matrix is symmetric, so use eigh: it returns real
# eigenvalues in ascending order and orthonormal eigenvectors as columns.
# (np.linalg.eig makes no ordering guarantee.)
eigenval, eigenvec = np.linalg.eigh(cov_Xc)

# Reorder to descending eigenvalue so that eigenvec[:, 0] is the direction of
# largest variance, i.e. the first principal component used further below.
order = np.argsort(eigenval)[::-1]
eigenval = eigenval[order]
eigenvec = eigenvec[:, order]

print(eigenval)
print(eigenvec)

## Get Projections Z and Visualize Them

In [None]:
def plotsurface(center, direction, num_steps, step_size):
  """Sample points along the straight line through center along direction.

  Point i (for i in 0 .. num_steps - 1) lies at
  center + step_size * (i - num_steps / 2) * direction.

  Args:
    center: point the line passes through; indexed at positions 0, 1, 2.
    direction: direction of the line; scaled and added to center.
    num_steps: number of points to generate.
    step_size: spacing between consecutive points, in units of direction.

  Returns:
    A tuple (line_x, line_y, line_z) of three lists holding the first,
    second and third coordinate of every sampled point.
  """
  half_span = num_steps / 2
  samples = []
  for k in range(num_steps):
    offset = step_size * (k - half_span)
    samples.append(center + offset * direction)
  xs = [point[0] for point in samples]
  ys = [point[1] for point in samples]
  zs = [point[2] for point in samples]
  return (xs, ys, zs)

In [None]:
# Coordinates of the centered samples in the eigenvector basis:
# column k of z is each sample's coordinate along eigenvec[:, k].
z = np.dot(Xc, eigenvec)

# Sample points along the axis eigenvec[:, 0] through the data mean.
# plotsurface (defined in the previous cell) returns their x/y/z coordinates.
line_x, line_y, line_z = plotsurface(Xbar, eigenvec[:,0], 55, 0.5)

# Reconstruct every sample from its first coordinate only:
# Xhat[i] = Xbar + z[i, 0] * eigenvec[:, 0]. The outer product builds all
# rows at once and gives Xhat the same (N, 3) shape as X.
Xhat = Xbar + np.outer(z[:,0], eigenvec[:,0])

# Samples (black) and their mean (orange).
fig = go.Figure(data=[go.Scatter3d(x=df.x1, y=df.x2, z=df.x3, mode='markers', marker_size=2, marker_color="black", showlegend=False)])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.add_trace(go.Scatter3d(x=[Xbar[0]], y=[Xbar[1]], z=[Xbar[2]], marker_size=3, marker_color="orange", mode='markers', showlegend=False))
# The projection axis is a 1-D set of points, so draw it as a line trace
# (go.Surface would need a 2-D grid of z values).
fig.add_trace(go.Scatter3d(x=line_x, y=line_y, z=line_z, mode='lines',
                           line=dict(color="rgb(196,196,196)", width=2), showlegend=False))

# Connect each sample (black) to its reconstruction on the axis (red).
for sample, projected in zip(X, Xhat):
  fig.add_trace(go.Scatter3d(x=[sample[0],projected[0]], y=[sample[1],projected[1]], z=[sample[2],projected[2]],
                             marker=dict(size=2,color=["black", "red"]), line=dict(color="black",width=2), showlegend=False))

fig.show()

## Calculate Reconstruction Error

In [None]:
# Reconstruction error: squared differences between the samples and their
# reconstructions, summed over every entry.
squared_residuals = (X - Xhat) ** 2
error = squared_residuals.sum()
print(error)

In [None]:
# Cross-check of the reconstruction error using the projections alone.
# The reconstruction keeps only the coordinate along eigenvec[:, 0], so the
# residual of each sample is made of all remaining coordinates (columns 1
# onward of z). With orthonormal eigenvectors the squared residual norm is
# the sum of those squared coordinates.
error = np.sum(z[:,1:]**2)
print(error)