In [None]:
import pandas as pd

# 1. Data Example

In [None]:
# Schema for the samples: eating and exercise habits, plus the body-shape label
df = pd.DataFrame(
    columns=[
        'calory',
        'breakfast',
        'lunch',
        'dinner',
        'exercise',
        'body_shape',
    ]
)

In [None]:
# Build all ten samples in a single constructor call instead of enlarging the
# empty frame row by row with .loc: row-wise enlargement is slow and can leave
# the numeric columns with object dtype, whereas this lets pandas infer integer
# dtypes. Column names are reused from the empty frame declared in the previous
# cell; the default index is 0..9, matching the labels the .loc version produced.
df = pd.DataFrame(
    [
        [1200, 1, 0, 0, 2, 'Skinny'],
        [2800, 1, 1, 1, 1, 'Normal'],
        [3500, 2, 2, 1, 0, 'Fat'],
        [1400, 0, 1, 0, 3, 'Skinny'],
        [5000, 2, 2, 2, 0, 'Fat'],
        [1300, 0, 0, 1, 2, 'Skinny'],
        [3000, 1, 0, 1, 1, 'Normal'],
        [4000, 2, 2, 2, 0, 'Fat'],
        [2600, 0, 2, 0, 0, 'Normal'],
        [3000, 1, 2, 1, 1, 'Fat'],
    ],
    columns=df.columns,
)

In [None]:
# Show the first 10 rows of df
df.head(10)

# 2. Split feature vectors and labels

In [None]:
# X holds the feature vectors: every column of df except the body_shape label.
# The double brackets select a DataFrame rather than a Series.
X = df[['calory', 'breakfast', 'lunch', 'dinner', 'exercise']]

In [None]:
# Preview 10 feature rows, matching the df.head(10) / Y.head(10) previews
# (head(9) left the last of the ten samples out of the display)
X.head(10)

In [None]:
# Y holds the labels

In [None]:
# Double brackets keep Y as a one-column DataFrame (not a Series)
Y = df[['body_shape']]

In [None]:
# Show the first 10 label rows
Y.head(10)

# 3. Rescale feature vectors so they all have the same scale

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on X and return the rescaled values in one call; with its
# default settings StandardScaler centers each column and scales it to unit variance
x_std = StandardScaler().fit_transform(X)

In [None]:
# Inspect the standardized feature values
x_std

# 4. Covariance Matrix of features

In [None]:
import numpy as np

In [None]:
# np.cov treats each row as one variable by default, so flip x_std to put
# every feature (a column of x_std) on its own row first
features = np.transpose(x_std)
covariance_matrix = np.cov(features)
print(covariance_matrix)

# 5. Eigenvectors and Eigenvalues of the Covariance Matrix

In [None]:
# The covariance matrix is symmetric, so use eigh: it returns real results with
# the eigenvalues in ascending order, whereas np.linalg.eig gives no ordering
# guarantee. Reverse that order so index 0 is always the component with the
# largest variance, which the later cells that pick eig_vals[0] and
# eig_vecs.T[0] rely on. Eigenvector signs are arbitrary, so the projection may
# come out mirrored compared with np.linalg.eig; the PCA result is equivalent.
eig_vals, eig_vecs = np.linalg.eigh(covariance_matrix)
eig_vals = eig_vals[::-1]
eig_vecs = eig_vecs[:, ::-1]

In [None]:
# Each column of eig_vecs is one eigenvector
print('Eigenvectors \n%s' %eig_vecs)

In [None]:
# eig_vals[i] is the eigenvalue belonging to column i of eig_vecs
print('\nEigenvalues \n%s' %eig_vals)

In [None]:
# Reducing to a single dimension is enough here: the first eigenvalue alone
# accounts for about 73% of the total variance
eig_vals[0] / eig_vals.sum()

# 6. Project data points onto the selected eigenvector

In [None]:
# Column 0 of eig_vecs is the selected eigenvector; project every standardized
# sample onto it to get one coordinate per sample
projected_X = np.dot(x_std, eig_vecs[:, 0])

In [None]:
# Inspect the projected coordinates
projected_X

In [None]:
# Plotting frame: the projection goes on the x-axis and every point is pinned
# to y = 0, so the samples line up along a single axis
result = pd.DataFrame({'PC1': projected_X, 'y-axis': 0.0})
# Attach the body-shape labels (aligned on the index) for coloring
result['label'] = Y

In [None]:
# Show the first 10 rows of the plotting frame
result.head(10)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Pass x and y by keyword: from seaborn 0.12 on, every lmplot argument other
# than `data` is keyword-only, so the positional form raises a TypeError there.
# The keyword form also works on older seaborn releases.
sns.lmplot(x='PC1', y='y-axis', data=result, fit_reg=False,  # scatter only, no regression line
           scatter_kws={"s": 50},  # marker size
           hue="label")  # one color per label

# title
plt.title('PCA result')

# bonus - scikit-learn PCA

In [None]:
from sklearn import decomposition
# Keep a single principal component, matching the manual reduction above
pca = decomposition.PCA(n_components=1)
# Fit on the standardized features and project them in one call
sklearn_pca_x = pca.fit_transform(x_std)

In [None]:
# Plotting frame for the scikit-learn projection: PC1 on the x-axis, every
# point pinned to y = 0, labels attached for coloring
sklearn_result = pd.DataFrame(sklearn_pca_x, columns=['PC1'])
sklearn_result['y-axis'] = 0.0
sklearn_result['label'] = Y

# Pass x and y by keyword: from seaborn 0.12 on, every lmplot argument other
# than `data` is keyword-only, so the positional form raises a TypeError there.
sns.lmplot(x='PC1', y='y-axis', data=sklearn_result, fit_reg=False,  # scatter only, no regression line
           scatter_kws={"s": 50},  # marker size
           hue="label")  # one color per label

# https://github.com/minsuk-heo/python_tutorial/blob/master/data_science/pca/PCA.ipynb