In [1]:
# Importing libraries
import pandas as pd
import numpy as np

# In Numpy we have a module linalg which can be used to find eigen values and vectors. 
# See the official documentation here: https://numpy.org/doc/stable/reference/generated/numpy.linalg.eig.html
from numpy import linalg

**Here is the documentation for `numpy.linalg.eig`:** https://numpy.org/doc/stable/reference/generated/numpy.linalg.eig.html

In [2]:
# Raw data: four named columns, six observations each
data = {
    'Alp': [20, 10, 13, 1, 18, 4],
    'Himalaya': [4, 2, 1, 13, 10, 20],
    'Hawaii': [16, 18, 19, 7, 10, 0],
    'Scuba': [0, 10, 7, 19, 2, 16],
}

In [3]:
# Wrap the raw data in a DataFrame, labelling each of the six rows with a name
row_labels = ['Anne', 'Bill', 'Chris', 'Jen', 'Joe', 'Maggie']
df = pd.DataFrame(data, index=row_labels)

In [4]:
# Displaying the data: a bare expression on the last line of a cell is rendered as a table
df 

Unnamed: 0,Alp,Himalaya,Hawaii,Scuba
Anne,20,4,16,0
Bill,10,2,18,10
Chris,13,1,19,7
Jen,1,13,7,19
Joe,18,10,10,2
Maggie,4,20,0,16


In [5]:
# Finding the covariance matrix of the four columns.
# pandas returns the sample covariance (normalised by N-1): e.g. Var(Alp) = 284 / 5 = 56.8

df.cov()

Unnamed: 0,Alp,Himalaya,Hawaii,Scuba
Alp,56.8,-32.8,32.8,-56.8
Himalaya,-32.8,54.666667,-54.666667,32.8
Hawaii,32.8,-54.666667,54.666667,-32.8
Scuba,-56.8,32.8,-32.8,56.8


In [6]:
# Eigen-decomposition of the covariance matrix.
# linalg.eig() returns a pair: the eigenvalues (w) and a matrix (v) whose
# column v[:, i] is the eigenvector belonging to w[i]. The eigenvalues are
# not returned in any particular order.

cov_matrix = df.cov()
w, v = linalg.eig(cov_matrix)
print("Printing Eigenvalues: ")
print(w)

Printing Eigenvalues: 
[ 1.77101346e+02 -1.54171923e-14  4.58319875e+01  2.43716086e-15]


In [7]:
# Show the eigenvector matrix; column i pairs with the eigenvalue at position i in w
print("Printing Eigenvectors: ", v, sep="\n")

Printing Eigenvectors: 
[[-5.08060809e-01  7.07106781e-01  4.91807091e-01  7.13771691e-02]
 [ 4.91807091e-01  2.50227821e-16  5.08060809e-01 -7.03495060e-01]
 [-4.91807091e-01 -2.90939598e-16 -5.08060809e-01 -7.03495060e-01]
 [ 5.08060809e-01  7.07106781e-01 -4.91807091e-01  7.13771691e-02]]


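* The data has 4 dimensions (one per column), so the covariance matrix has 4 eigenvectors and 4 corresponding eigenvalues; two of those eigenvalues are numerically zero (magnitude around 1e-14 or smaller), so only 2 directions carry any variance.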
* The two eigenvectors with the highest eigenvalues are taken as the principal components.

In [14]:
# The components are the eigenvectors (columns of v) that belong to the
# largest eigenvalues. linalg.eig() does not sort its eigenvalues, so the
# columns are ranked by eigenvalue here instead of hard-coding their positions.
top_k = 2  # number of principal components to show
largest_first = np.argsort(w)[::-1][:top_k]  # column indices, biggest eigenvalue first
for col in largest_first:
    # v[:, col] is one eigenvector; tolist() prints it as plain Python floats
    print(v[:, col].tolist())

[-0.5080608089947457, 0.4918070905991539, -0.49180709059915395, 0.5080608089947458]
[0.4918070905991542, 0.5080608089947455, -0.5080608089947455, -0.4918070905991542]


In [9]:
# Round every entry to the nearest whole number for a compact view of the eigenvector matrix
rounded_vectors = v.round()
print(rounded_vectors)

[[-1.  1.  0.  0.]
 [ 0.  0.  1. -1.]
 [-0. -0. -1. -1.]
 [ 1.  1. -0.  0.]]


### Using the sklearn library

In [10]:
from sklearn.decomposition import PCA

**Here is the documentation for PCA:** https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

In [11]:
# Defining the number of principal components to generate
# (passed to PCA as n_components when the estimator is created)
n = 2

In [12]:
# Create a PCA estimator that keeps n components and fit it on the DataFrame.
# fit() is the last expression of the cell, so the notebook echoes the fitted estimator.
pca = PCA(n_components=n)
pca.fit(df)

PCA(n_components=2)

In [15]:
# Printing the components: each row of components_ is one principal component.
# These rows match the two eigenvectors with the largest eigenvalues found by hand earlier.
print(pca.components_)

[[-0.50806081  0.49180709 -0.49180709  0.50806081]
 [ 0.49180709  0.50806081 -0.50806081 -0.49180709]]
