In [3]:
#Import the libraries
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [4]:
#Make the notebook run stable across all runs
np.random.seed(42)

In [5]:
#set the matlotlib parameters
plt.rcParams['axes.labelsize']=14
#This is the X axis ->Xtick
plt.rcParams['xtick.labelsize']=12
#Set for Y axis->Ytick
plt.rcParams['ytick.labelsize']=12

In [11]:
#build the 3D datasets
#creating a dataset of shape(60,3,3)
m=60
w1=0.2
w2=0.3
noise =0.1
#np.random.rand(m)-> generates a list of m randome numbers less than 1
angles=np.random.rand(m)*3*np.pi/2 - 0.5

#print the size of random list
angles.size

60

In [12]:
#Create an empty/zero array of size (60,3)
X=np.empty((m,3))

#print the size of X ( 2 D array)
X.shape



(60, 3)

In [16]:
#print the X
X

array([[ 3.71660927e-01,  6.46068615e-01,  3.63552946e-01],
       [ 8.99172688e-01,  5.03262021e-01,  3.95952269e-01],
       [-1.29182253e+00, -1.67832083e-01, -3.40241055e-01],
       [ 1.10617855e-01,  6.11815632e-01,  2.81565183e-01],
       [ 1.19433592e+00,  6.06936385e-01,  3.43665578e-01],
       [ 1.04869772e-01,  6.42441585e-01,  1.90024569e-01],
       [ 1.07080138e+00,  5.47828984e-02,  1.82058790e-01],
       [-1.17680784e+00, -6.39316032e-02, -2.46353635e-01],
       [ 2.07210314e-01, -1.52432854e-01,  2.27178063e-01],
       [-1.16617830e+00, -5.53056384e-01, -5.85879094e-01],
       [-1.17011674e+00,  5.96622160e-02, -1.47498664e-01],
       [ 7.16481127e-01,  2.54857841e-01,  5.84819905e-02],
       [ 5.80902453e-01, -2.71331513e-01, -1.24121497e-02],
       [-8.77718523e-01, -1.19238097e-01, -1.02420074e-01],
       [ 1.43762187e-01,  2.55047925e-01,  1.11694817e-01],
       [-7.89137193e-01,  2.38177680e-01, -1.94148612e-01],
       [-8.67666479e-01, -7.32270729e-03

In [17]:
#Embed the values of 1st column
X[:,0] = np.cos(angles)+np.sin(angles)/2 +np.random.randn(m)/2
#Print the values
X
X[:,1]=np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2
X[:,2]=X[:, 0] * w1 + X[:, 1] * w2 + noise * np.random.randn(m)

In [18]:
#print the data
print("A random data set")
print(X)

A random data set
[[ 1.30426935  0.68386008  0.33398857]
 [ 1.06812029  0.45779125  0.53410731]
 [-1.58652278 -0.13372972 -0.23947946]
 [ 1.07342707  0.64018033  0.35982195]
 [ 1.14580563  0.4723728   0.19955951]
 [-0.3257663   0.60268065  0.25103817]
 [ 1.10314569  0.13499465  0.24967355]
 [-1.25217107 -0.12494551 -0.16413624]
 [ 0.97165436 -0.14480979 -0.00855483]
 [-0.6247903  -0.58022625 -0.35896344]
 [-0.20575107  0.01398077 -0.03643162]
 [ 0.49871201  0.2705448   0.1856039 ]
 [ 1.728062   -0.34305223  0.19769018]
 [-2.05578785 -0.12827465 -0.38735497]
 [-0.87522331  0.1415407  -0.2393445 ]
 [-0.5822675   0.07296484 -0.108802  ]
 [-0.85594574 -0.03097551 -0.16845224]
 [ 0.60201103 -0.11602152  0.13703963]
 [ 0.73235478  0.66520312  0.41719338]
 [ 0.77544271  0.1059705   0.07441548]
 [-1.41191089 -0.24617925 -0.50964737]
 [-0.01344226  0.44529049  0.25866638]
 [ 1.10398421  0.60946525  0.43686782]
 [ 0.5339407  -0.18953    -0.02491951]
 [ 1.42998879  0.57469573  0.61352167]
 [ 1.09

In [19]:
#Lets use PCA to understand dimenioanlity reduction
from sklearn.decomposition import PCA

pca=PCA(n_components=2)

#Xconvert value
XC=pca.fit_transform(X)

In [56]:
print(XC)

[[-1.25017528 -0.25164106]
 [-1.0293518  -0.17345358]
 [ 1.792162   -0.01534935]
 [-1.02694478 -0.27713598]
 [-1.03170413 -0.05202155]
 [ 0.34621791 -0.54578155]
 [-0.943857    0.22598671]
 [ 1.45304341  0.03193198]
 [-0.71185766  0.53690702]
 [ 0.97303152  0.66447484]
 [ 0.39664613  0.11633127]
 [-0.37299162 -0.02166722]
 [-1.44947512  0.82913636]
 [ 2.27446075 -0.08324696]
 [ 1.06228561 -0.09185904]
 [ 0.76388297 -0.00364218]
 [ 1.05767525  0.0446294 ]
 [-0.39467782  0.37055186]
 [-0.71683116 -0.40260575]
 [-0.58532414  0.23323357]
 [ 1.70432123  0.22249035]
 [ 0.0719717  -0.32984545]
 [-1.06817337 -0.26852961]
 [-0.28054978  0.47664699]
 [-1.41440116 -0.2188645 ]
 [-1.04951519 -0.23932065]
 [ 0.6033779  -0.11559614]
 [ 0.29456944 -0.24805069]
 [ 1.76285216  0.16587654]
 [-0.08980825 -0.65980893]
 [-1.21937133  0.37979403]
 [ 0.64008301 -0.17319775]
 [ 1.146031   -0.10127508]
 [ 0.09206925 -0.4655235 ]
 [ 0.56929644  0.12040163]
 [ 0.08180354 -0.55324332]
 [-0.34043095 -0.46691626]
 

In [57]:
# Recover the 3D points projected on the plane (PCA 2D subspace).
X3D_inv = pca.inverse_transform(XC)
print(X3D_inv)

[[ 1.28557386e+00  6.33263901e-01  4.53359453e-01]
 [ 1.09272517e+00  5.24380171e-01  3.77004968e-01]
 [-1.57584799e+00 -1.04840228e-01 -3.07638047e-01]
 [ 1.06520160e+00  6.17919508e-01  4.12341612e-01]
 [ 1.12451275e+00  4.14747225e-01  3.35514753e-01]
 [-3.17571873e-01  6.24857480e-01  1.98716669e-01]
 [ 1.10783832e+00  1.47694447e-01  2.19711078e-01]
 [-1.23899199e+00 -8.92785599e-02 -2.48284793e-01]
 [ 9.60864886e-01 -1.74009663e-01  6.03360335e-02]
 [-6.24631804e-01 -5.79797320e-01 -3.59975415e-01]
 [-2.04936500e-01  1.61852737e-02 -4.16326814e-02]
 [ 4.99911340e-01  2.73790577e-01  1.77946180e-01]
 [ 1.73961762e+00 -3.11778890e-01  1.23907403e-01]
 [-2.05508779e+00 -1.26380043e-01 -3.91824884e-01]
 [-8.94194782e-01  9.01976367e-02 -1.18211500e-01]
 [-5.86447558e-01  6.16522162e-02 -8.21122686e-02]
 [-8.56579279e-01 -3.26900769e-02 -1.64407090e-01]
 [ 6.16100174e-01 -7.78916493e-02  4.70803527e-02]
 [ 7.37159177e-01  6.78205421e-01  3.86517239e-01]
 [ 7.65616393e-01  7.93772280e-

In [58]:
# Of course, there was some loss of information during the projection step, so the recovered 3D points
# are not exactly equal to the original 3D points:
# Returns True if two arrays are element-wise equal within a tolerance.
print("")
print("reconstruction when 3D dataset is converted to 2D, and from that 3D data is recovered again. Is reconstruction same as original data :-")
#if the element wise array is equal,the below function returns true.
print(np.allclose(X3D_inv, X))
print("")


reconstruction when 3D dataset is converted to 2D, and from that 3D data is recovered again. Is reconstruction same as original data :-
False



In [59]:
#We need to create the reconstruction error
#Row wise addition , first suare the error ,then sum up errors rowwise, the resulting matrix will be (60,1)
#Take the mean of all 60 entries and publish the mean error.
reconstruct_error = np.mean(np.sum(np.square(X3D_inv-X),axis=1))
#Prng the error
reconstruct_error

0.008403181219919018

In [60]:
#PCA components can be retrived using the components_ functinality

pca.components_

array([[-0.95942188, -0.17223061, -0.22326279],
       [ 0.24318711, -0.90617023, -0.34599935]])

In [61]:
pca.explained_variance_

array([1.13891369, 0.12641804])

In [62]:
#Print the shape oe)
#From (60,3) the size has reduced to (60,2)f XC
print(XC.shape)

(60, 2)


In [63]:
 #explained variance ratio
#explaiend variance ratio explains how much variance is explained by the dimensions
print(pca.explained_variance_ratio_)

[0.89405287 0.09923879]


In [67]:
#compute the sum of explaine variance ratio
s=np.sum(pca.explained_variance_ratio_)
#Print the sum
print(s)
#compute 1-s to understand the loss
print((1-s)*100)
#As the result sttes only 0.67% varance 

0.9932916554882117
0.6708344511788256


In [None]:
#The recevrse transformation is also possible using the PCA techniue.
#Recover the 3D points from 2D space
X3D=pca.inverse_transform(XC)