In [1]:
import numpy as np
import pandas as pd

np.random.seed(23)

mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0], [0,1,0], [0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df = pd.DataFrame(class1_sample, columns=['feature1', 'feature2', 'feature3'])
df['target'] =1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0], [0,1,0], [0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df1 = pd.DataFrame(class2_sample, columns=['feature1', 'feature2', 'feature3'])
df1['target'] =0

df = df.append(df1, ignore_index = True)

df = df.sample(40)


  df = df.append(df1, ignore_index = True)


In [2]:
df.head()

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,-0.822939,-1.598109,0.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [3]:
import plotly.express as px
# y_train_trf = y_train.astype(str)

fig = px.scatter_3d(df, x= df['feature1'] , y = df['feature2'] , z = df['feature3'],
                    color = df['target'].astype('str'))

fig.update_traces(marker = dict(size=12, line = dict(width=2, color='DarkSlateGrey')),
                  selector = dict(mode='markers'))

fig.show()

**\we have to tranform this data from 3D to 2D, so we will try to find 2 best possible co-ordinate axis**

In [4]:
# step 1: mean centering, using standard scaler library

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

df.iloc[:, 0:3] = scaler.fit_transform(df.iloc[:, 0:3])

In [5]:
# step 2: find covairance matrix

cov_matrix = np.cov([df.iloc[:,0] , df.iloc[:,1], df.iloc[:,2]])
print("Covariance matrix: \n" , cov_matrix)

Covariance matrix: 
 [[ 1.02564103  0.06781177 -0.12497686]
 [ 0.06781177  1.02564103 -0.15241116]
 [-0.12497686 -0.15241116  1.02564103]]


In [6]:
# step 3: find eigen vectors and eigen values

eigen_values, eigen_vectors = np.linalg.eig(cov_matrix)


In [7]:
eigen_values

array([1.25911792, 0.95953081, 0.85827434])

In [8]:
eigen_vectors

array([[-0.51038783, -0.78846385,  0.34326234],
       [-0.569092  ,  0.60894401,  0.55255904],
       [ 0.64470037, -0.08667156,  0.75950607]])

In [9]:
%pylab inline

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import FancyArrowPatch


class Arrow3D(FancyArrowPatch):
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))
        FancyArrowPatch.draw(self, renderer)

fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(111, projection='3d')

ax.plot(df['feature1'], df['feature2'], df['feature3'], 'o', markersize=8, color='blue', alpha=0.2)
ax.plot([df['feature1'].mean()], [df['feature2'].mean()], [df['feature3'].mean()], 'o', markersize=10, color='red', alpha=0.5)
for v in eigen_vectors.T:
    a = Arrow3D([df['feature1'].mean(), v[0]], [df['feature2'].mean(), v[1]], [df['feature3'].mean(), v[2]], mutation_scale=20, lw=3, arrowstyle="-|>", color="r")
    ax.add_artist(a)
ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')

plt.title('Eigenvectors')

plt.show()

Populating the interactive namespace from numpy and matplotlib


AttributeError: ignored

<Figure size 700x700 with 1 Axes>

** using the plot we can find the principal components**

In [10]:
pc = eigen_vectors[0:2]
pc

array([[-0.51038783, -0.78846385,  0.34326234],
       [-0.569092  ,  0.60894401,  0.55255904]])

In [11]:
transformed_df = np.dot(df.iloc[:, 0:3], pc.T)

new_df = pd.DataFrame(transformed_df, columns = ['PC1', 'PC2'])
new_df['target'] = df['target'].values
new_df.head()

Unnamed: 0,PC1,PC2,target
0,0.560815,-1.471948,1
1,1.740278,-0.6434,0
2,-0.706342,-0.080489,1
3,-0.786474,-1.876564,1
4,1.105696,1.622028,1


In [12]:
new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 y=new_df['PC2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()