In [10]:

import numpy as np
import pandas as pd

# Set random seed for reproducibility
np.random.seed(23) 

# Generate class 1 samples
mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

# Create DataFrame for class 1 samples
df_class1 = pd.DataFrame(class1_sample, columns=['feature1', 'feature2', 'feature3'])
df_class1['target'] = 1

# Generate class 2 samples
mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

# Create DataFrame for class 2 samples
df_class2 = pd.DataFrame(class2_sample, columns=['feature1', 'feature2', 'feature3'])
df_class2['target'] = 0

# Combine the two DataFrames
df = pd.concat([df_class1, df_class2], ignore_index=True)

# Shuffle the DataFrame
df = df.sample(5).reset_index(drop=True)

# Display the DataFrame
print(df)

   feature1  feature2  feature3  target
0 -0.367548 -1.137460 -1.322148       1
1  0.177061 -0.598109  1.226512       0
2  0.420623  0.411620 -0.071324       1
3  1.968435 -0.547788 -0.679418       1
4 -2.506230  0.146960  0.606195       1


In [20]:
import plotly.express as px
#y_train_trf = y_train.astype(str)
fig = px.scatter_3d(df, x=df['feature1'], y=df['feature2'], z=df['feature3'],
              color=df['target'].astype('str'))
fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))

fig.show()

In [21]:
# apply standard scaling

from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
df.iloc[:,0:3]=scaler.fit_transform(df.iloc[:,0:3])

In [24]:
# step 2- find the covariance matrix
covariance_matrix=np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print('covariance matrix:\n',covariance_matrix)

covariance matrix:
 [[ 1.25       -0.3312981  -0.42048064]
 [-0.3312981   1.25        0.55714959]
 [-0.42048064  0.55714959  1.25      ]]


In [25]:
# step 3 -finding eigen values and eigen vectors
eigen_values,eigen_vectors=np.linalg.eig(covariance_matrix)

In [26]:
eigen_values

array([2.12947562, 0.94014695, 0.68037743])

In [27]:
eigen_vectors

array([[ 0.51834579, -0.833056  ,  0.19322356],
       [-0.58839434, -0.51138427, -0.62632119],
       [-0.62057211, -0.21095931,  0.75523932]])

In [37]:
%pylab inline

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import FancyArrowPatch


class Arrow3D(FancyArrowPatch):
    def __init__(self, xs, ys, zs, *args, **kwargs):
        FancyArrowPatch.__init__(self, (0,0), (0,0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def draw(self, renderer):
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0],ys[0]),(xs[1],ys[1]))
        FancyArrowPatch.draw(self, renderer)

fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(111, projection='3d')

ax.plot(df['feature1'], df['feature2'], df['feature3'], 'o', markersize=8, color='blue', alpha=0.2)
ax.plot([df['feature1'].mean()], [df['feature2'].mean()], [df['feature3'].mean()], 'o', markersize=10, color='red', alpha=0.5)
for v in eigen_vectors.T:
    a = Arrow3D([df['feature1'].mean(), v[0]], [df['feature2'].mean(), v[1]], [df['feature3'].mean(), v[2]], mutation_scale=20, lw=3, arrowstyle="-|>", color="r")
    ax.add_artist(a)
ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')

plt.title('Eigenvectors')

plt.show()



%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


AttributeError: 'Arrow3D' object has no attribute 'do_3d_projection'

<Figure size 700x700 with 1 Axes>

In [33]:
pc=eigen_vectors[0:2]
pc

array([[ 0.61002583, -0.69239396, -0.3853039 ],
       [ 0.53982115,  0.00718615,  0.84174906]])

In [34]:
transformed_df=np.dot(df.iloc[:,0:3],pc.T)
new_df=pd.DataFrame(transformed_df,columns=['pc1','pc2'])
new_df['target']=df['target'].values
new_df.head()

Unnamed: 0,pc1,pc2,target
0,1.072785,-1.319501,1
1,0.049559,1.123698,0
2,-0.000931,0.169982,1
3,1.841864,0.486767,1
4,-1.864189,-0.841596,1


In [36]:

new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['pc1'],
                 y=new_df['pc2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()