In [33]:
import numpy as np
import pandas as pd

In [34]:
np.random.seed(23)

mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df = pd.DataFrame(class1_sample,columns=['feature1','feature2','feature3'])
df['target'] = 1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

df1 = pd.DataFrame(class2_sample,columns=['feature1','feature2','feature3'])

df1['target'] = 0

df = pd.concat([df,df1] , ignore_index = True)
df = df.sample(40)
df.shape

(40, 4)

In [35]:
df

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1
29,1.42514,1.441152,0.182561,0
31,2.224431,0.230401,1.19212,0
4,0.322272,0.060343,-1.04345,1
32,-0.723253,1.461259,-0.085367,0
33,2.823378,-0.332863,2.637391,0


In [36]:
import plotly.express as px
#y_train_trf = y_train.astype(str)
fig = px.scatter_3d(df, x=df['feature1'], y=df['feature2'], z=df['feature3'],
              color=df['target'].astype('str'))
fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))

fig.show()

In [37]:
# Step 1 - Apply standard scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

df.iloc[:,0:3] = scaler.fit_transform(df.iloc[:,0:3])

In [38]:
df.iloc[:,:3]

Unnamed: 0,feature1,feature2,feature3
2,-0.700809,-1.525586,-1.749156
34,-0.224481,-1.010757,0.491251
14,-0.011456,-0.046935,-0.649616
11,1.342296,-0.962723,-1.184163
12,-2.57135,-0.299561,-0.05404
29,0.867118,0.935791,-0.426437
31,1.566196,-0.219914,0.461019
4,-0.097477,-0.382241,-1.504166
32,-1.011917,0.954984,-0.66196
33,2.090049,-0.75757,1.73149


In [39]:
# Step 2 - Find Covariance Matrix
covariance_matrix = np.cov([df.iloc[:,0],df.iloc[:,1],df.iloc[:,2]])
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


In [40]:
# Step 3 - Finding Eigen values and eigen vectors
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)

In [41]:
eigen_values

array([1.3536065 , 0.94557084, 0.77774573])

In [42]:
eigen_vectors

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

In [49]:
# for 3D to 2D
pc = eigen_vectors[:2]
pc

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442]])

In [50]:
df.iloc[:,0:3].shape

(40, 3)

In [51]:
pc.T.shape

(3, 2)

So output will be in 40 X 2 means 2 column

In [52]:
transformed_df = np.dot(df.iloc[:,0:3],pc.T)
# 40,3 - 3,2
new_df = pd.DataFrame(transformed_df,columns=['PC1','PC2'])
new_df['target'] = df['target'].values
new_df.shape

(40, 3)

In [53]:
new_df

Unnamed: 0,PC1,PC2,target
0,0.599433,1.795862,1
1,1.056919,-0.212737,0
2,-0.271876,0.498222,1
3,-0.621586,0.02311,1
4,1.567286,1.730967,1
5,-1.320157,-0.257002,0
6,-0.470834,-1.373121,0
7,-0.401542,1.203061,1
8,-0.433734,1.153326,0
9,0.227326,-2.669841,0


In [54]:
new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 y=new_df['PC2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()

In [55]:
# for 3D to 1D
pc = eigen_vectors[0]
print('pc : ' , pc)
print('pc shape after transpose : ' ,pc.T.shape )

transformed_df = np.dot(df.iloc[:,0:3],pc.T)
# 40,3 - 3,1
new_df = pd.DataFrame(transformed_df,columns=['PC1'])
new_df['target'] = df['target'].values
new_df.head()

pc :  [-0.53875915 -0.69363291  0.47813384]
pc shape after transpose :  (3,)


Unnamed: 0,PC1,target
0,0.599433,1
1,1.056919,0
2,-0.271876,1
3,-0.621586,1
4,1.567286,1


In [58]:
new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()