#### Generating Data

In [None]:
import numpy as np
import pandas as pd

np.random.seed(23)

mu_vec1 = np.array([0, 0, 0])
cov_mat1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df1 = pd.DataFrame(class1_sample, columns=['feature1', 'feature2', 'feature3'])
df1['target'] = 1

mu_vec2 = np.array([1, 1, 1])
cov_mat2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

df2 = pd.DataFrame(class2_sample, columns=['feature1', 'feature2', 'feature3'])
df2['target'] = 0


In [None]:

# shuffling targets
df = pd.concat([df1,df2], ignore_index=True)
df = df.sample(40)


In [3]:
df.head()

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [4]:
# Plotting data in 3D vector space

import plotly.express as px
# y_train_trf = y_train.astype(str)
fig = px.scatter_3d(df, x=df['feature1'], y=df['feature2'], z=df['feature3'],
                    color=df['target'].astype('str'))
fig.update_traces(marker=dict(size=12,
                              line=dict(width=2, color='DarkSlateGrey')),
                  selector=dict(mode='markers'))

fig.show()

### Step 1: Standardize the Data


In [5]:
# Step 1 - Apply standard scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

df.iloc[:, 0:3] = scaler.fit_transform(df.iloc[:, 0:3])

### Step 2 - Find Covariance Matrix

In [6]:
# Step 2 - Find Covariance Matrix
covariance_matrix = np.cov([df.iloc[:, 0], df.iloc[:, 1], df.iloc[:, 2]])
print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[1.02564103 0.20478114 0.080118  ]
 [0.20478114 1.02564103 0.19838882]
 [0.080118   0.19838882 1.02564103]]


### Step 3 - Find Eigenvalues and Eigenvectors

- ***Eigenvectors*** are special vectors in a transformation that, unlike most vectors, do not change direction when a transformation (like a rotation, scaling, or shearing) is applied.<br>
They only scale by some factor, called an ***eigenvalue***. 


In [7]:
# Step 3 - Finding EV and EVs
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)

In [8]:
eigen_values

array([1.3536065 , 0.94557084, 0.77774573])

In [9]:
eigen_vectors

array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]])

In [54]:
# Plot eigen vectors in 3D s
import plotly.graph_objects as go

# Create a 3D scatter plot using Plotly
fig = go.Figure()

# Define colors based on the target: 1 (blue), 0 (orange)
colors = np.where(df['target'] == 1, 'blue', 'orange')

# Add scatter points to the figure
fig.add_trace(go.Scatter3d(
    x=df['feature1'],
    y=df['feature2'],
    z=df['feature3'],
    mode='markers',
    marker=dict(size=8, color=colors, opacity=0.5),
    name='Data Points'
))

# Plot the mean point
mean_point = [df['feature1'].mean(), df['feature2'].mean(), df['feature3'].mean()]
fig.add_trace(go.Scatter3d(
    x=[mean_point[0]], 
    y=[mean_point[1]], 
    z=[mean_point[2]], 
    mode='markers',
    marker=dict(size=10, color='red', opacity=0.5),
    name='Mean Point'
))

# Define the origin for the eigenvectors
origin = np.array([0, 0, 0])

# Add eigenvectors as arrows
for i in range(len(eigen_vectors)):
    fig.add_trace(go.Scatter3d(
        x=[origin[0], eigen_vectors[0, i] * eigen_values[i]], 
        y=[origin[1], eigen_vectors[1, i] * eigen_values[i]], 
        z=[origin[2], eigen_vectors[2, i] * eigen_values[i]],
        mode='lines+text',
        line=dict(color='red', width=3),
        name=f'Eigenvector {i+1}'
    ))

# Set the layout of the figure
fig.update_layout(
    scene=dict(
        xaxis_title='X-axis (Feature 1)',
        yaxis_title='Y-axis (Feature 2)',
        zaxis_title='Z-axis (Feature 3)',
    ),
    title='3D Projection of Eigenvectors with Data Points Colored by Target'
)

# Show the plot
fig.show()


In [None]:
# selecting 2 largest Eigenvectors to convert 3D data to 2D

pc = eigen_vectors[0:2]
pc.shape

(2, 3)

### Step 4 - Calculate Principal Components


In [None]:
# Projecting the input data onto the selected Eigenvectors i.e. the Principal Components

transformed_df = np.dot(df.iloc[:, 0:3], pc.T)
# (40,3) * (3,2) -> (40,2) 
new_df = pd.DataFrame(transformed_df, columns=['PC1', 'PC2'])
new_df['target'] = df['target'].values
new_df.head()

Unnamed: 0,PC1,PC2,target
0,0.599433,1.795862,1
1,1.056919,-0.212737,0
2,-0.271876,0.498222,1
3,-0.621586,0.02311,1
4,1.567286,1.730967,1


In [13]:
# Plotting The target against principal components  

new_df['target'] = new_df['target'].astype('str')
fig = px.scatter(x=new_df['PC1'],
                 y=new_df['PC2'],
                 color=new_df['target'],
                 color_discrete_sequence=px.colors.qualitative.G10
                 )

fig.update_traces(marker=dict(size=12,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()