<a href="https://colab.research.google.com/github/rs0927/principal_component_analysis/blob/main/PrincipleComponentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Pin NumPy's global RNG so the synthetic sample is identical on every run
np.random.seed(42)

# Sample size shared by every generated array
n_samples = 1000

# Two independent standard-normal latent factors; the observed features are
# noisy linear mixes of these, which is what makes them correlated
latent1 = np.random.normal(loc=0.0, scale=1.0, size=n_samples)
latent2 = np.random.normal(loc=0.0, scale=1.0, size=n_samples)

# Observed features (noise is drawn in f1, f2, f3 order)
f1 = 2.0 * latent1 + np.random.normal(loc=0.0, scale=0.5, size=n_samples)  # dominated by latent1
f2 = (-1.5 * latent1 + 0.5 * latent2) + np.random.normal(loc=0.0, scale=0.3, size=n_samples)  # mixes both factors
f3 = 0.8 * latent2 + np.random.normal(loc=0.0, scale=0.7, size=n_samples)  # latent2 only, noisiest

# Binary label (optional; handy for colouring the PCA plots):
# 1 where f1 exceeds 0.5 and f2 is below 1.0, otherwise 0
target = np.logical_and(f1 > 0.5, f2 < 1.0).astype(int)

# Collect the three features and the label into a single table
data = pd.DataFrame(dict(f1=f1, f2=f2, f3=f3, target=target))

In [None]:


import plotly.express as px

# 3-D scatter of the three features, coloured by the target label
# (the label is cast to string before being handed to plotly)
fig = px.scatter_3d(
    data,
    x=data['f1'],
    y=data['f2'],
    z=data['f3'],
    color=data['target'].astype('str'),
)

# Larger markers with a dark outline, applied to marker-mode traces only
fig.update_traces(
    marker={'size': 12, 'line': {'width': 2, 'color': 'DarkSlateGrey'}},
    selector={'mode': 'markers'},
)

fig.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the three feature columns (positions 0-2),
# then overwrite those columns in place with their scaled values.
# The 'target' column (position 3) is left untouched.
scaler = StandardScaler().fit(data.iloc[:, 0:3])
data.iloc[:, 0:3] = scaler.transform(data.iloc[:, 0:3])

In [None]:
# One entry per feature column (positions 0-2), passed to np.cov as a list so
# each feature is treated as one variable -> 3x3 covariance matrix
feature_rows = [data.iloc[:, col] for col in range(3)]
covariance_matrix = np.cov(feature_rows)

print('Covariance Matrix:\n', covariance_matrix)

Covariance Matrix:
 [[ 1.001001   -0.90683939 -0.04598105]
 [-0.90683939  1.001001    0.27563257]
 [-0.04598105  0.27563257  1.001001  ]]


In [None]:
# Eigen-decomposition of the 3x3 feature covariance matrix.
# NOTE: the eigenvalues are NOT returned sorted (see the displayed output in
# the next cell), so rank them explicitly before picking principal components.
eigen_values, eigen_vectors = np.linalg.eig(covariance_matrix)


In [None]:
# Show the eigenvalues in the order np.linalg.eig returned them (unsorted)
eigen_values


array([1.96243413, 0.06511413, 0.97545474])

In [None]:
# Show the eigenvector matrix; each COLUMN is one eigenvector
# (column i pairs with eigen_values[i])
eigen_vectors

array([[-0.67308663,  0.68053202,  0.28953507],
       [ 0.70177437,  0.7112566 , -0.04033342],
       [ 0.23338191, -0.17604041,  0.95631724]])

In [None]:

import plotly.express as px

# eigen_vectors is a 3x3 matrix where each COLUMN is an eigenvector;
# keep that layout in a labelled frame for inspection.
df_eigenvectors = pd.DataFrame(eigen_vectors, columns=['Eigenvector 1', 'Eigenvector 2', 'Eigenvector 3'])

# A scatter plot draws one point per ROW. Plotting df_eigenvectors directly
# would therefore draw the rows of the matrix (the i-th component of every
# eigenvector) instead of the eigenvectors themselves. Transpose so that each
# row is one eigenvector expressed in the original feature axes (f1, f2, f3).
eigenvector_points = pd.DataFrame(eigen_vectors.T, columns=['f1', 'f2', 'f3'])
eigenvector_points.insert(0, 'eigenvector', list(df_eigenvectors.columns))

# Create a 3D scatter plot with one labelled point per eigenvector
fig = px.scatter_3d(eigenvector_points, x='f1', y='f2', z='f3',
                    color='eigenvector', text='eigenvector',
                    title='Eigenvectors in 3D Space')

fig.show()


In [None]:
# np.linalg.eig returns the eigenvectors as COLUMNS and does not sort the
# eigenvalues, so slicing rows with eigen_vectors[0:2] selects neither actual
# eigenvectors nor the directions of largest variance.
# Rank the eigenvalues (largest first) and take the two matching columns.
top_two = np.argsort(eigen_values)[::-1][:2]

# Stored one component per row (shape 2x3) so the downstream projection
# `np.dot(features, principle_components.T)` keeps working unchanged.
principle_components = eigen_vectors[:, top_two].T

In [None]:
# Project the three feature columns onto the selected components:
# (n_samples x 3) . (3 x 2) -> (n_samples x 2)
transformed_df = np.dot(data.iloc[:, 0:3], principle_components.transpose())

# Wrap the projection in a labelled frame and carry the class label along
new_df = pd.DataFrame(transformed_df, columns=['PC1', 'PC2']).assign(
    target=data['target'].values
)
new_df.head()

Unnamed: 0,PC1,PC2,target
0,-0.331027,-0.081832,1
1,0.49675,0.021043,0
2,-0.728269,-0.179606,1
3,-1.873397,0.084868,1
4,0.736166,-0.101992,0


In [None]:

# Store the class label as strings before colouring the scatter by it
new_df['target'] = new_df['target'].astype('str')

# 2-D scatter of the projected data, one colour per target value
fig = px.scatter(
    x=new_df['PC1'],
    y=new_df['PC2'],
    color=new_df['target'],
    color_discrete_sequence=px.colors.qualitative.G10,
)

# Larger markers with a dark outline, applied to marker-mode traces only
fig.update_traces(
    marker={'size': 12, 'line': {'width': 2, 'color': 'DarkSlateGrey'}},
    selector={'mode': 'markers'},
)
fig.show()
