In [None]:
# Load the dataset
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt

# Fetch the bundled Iris dataset and split it into features (X) and class labels (y).
iris = load_iris()
X, y = iris.data, iris.target

In [None]:

# Visualize the original data in 3D
from mpl_toolkits.mplot3d import Axes3D
# 3D scatter of the first three feature columns, coloured by class label.
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={'projection': '3d'})
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.Set1, edgecolor='k', s=40)
ax.set(
    title='Original Iris Data',
    xlabel='Sepal length',
    ylabel='Sepal width',
    zlabel='Petal length',
)
plt.show()



In [None]:

# Standardize the data
from sklearn.preprocessing import StandardScaler
# Learn the per-feature centering/scaling statistics from X, then apply them to X itself.
scaler = StandardScaler().fit(X)
X_std = scaler.transform(X)


In [None]:

# Compute the covariance matrix
import numpy as np
# rowvar=False tells np.cov that each column of X_std is a variable (feature)
# and each row an observation, so the result is a features-by-features matrix.
cov_mat = np.cov(X_std, rowvar=False)



In [None]:
# Compute the eigenvalues and eigenvectors of the covariance matrix.
# A covariance matrix is symmetric, so use the symmetric solver `eigh`: it
# returns real eigenvalues (in ascending order) and orthonormal eigenvectors
# as the columns of `eig_vecs`. The general-purpose `eig` makes no symmetry
# assumption and guarantees neither real output nor orthogonal eigenvectors.
eig_vals, eig_vecs = np.linalg.eigh(cov_mat)


In [None]:

# Pair each eigenvalue magnitude with its eigenvector (the matching column of
# eig_vecs), then order the pairs from largest to smallest eigenvalue.
eig_pairs = [(np.abs(eig_val), eig_vecs[:, idx]) for idx, eig_val in enumerate(eig_vals)]
# Sort on the eigenvalue only. Sorting the bare tuples would fall back to
# comparing the eigenvector arrays whenever two eigenvalues tie, and that
# comparison raises ValueError (ambiguous truth value of an array).
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)


In [None]:

# Number of leading principal components to keep.
n_components = 3

# Share of the total variance carried by each component: every eigenvalue
# divided by the sum of all eigenvalues, listed from largest to smallest.
total_variance = sum(eig_vals)
explained_variance = [
    eig_val / total_variance for eig_val in sorted(eig_vals, reverse=True)
]


In [None]:

# Transform the data using the selected principal components
from sklearn.decomposition import PCA
# Fit scikit-learn's PCA on the standardized features and project them onto
# the first `n_components` principal axes in a single step.
# NOTE: the projection comes from scikit-learn's own decomposition; the
# manually computed `eig_pairs` are not used here.
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_std)



In [None]:
# 2D scatter of the first two principal-component scores, coloured by class label.
pca_fig, pca_ax = plt.subplots(figsize=(8, 6))
pca_ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k', s=40)
pca_ax.set(
    title='Transformed Iris Data',
    xlabel='Principal Component 1',
    ylabel='Principal Component 2',
)
plt.show()


In [None]:

# Bar chart of the explained-variance ratio for each retained principal component.
component_ids = range(1, n_components + 1)  # 1-based component numbers for the x axis
var_fig, var_ax = plt.subplots(figsize=(8, 6))
var_ax.bar(component_ids, explained_variance[:n_components], align='center')
var_ax.set_xticks(component_ids)
var_ax.set(
    xlabel='Principal Component',
    ylabel='Explained Variance Ratio',
    title='Explained Variance as a Function of the Number of Principal Components',
)
plt.show()