# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def k_means_clustering(dataset, num_clusters):
    """Cluster the feature columns of ``dataset`` with Lloyd's k-means.

    Args:
        dataset: DataFrame containing a ``'Species'`` column; every other
            column is treated as a numeric feature. The frame itself is not
            modified.
        num_clusters: Number of clusters. The initial centroids are
            ``num_clusters`` distinct rows drawn with a fixed seed (0), so
            repeated calls on the same data give the same result.

    Returns:
        A ``(labels, species)`` tuple: ``labels`` is an integer array with
        the cluster index of each row, and ``species`` is the original
        ``'Species'`` column.

    Raises:
        KeyError: If ``dataset`` has no ``'Species'`` column.
        ValueError: If ``num_clusters`` exceeds the number of rows, or a
            feature column cannot be converted to float.
    """
    species = dataset['Species']
    features = dataset.drop('Species', axis=1)

    # Work in floating point: with integer input the centroid array would be
    # integer too, and every updated mean would be silently truncated.
    X = np.asarray(features.values, dtype=float)

    # A private generator seeded with 0 yields the same draw as seeding the
    # global generator, without resetting the caller's random state.
    rng = np.random.RandomState(0)
    centroids = X[rng.choice(len(X), num_clusters, replace=False)]

    # No assignment exists yet; using None (rather than an all-zero array)
    # prevents a first assignment of all zeros from looking like convergence.
    labels = None

    while True:
        # Assign every point to its nearest centroid (Euclidean distance).
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        new_labels = np.argmin(distances, axis=1)

        # Converged once an iteration leaves the assignment unchanged.
        if labels is not None and np.array_equal(labels, new_labels):
            break

        labels = new_labels

        # Move each centroid to the mean of its members. An empty cluster
        # keeps its previous centroid: the mean of no points is NaN, and
        # argmin would then pick that NaN column for every point.
        for i in range(num_clusters):
            members = X[labels == i]
            if len(members):
                centroids[i] = members.mean(axis=0)

    return labels, species


def principal_component_analysis(dataset):
    """Project the feature columns of ``dataset`` onto their top principal axes.

    The features are standardized (zero mean, unit population standard
    deviation), the covariance matrix of the standardized data is
    eigen-decomposed, and the data is projected onto the eigenvectors with
    the largest eigenvalues.

    Args:
        dataset: DataFrame containing a ``'Species'`` column; every other
            column is treated as a numeric feature. The frame itself is not
            modified.

    Returns:
        A ``(projected_data, eigenvalues)`` tuple: ``projected_data`` has one
        row per input row and up to three columns (fewer when there are
        fewer than three features), and ``eigenvalues`` holds the matching
        eigenvalues in descending order.

    Raises:
        KeyError: If ``dataset`` has no ``'Species'`` column.
    """
    features = dataset.drop('Species', axis=1)
    X = np.asarray(features.values, dtype=float)

    # Standardize each feature. A constant column has zero spread; dividing
    # by 1 instead leaves it at zero after centering rather than producing
    # NaN, which would make the eigen-decomposition fail.
    spread = np.std(X, axis=0)
    spread[spread == 0] = 1.0
    X = (X - np.mean(X, axis=0)) / spread

    # np.cov normalizes by n - 1 while np.std above normalizes by n, so the
    # diagonal entries are n / (n - 1) rather than exactly 1.
    # atleast_2d keeps the single-feature case (0-d result) decomposable.
    cov_matrix = np.atleast_2d(np.cov(X, rowvar=False))

    # The covariance matrix is symmetric, so use eigh: it always returns
    # real eigenvalues and orthonormal eigenvectors, whereas the general
    # eig routine can return complex output.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Order the eigenpairs from largest to smallest eigenvalue.
    order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    # Keep the three leading directions and project the data onto them.
    top_eigenvectors = eigenvectors[:, :3]
    projected_data = X.dot(top_eigenvectors)

    return projected_data, eigenvalues[:3]


# Example usage of the implemented K-Means Clustering and PCA algorithms.
# Assumes `dataset` has already been defined as a DataFrame containing the
# Iris dataset (numeric feature columns plus a 'Species' column).

# K-Means Clustering
k = 3  # Number of clusters
kmeans_labels, species = k_means_clustering(dataset, k)

# PCA
projected_data, eigenvalues = principal_component_analysis(dataset)

# Two side-by-side panels in the plane of the first two principal
# components: cluster assignments on the left, true species on the right.
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

# Plotting the output of the cluster
axs[0].scatter(projected_data[:, 0], projected_data[:, 1], c=kmeans_labels, cmap='viridis')
axs[0].set_xlabel('Principal Component 1')
axs[0].set_ylabel('Principal Component 2')
axs[0].set_title('K-Means Clustering Output')

# Plotting the actual species
species_labels = np.unique(species)
species_colors = ['red', 'green', 'blue']
for position, label in enumerate(species_labels):
    # Boolean row mask; indexing with the tuple returned by np.where would
    # produce arrays of shape (1, n) instead of flat coordinate vectors.
    mask = np.asarray(species == label)
    # Species beyond the fixed palette are still drawn: color=None lets
    # matplotlib pick the next colour from its default cycle.
    color = species_colors[position] if position < len(species_colors) else None
    axs[1].scatter(projected_data[mask, 0], projected_data[mask, 1], color=color, label=label)

axs[1].set_xlabel('Principal Component 1')
axs[1].set_ylabel('Principal Component 2')
axs[1].set_title('Actual Species')
axs[1].legend()

plt.tight_layout()
plt.show()