# In [None]:
## PCA on Crop Recommendation dataset

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import itertools

# Load the crop data, keep only the first 400 rows, and discard the
# 'ph' and 'rainfall' columns before the analysis.
df = pd.read_csv('Crop.csv').iloc[:400]
df = df.drop(columns=['ph', 'rainfall'])


# Create an empty list to store figure objects
figure_list = []

# Report which crop labels are present before they are encoded
unique_instances = df['label'].unique()
print("Unique Instances of label attribute:", unique_instances)

# Create a mapping of labels to numbers
label_mapping = {
    'rice': 0,
    'maize': 1,
    'chickpea': 2,
    'kidneybeans': 3,
}

# Replace the values in the 'label' column with numbers.
# A per-value lookup is used instead of Series.replace: replacing strings
# with ints through replace() relies on implicit downcasting that pandas has
# deprecated. Labels that are not in the mapping are left untouched, exactly
# as replace() would have left them.
df['label'] = df['label'].map(lambda name: label_mapping.get(name, name))

# Split the frame into the target column and the feature matrix
y = df['label']
X = df.drop(columns='label')
matrix = X.values
print("Matrix representation of the dataset:")
print(matrix)  # the header above was previously printed without the matrix
dimension = matrix.shape
print("Dimension of the matrix:", dimension)


# Pairwise scatter plots of the columns of df, coloured by the encoded label
p = sns.pairplot(data=df, hue='label')


# Centre the data: take the mean of every attribute (column) and subtract it
# from each row of the matrix.
attribute_means = matrix.mean(axis=0)
zero_mean_matrix = matrix - attribute_means

# Wrap the centred data in a DataFrame for the statistics below
dfz = pd.DataFrame(zero_mean_matrix)

# Variance of each attribute of the centred data
variance = dfz.var()
print("Variance along the data axis:", variance, sep="\n")

# Covariance between every pair of attributes
covariance_matrix = dfz.cov()
print("Covariance Matrix:", covariance_matrix, sep="\n")


# Compute the eigenvalues and eigenvectors of the covariance matrix.
# A covariance matrix is symmetric, so the symmetric solver eigh is used: it
# always returns real eigenvalues and eigenvectors, whereas the general eig
# solver can return complex values. The sign of each eigenvector is arbitrary
# either way.
eigenvalues, eigenvectors = np.linalg.eigh(
    np.asarray(covariance_matrix, dtype=float)
)

# Sort the eigenvectors based on eigenvalues in descending order
# (eigh returns them in ascending order).
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Print the sorted eigenvalues and eigenvectors
print("Sorted Eigenvalues:")
print(sorted_eigenvalues)

print("\nSorted Eigenvectors:")
print(sorted_eigenvectors)

# Perform the change of basis: project the centred data onto the
# eigenvectors, one column per principal component.
transformed_data = np.dot(zero_mean_matrix, sorted_eigenvectors)
dft = pd.DataFrame(transformed_data)

## PCA using all the sorted_eigenvectors

# The eigenvalues add up to the total variance; dividing each one by that
# total gives the share of variance carried by its component.
eigenvalue_sum = sorted_eigenvalues.sum()
variance_proportion = sorted_eigenvalues / eigenvalue_sum

print("Proportion of Variance Explained by Each Eigenvalue:",
      variance_proportion, sep="\n")

## Plotting Different Combinations

# transformed_data already holds the centred data projected onto the sorted
# eigenvectors. Multiplying it by the eigenvectors a second time (as was done
# before) rotates the scores again and no longer yields the principal
# components, so the projected data is used directly.
principal_components = transformed_data

# Plot all combinations of 2D and 3D plots

num_components = principal_components.shape[1]

# Creating an array of marker types (one per crop class)
markers = ['^', 's', 'x', '+']

# Class names, in the order of their numeric codes
target_labels = ['rice', 'maize', 'chickpea', 'kidneybeans']

# Arrays to store the figures
figures_2d = []
figures_3d = []

# 2D plots: one figure for every pair of principal components
class_ids = range(len(np.unique(y)))

for i, j in itertools.combinations(range(num_components), 2):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for k in class_ids:
        # Rows belonging to class k
        mask = (y == k).to_numpy()
        ax.scatter(principal_components[mask, i], principal_components[mask, j],
                   marker=markers[k], label=target_labels[k])
    # Axis labels, legend and title are the same for every class, so they are
    # set once per figure rather than once per class.
    ax.set_xlabel('Principal Component {}'.format(i + 1))
    ax.set_ylabel('Principal Component {}'.format(j + 1))
    ax.legend()
    ax.set_title('PCA of Crop Dataset - 2D Plot (PC{} vs PC{})'.format(i + 1, j + 1))
    # Append the figure to the 2D figures array
    figures_2d.append(fig)
    
# 3D plots: one figure for every triple of principal components
combinations = list(itertools.combinations(range(num_components), 3))

for pc1, pc2, pc3 in combinations:
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # One scatter series per class code, each with its own marker
    for k in range(len(np.unique(df.label))):
        in_class = df.label == k
        xs = principal_components[in_class, pc1]
        ys = principal_components[in_class, pc2]
        zs = principal_components[in_class, pc3]
        ax.scatter(xs, ys, zs, marker=markers[k])

    # Label each axis with the (1-based) component it shows
    axis_setters = (ax.set_xlabel, ax.set_ylabel, ax.set_zlabel)
    for set_axis_label, component in zip(axis_setters, (pc1, pc2, pc3)):
        set_axis_label('Principal Component {}'.format(component + 1))

    ax.legend(target_labels)
    ax.set_title('PCA of Crop Dataset - 3D Plot (PC{} vs PC{} vs PC{})'.format(pc1 + 1, pc2 + 1, pc3 + 1))

    # Keep the figure so it can be saved later
    figures_3d.append(fig)
    

# Write every collected figure to a PNG file, numbered from 1 within its group
for number, figure in enumerate(figures_2d, start=1):
    figure.savefig('2D_plot_{}.png'.format(number))

for number, figure in enumerate(figures_3d, start=1):
    figure.savefig('3D_plot_{}.png'.format(number))

# Show all figures on screen
plt.show()



