# In[ ]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA

# PCA of a reflectance table: load the sheet, standardize every wavelength
# column, fit a PCA, and report which wavelengths carry the largest absolute
# loadings on the leading principal components.


def standardize_columns(matrix):
    """Return *matrix* with each column shifted to mean 0 and scaled to std 1.

    The population standard deviation is used (NumPy's default, ``ddof=0``).
    A column whose standard deviation is exactly 0 (the same value in every
    row) is only centred, so it becomes all zeros instead of the NaNs a
    0/0 division would produce; NaNs would make the PCA fit fail.

    Args:
        matrix: 2-D array-like of numeric values, one row per sample.

    Returns:
        A float ``numpy.ndarray`` with the same shape as *matrix*.

    Raises:
        ValueError: If *matrix* holds no values at all (for example because
            every row was dropped during cleaning), or if it cannot be
            converted to floats.
    """
    values = np.asarray(matrix, dtype=float)
    if values.size == 0:
        raise ValueError("No reflectance values left to standardize.")
    column_std = np.std(values, axis=0)
    # Dividing by 1 leaves the (already zero) centred values untouched.
    safe_std = np.where(column_std == 0, 1.0, column_std)
    return (values - np.mean(values, axis=0)) / safe_std


def rank_by_abs_loading(pairs):
    """Return ``(wavelength, loading)`` pairs, largest absolute loading first.

    The sort is stable, so pairs with equal absolute loading keep their
    original relative order.
    """
    return sorted(pairs, key=lambda pair: abs(pair[1]), reverse=True)


if __name__ == "__main__":
    # Load the Excel file.
    # Raw string: the Windows path contains backslashes that Python would
    # otherwise try to read as (invalid) escape sequences.
    file_path = r'D:\Spectra\Gsi1.xlsx'  # Replace with the path to your file
    sheet_name = 'Gsi1'  # Replace with the name of the relevant sheet
    sheet_data = pd.read_excel(file_path, sheet_name=sheet_name)

    # Inspect the data
    print("First few rows of the dataset:")
    print(sheet_data.head())

    # Drop every row that contains at least one missing value.
    data_cleaned = sheet_data.dropna()

    # Everything after the first column is treated as reflectance values.
    # NOTE(review): assumes column 0 holds sample IDs and the remaining
    # column names are the wavelengths -- adjust the slicing to your dataset.
    reflectance_matrix = data_cleaned.iloc[:, 1:].values
    wavelengths = data_cleaned.columns[1:]

    # Normalize the reflectance data (standardize: mean=0, std=1)
    reflectance_normalized = standardize_columns(reflectance_matrix)

    # Perform PCA
    pca = PCA()
    pca.fit(reflectance_normalized)

    # Extract explained variance and principal components
    explained_variance = pca.explained_variance_ratio_
    principal_components = pca.components_

    # Report the first two principal components. zip() stops at the shorter
    # input, so a fit that yields a single component is reported without an
    # IndexError.
    for component_name, loading_scores in zip(("First", "Second"), principal_components):
        important_wavelengths = list(zip(wavelengths, loading_scores))
        important_wavelengths_sorted = rank_by_abs_loading(important_wavelengths)

        # The variance summary is printed before each component's list so the
        # console output keeps the layout this script has always produced.
        print("Explained Variance by Principal Components:")
        print(explained_variance)

        print(f"\nTop 10 Important Wavelengths ({component_name} Principal Component):")
        for wavelength, score in important_wavelengths_sorted[:10]:
            print(f"Wavelength: {wavelength}, Contribution: {score}")

    # Optional: Save the PCA results.
    # NOTE: only the ranking of the last component reported above (normally
    # the second principal component) is written to the file.
    output_path = 'pca_results.csv'
    pd.DataFrame({
        'Wavelength': [w for w, _ in important_wavelengths_sorted],
        'Contribution': [s for _, s in important_wavelengths_sorted],
    }).to_csv(output_path, index=False)
    print(f"\nPCA results saved to {output_path}")
# PCA1.py
# Displaying PCA1.py.