In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from datetime import datetime, timedelta

def perform_pca(file_path, n_components=5, top_n=10):
    """Run PCA on one reflectance CSV and rank the columns that drive each PC.

    The CSV is read with pandas, rows containing any missing value are
    dropped, and every column after the first is standardized (mean 0,
    std 1) before fitting a full PCA.

    Args:
        file_path: Path to the CSV file. The first column is skipped
            (presumably a sample identifier -- adjust the slicing if your
            dataset is laid out differently); all remaining columns are
            treated as numeric reflectance values, with the column names
            used as wavelength labels.
        n_components: Maximum number of leading principal components to
            report. Fewer are returned when the fit produces fewer.
        top_n: Maximum number of wavelengths to list per component.

    Returns:
        A tuple ``(explained_variance, pc_results)`` where
        ``explained_variance`` holds the explained-variance ratios of the
        reported components and ``pc_results`` maps ``"PC1"``, ``"PC2"``, ...
        to a list of ``(wavelength_label, loading)`` pairs sorted by
        descending absolute loading.

    Raises:
        ValueError: If ``n_components`` or ``top_n`` is negative, or if
            fewer than two complete rows or no reflectance columns remain
            after cleaning.
    """
    if n_components < 0 or top_n < 0:
        raise ValueError("n_components and top_n must be non-negative")

    # Load the CSV file and drop rows with missing values
    data = pd.read_csv(file_path)
    data_cleaned = data.dropna()

    # Extract reflectance values (exclude the leading non-numeric column)
    spectra = data_cleaned.iloc[:, 1:]  # Adjust slicing as per your dataset
    wavelengths = spectra.columns
    reflectance_matrix = spectra.values

    n_samples, n_features = reflectance_matrix.shape
    if n_samples < 2 or n_features == 0:
        # With a single row every column has zero spread, and with no rows
        # or columns there is nothing to decompose; fail with a clear message
        # instead of an error from deep inside the PCA fit.
        raise ValueError(
            f"{file_path}: need at least 2 complete rows and 1 reflectance "
            f"column for PCA, got {n_samples} row(s) and {n_features} column(s)"
        )

    # Normalize the reflectance data (standardize: mean=0, std=1)
    column_means = np.mean(reflectance_matrix, axis=0)
    column_stds = np.std(reflectance_matrix, axis=0)
    # A constant column has std 0; dividing by 1 instead leaves it at 0 after
    # centering rather than turning it into NaN, which would break the fit.
    safe_stds = np.where(column_stds == 0, 1.0, column_stds)
    reflectance_normalized = (reflectance_matrix - column_means) / safe_stds

    # Perform PCA
    pca = PCA()
    pca.fit(reflectance_normalized)

    # Extract explained variance and principal components
    explained_variance = pca.explained_variance_ratio_
    principal_components = pca.components_

    # The fit may yield fewer components than requested; never index past them
    n_reported = min(n_components, principal_components.shape[0])

    # Rank wavelengths by absolute loading for each reported PC
    pc_results = {}
    for pc_index in range(n_reported):
        loading_scores = principal_components[pc_index]
        ranked = sorted(
            zip(wavelengths, loading_scores),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        pc_results[f"PC{pc_index + 1}"] = ranked[:top_n]

    return explained_variance[:n_reported], pc_results

def generate_report(file_paths):
    """Run PCA on each file in order and print a day-by-day summary.

    Files are labelled "Day 1", "Day 2", ... by their position in
    ``file_paths``. Two sections are printed: a table of explained-variance
    percentages for PC1-PC5, then the ranked wavelengths returned by
    ``perform_pca`` for every principal component of every day.

    Args:
        file_paths: Iterable of CSV paths, one per day, in report order.

    Returns:
        None. The report is written to standard output.
    """
    variance_rows = []
    wavelength_sections = []

    # Analyse the files one at a time; the position in the list is the day
    for day_number, csv_path in enumerate(file_paths, start=1):
        day_label = f"Day {day_number}"
        print(f"Processing Day {day_number}...")
        variance_ratios, top_wavelengths_by_pc = perform_pca(csv_path)

        # One table row per day: the label followed by formatted percentages
        formatted = [f"{ratio * 100:.2f}%" for ratio in variance_ratios]
        variance_rows.append([day_label, *formatted])

        # Keep a shallow copy of the per-PC rankings for the second section
        wavelength_sections.append((day_label, dict(top_wavelengths_by_pc)))

    # Section 1: explained-variance table
    variance_table = pd.DataFrame(
        variance_rows,
        columns=["Day", "PC1", "PC2", "PC3", "PC4", "PC5"],
    )
    print("\nPCA Report: Day-by-Day Explained Variance Percentages")
    print(variance_table.to_string(index=False))

    # Section 2: ranked wavelengths per day and per component
    print("\nPCA Report: Day-by-Day Top 10 Wavelengths for Each Principal Component")
    for day_label, rankings_by_pc in wavelength_sections:
        print(f"\n{day_label}:")
        for pc_name, ranked_pairs in rankings_by_pc.items():
            print(f"\n{pc_name}:")
            for wavelength, score in ranked_pairs:
                print(f"Wavelength: {wavelength}, Contribution: {score:.4f}")

# Example usage: every input CSV lives in the same folder and differs only in
# the date-like suffix of its file name; list order determines the day number.
_DATA_DIR = "C:\\Users\\sumat\\Desktop\\GSI Suman\\pravakar\\Filter data of Green apple\\Merged data"
_FILE_DATE_TAGS = ["09-07-24", "12-07-24", "23-07-24", "31-07-24", "06-09-24", "03-10-24"]
file_paths = [
    f"{_DATA_DIR}\\merged_wavelength_reflectance {date_tag}.csv"
    for date_tag in _FILE_DATE_TAGS
]
generate_report(file_paths)

Processing Day 1...
Processing Day 2...
Processing Day 3...
Processing Day 4...
Processing Day 5...
Processing Day 6...

PCA Report: Day-by-Day Explained Variance Percentages
  Day    PC1    PC2   PC3   PC4   PC5
Day 1 87.30% 10.35% 1.48% 0.44% 0.20%
Day 2 92.65%  4.68% 1.47% 0.77% 0.27%
Day 3 95.77%  3.50% 0.44% 0.21% 0.07%
Day 4 95.31%  2.20% 1.29% 0.90% 0.29%
Day 5 95.80%  3.44% 0.46% 0.11% 0.10%
Day 6 84.43% 12.10% 3.01% 0.27% 0.20%

PCA Report: Day-by-Day Top 10 Wavelengths for Each Principal Component

Day 1:

PC1:
Wavelength: 1869, Contribution: 0.0229
Wavelength: 1868, Contribution: 0.0229
Wavelength: 1870, Contribution: 0.0229
Wavelength: 1867, Contribution: 0.0229
Wavelength: 1513, Contribution: 0.0229
Wavelength: 1514, Contribution: 0.0229
Wavelength: 1512, Contribution: 0.0229
Wavelength: 1515, Contribution: 0.0229
Wavelength: 1503, Contribution: 0.0229
Wavelength: 1510, Contribution: 0.0229

PC2:
Wavelength: 670, Contribution: 0.0578
Wavelength: 671, Contribution: 0.0578
W