In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors

def perform_pca(file_path, n_components=5, top_n=10):
    """Run PCA on one CSV file and summarise its leading components.

    The CSV is loaded with pandas, rows containing missing values are
    dropped, and every column after the first is standardised column-wise
    before fitting a full PCA.

    Args:
        file_path: Path of the CSV file to analyse. The first column is
            skipped (assumed to hold a non-numeric identifier such as a
            sample ID -- adjust the slicing if the dataset differs).
        n_components: Maximum number of leading principal components to
            report. Fewer are returned when the data yields fewer.
        top_n: Number of columns to keep per component, ranked by the
            absolute value of their loading.

    Returns:
        A tuple ``(explained_variance, pc_results)`` where
        ``explained_variance`` is an array with the explained-variance ratio
        of each reported component and ``pc_results`` maps ``"PC1"``,
        ``"PC2"``, ... to a list of ``(column_name, loading)`` pairs sorted
        by descending absolute loading.

    Raises:
        ValueError: If fewer than two complete rows, or no data columns,
            remain after dropping rows with missing values.
    """
    data = pd.read_csv(file_path)
    data_cleaned = data.dropna()

    # Skip the first column (identifier); the rest are the measured values.
    reflectance_matrix = data_cleaned.iloc[:, 1:].values
    wavelengths = data_cleaned.columns[1:]

    n_rows, n_cols = reflectance_matrix.shape
    if n_rows < 2 or n_cols == 0:
        raise ValueError(
            f"Not enough data for PCA in {file_path!r}: "
            f"{n_rows} complete row(s), {n_cols} data column(s)"
        )

    # Standardise each column (mean=0, std=1). A constant column has std 0;
    # dividing by it would inject NaN/inf and make PCA.fit fail, so divide
    # by 1 instead, which leaves that column at all zeros after centring.
    column_mean = np.mean(reflectance_matrix, axis=0)
    column_std = np.std(reflectance_matrix, axis=0)
    safe_std = np.where(column_std == 0, 1.0, column_std)
    reflectance_normalized = (reflectance_matrix - column_mean) / safe_std

    pca = PCA()
    pca.fit(reflectance_normalized)

    explained_variance = pca.explained_variance_ratio_
    principal_components = pca.components_

    # PCA yields at most min(n_samples, n_features) components, so clamp the
    # requested count rather than indexing past the end.
    n_reported = min(n_components, len(principal_components))

    pc_results = {}
    for pc_index in range(n_reported):
        loading_scores = principal_components[pc_index]
        ranked = sorted(
            zip(wavelengths, loading_scores),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        pc_results[f"PC{pc_index + 1}"] = ranked[:top_n]

    return explained_variance[:n_reported], pc_results

def _build_styled_table(table_data):
    """Return a reportlab Table for *table_data* with the report's shared styling."""
    table = Table(table_data)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
    ]))
    return table


def generate_pdf_report(file_paths, output_pdf):
    """Run PCA on each file and write a combined PDF report.

    Each entry of ``file_paths`` is processed with ``perform_pca`` and
    labelled "Day 1", "Day 2", ... in list order. The PDF contains one table
    of explained-variance percentages per day, followed by one table per day
    and per principal component listing the top-ranked columns and their
    loadings.

    Args:
        file_paths: Iterable of CSV paths, one per day, in report order.
        output_pdf: Destination path of the PDF file.

    Returns:
        None. The report is written to ``output_pdf`` and progress messages
        are printed to stdout.
    """
    report_pc_percentages = []
    report_pc_wavelengths = []

    for day, file_path in enumerate(file_paths, start=1):
        print(f"Processing Day {day}...")
        explained_variance, pc_results = perform_pca(file_path)

        report_pc_percentages.append(
            [f"Day {day}"] + [f"{variance * 100:.2f}%" for variance in explained_variance]
        )
        report_pc_wavelengths.append((f"Day {day}", pc_results))

    doc = SimpleDocTemplate(output_pdf, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    story.append(Paragraph("PCA Report: Day-by-Day Analysis", styles['Title']))
    story.append(Spacer(1, 12))

    # Explained-variance table. The header width follows the data so a day
    # with fewer components does not break the table; five PC columns are
    # kept as the fallback when there are no rows at all.
    story.append(Paragraph("Explained Variance Percentages", styles['Heading2']))
    n_pcs = max((len(row) - 1 for row in report_pc_percentages), default=5)
    header = ["Day"] + [f"PC{i}" for i in range(1, n_pcs + 1)]
    # Pad short rows with blanks so every row matches the header width.
    body_rows = [row + [""] * (len(header) - len(row)) for row in report_pc_percentages]
    story.append(_build_styled_table([header] + body_rows))
    story.append(Spacer(1, 12))

    # One table per day and per principal component.
    story.append(Paragraph("Top 10 Wavelengths for Each Principal Component", styles['Heading2']))
    for day, wavelengths in report_pc_wavelengths:
        story.append(Paragraph(f"{day}:", styles['Heading3']))
        for pc, values in wavelengths.items():
            story.append(Paragraph(f"{pc}:", styles['Heading4']))
            table_data = [["Wavelength", "Contribution"]]
            table_data.extend([wavelength, f"{score:.4f}"] for wavelength, score in values)
            story.append(_build_styled_table(table_data))
            story.append(Spacer(1, 12))

    doc.build(story)
    print(f"PDF report saved to {output_pdf}")

# Example usage: one merged reflectance CSV per measurement day, listed in
# the order they should appear in the report (Day 1, Day 2, ...).
file_paths = [
    r"C:\Users\sumat\Desktop\GSI Suman\pravakar\Filter data of Green apple\Merged data\merged_wavelength_reflectance 09-07-24.csv",
    r"C:\Users\sumat\Desktop\GSI Suman\pravakar\Filter data of Green apple\Merged data\merged_wavelength_reflectance 12-07-24.csv",
    r"C:\Users\sumat\Desktop\GSI Suman\pravakar\Filter data of Green apple\Merged data\merged_wavelength_reflectance 23-07-24.csv",
    r"C:\Users\sumat\Desktop\GSI Suman\pravakar\Filter data of Green apple\Merged data\merged_wavelength_reflectance 31-07-24.csv",
    r"C:\Users\sumat\Desktop\GSI Suman\pravakar\Filter data of Green apple\Merged data\merged_wavelength_reflectance 06-09-24.csv",
    r"C:\Users\sumat\Desktop\GSI Suman\pravakar\Filter data of Green apple\Merged data\merged_wavelength_reflectance 03-10-24.csv",
]
generate_pdf_report(file_paths, output_pdf="pca_report.pdf")

Processing Day 1...
Processing Day 2...
Processing Day 3...
Processing Day 4...
Processing Day 5...
Processing Day 6...
PDF report saved to pca_report.pdf
