In [4]:
from pptx import Presentation
from pptx.util import Inches, Pt
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns


# APE

In [5]:
# Function to add a slide with an image, histogram, and statistics
def add_slide(prs, target_column, model_name, original_path, ape_path, histogram_path, stats_df):
    """
    Adds one Absolute Percentage Error (APE) analysis slide to a PowerPoint presentation (pptx).

    The slide contains a bold title naming the column and the model, three
    pictures placed side by side (original image, APE image, histogram) and a
    text box listing selected rows of the statistics.

    Parameters:
    - prs: PowerPoint presentation (Presentation) where the slide will be added.
    - target_column: Name of the target column (shown in the title).
    - model_name: Name of the model (shown in the title).
    - original_path: Path to the original image.
    - ape_path: Path to the APE image.
    - histogram_path: Path to the histogram.
    - stats_df: pandas object indexed by statistic label; it must contain every
      label listed in `stat_labels` below.

    Returns:
    - None

    Raises:
    - KeyError: if `stats_df` lacks any of the selected statistic labels.
    """
    stat_labels = ['mean', 'std', 'min', '25%', '50%', '75%', '80%', '85%', '90%', '95%', '99%']

    # Layout index 6 is the blank layout in python-pptx's default template.
    slide = prs.slides.add_slide(prs.slide_layouts[6])

    # Title box across the top of the slide.
    title_box = slide.shapes.add_textbox(left=Inches(0), top=Inches(0), width=Inches(10), height=Inches(1))

    # A new text frame already contains one (empty) paragraph. Write the title
    # into it instead of appending a second paragraph, which would leave a
    # blank line above the title.
    title = title_box.text_frame.paragraphs[0]
    title.text = f"Analysis for column: {target_column}\nModel: {model_name}"
    title.font.size = Pt(18)
    title.font.bold = True

    # Three 3.2 x 3.2 inch pictures in a row, directly below the title box.
    pictures_top = Inches(1)
    picture_side = Inches(3.2)
    for left_inches, picture_path in ((0.1, original_path), (3.4, ape_path), (6.8, histogram_path)):
        slide.shapes.add_picture(picture_path, Inches(left_inches), pictures_top, width=picture_side, height=picture_side)

    # Statistics text box below the pictures.
    stats_box = slide.shapes.add_textbox(left=Inches(1), top=Inches(4.5), width=Inches(9), height=Inches(3))
    stats_frame = stats_box.text_frame
    stats_frame.word_wrap = True
    stats_frame.text = "Statistics:\n\n" + stats_df.loc[stat_labels].to_string()

    # Assigning multi-line text creates several paragraphs/runs; size each one.
    for paragraph in stats_frame.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(12)


def create_slides(original_images_dir, ape_images_dirs, histogram_images_dirs, ape_stats_dfs, target_columns, output_pptx):
    """
    Creates a PowerPoint presentation (pptx) with one Absolute Percentage Error (APE)
    slide per (target column, model) pair and saves it.

    Every image is expected at `<directory>/<target_column>/<target_column>.png`.

    Parameters:
    - original_images_dir: Directory containing original images.
    - ape_images_dirs: Dictionary with model names as keys and directories containing APE images as values.
      Its keys decide which models get slides.
    - histogram_images_dirs: Dictionary with model names as keys and directories containing histogram images as values.
    - ape_stats_dfs: Dictionary with model names as keys and DataFrames containing APE statistics as values
      (one column per target column).
    - target_columns: List of target columns.
    - output_pptx: Path to the output pptx file. Missing parent folders are created.

    Returns:
    - None

    Raises:
    - KeyError: if a model from `ape_images_dirs` is missing in `histogram_images_dirs`
      or `ape_stats_dfs`, or a target column is missing in that model's statistics.
    """
    # Initialize the presentation
    prs = Presentation()

    # Add slides for each column and model
    for target_column in target_columns:
        image_name = f'{target_column}.png'
        for model_name, ape_images_dir in ape_images_dirs.items():
            # Build the image paths, normalising Windows backslashes to '/'
            original_path = os.path.join(original_images_dir, target_column, image_name).replace('\\', '/')
            ape_path = os.path.join(ape_images_dir, target_column, image_name).replace('\\', '/')
            histogram_path = os.path.join(histogram_images_dirs[model_name], target_column, image_name).replace('\\', '/')

            stats_df = ape_stats_dfs[model_name][target_column]

            # Add slide
            add_slide(prs, target_column, model_name, original_path, ape_path, histogram_path, stats_df)

    # Saving does not create missing folders, so make sure the target folder
    # exists. Only done for path-like targets; anything else (e.g. a file-like
    # object) is handed to save() untouched.
    if isinstance(output_pptx, (str, os.PathLike)):
        output_dir = os.path.dirname(output_pptx)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Save the presentation
    prs.save(output_pptx)


# Shared configuration: these names are interpolated into the data, image and
# slide paths built further down.
map_name = 'map2'
interpolator_method = 'nearest_neighbors'

# Columns that get a slide, in presentation order.
# NOTE(review): 'Va' may be a typo for 'V' - confirm against the CSV headers.
columns_of_interest = [
    # element columns
    'N', 'P', 'K', 'Ca', 'Mg', 'S',
    'B', 'Cu', 'Fe', 'Mn', 'Zn', 'Mo',
    'Ni', 'Al', 'Se', 'Si', 'Na', 'Va',
    # DRIS_-prefixed columns
    'DRIS_N', 'DRIS_P', 'DRIS_K', 'DRIS_Ca', 'DRIS_Mg', 'DRIS_S',
    'DRIS_B', 'DRIS_Cu', 'DRIS_Fe', 'DRIS_Mn', 'DRIS_Zn',
    # remaining columns
    'IMS', 'IBN',
]




In [6]:
# Random Forest: load the APE statistics and build the APE slide deck.
stats_rf = pd.read_csv(
    f'data/{map_name}/errors/random_forest_error_stats.csv',
    index_col=0,
)

original_images_dir = f'images/{map_name}/heatmaps/{interpolator_method}/interpolation'

# NOTE(review): both Random Forest image folders end in 'nn', the same folders
# the Neural Network cell uses - confirm this is intended and not a copy-paste slip.
ape_images_dirs = {'Random Forest': f'images/{map_name}/heatmaps/ape/nn'}
histograms_images_dirs = {'Random Forest': f'images/{map_name}/histograms/ape/nn'}
ape_stats_dfs = {'Random Forest': stats_rf}

create_slides(
    original_images_dir=original_images_dir,
    ape_images_dirs=ape_images_dirs,
    histogram_images_dirs=histograms_images_dirs,
    ape_stats_dfs=ape_stats_dfs,
    target_columns=columns_of_interest,
    output_pptx=f'slides/{map_name}/{map_name}_{interpolator_method}_random_forest_ape.pptx',
)

In [None]:
# Linear Regression: load the APE statistics and build the APE slide deck.
stats_lr = pd.read_csv(
    f'data/{map_name}/errors/linear_regression_error_stats.csv',
    index_col=0,
)

original_images_dir = f'images/{map_name}/heatmaps/{interpolator_method}/interpolation'

# One entry per model; the key is the model name shown in the slide titles.
ape_images_dirs = {'Linear Regression': f'images/{map_name}/heatmaps/ape/lr'}
histograms_images_dirs = {'Linear Regression': f'images/{map_name}/histograms/ape/lr'}
ape_stats_dfs = {'Linear Regression': stats_lr}

create_slides(
    original_images_dir=original_images_dir,
    ape_images_dirs=ape_images_dirs,
    histogram_images_dirs=histograms_images_dirs,
    ape_stats_dfs=ape_stats_dfs,
    target_columns=columns_of_interest,
    output_pptx=f'slides/{map_name}/{map_name}_{interpolator_method}_linear_regression_ape.pptx',
)

In [None]:
# Neural Network: load the APE statistics and build the APE slide deck.
stats_nn = pd.read_csv(
    f'data/{map_name}/errors/keras_error_stats.csv',
    index_col=0,
)

original_images_dir = f'images/{map_name}/heatmaps/{interpolator_method}/interpolation'

# One entry per model; the key is the model name shown in the slide titles.
ape_images_dirs = {'Neural Network': f'images/{map_name}/heatmaps/ape/nn'}
histograms_images_dirs = {'Neural Network': f'images/{map_name}/histograms/ape/nn'}
ape_stats_dfs = {'Neural Network': stats_nn}

create_slides(
    original_images_dir=original_images_dir,
    ape_images_dirs=ape_images_dirs,
    histogram_images_dirs=histograms_images_dirs,
    ape_stats_dfs=ape_stats_dfs,
    target_columns=columns_of_interest,
    output_pptx=f'slides/{map_name}/{map_name}_{interpolator_method}_neural_network_ape.pptx',
)

# Interpolation

In [None]:

# Statistics tables used by the interpolation slide decks; the first CSV
# column becomes the index.
uk_interpolation_stats = pd.read_csv(
    f'data/{map_name}/interpolation/universal_interpolation_stats.csv',
    index_col=0,
)
original_stats = pd.read_csv(
    f'data/{map_name}/nutrients/nutrients_stats.csv',
    index_col=0,
)

# Function to add a slide with two plots, histogram, and additional statistics
def add_interpolation(prs, target_column, model_name, image_path, histogram_path, stats_df, additional_stats_df):
    """
    Adds one interpolation-analysis slide to a PowerPoint presentation (pptx).

    The slide contains a bold title naming the column and the model, two
    pictures side by side (main image and histogram) and two text boxes:
    "Interpolated Statistics" (from `stats_df`) and "Original Statistics"
    (from `additional_stats_df`).

    Parameters:
    - prs: PowerPoint presentation (Presentation) where the slide will be added.
    - target_column: Name of the target column (shown in the title).
    - model_name: Name of the model (shown in the title).
    - image_path: Path to the main image.
    - histogram_path: Path to the histogram.
    - stats_df: pandas object indexed by statistic label with the main statistics.
      An empty object (or None) is rendered as "not available".
    - additional_stats_df: Same as `stats_df`, for the additional statistics.

    Returns:
    - None

    Raises:
    - KeyError: if a non-empty statistics object lacks any of the selected labels.
    """
    stat_labels = ['mean', 'std', 'min', '25%', '50%', '75%', '80%', '85%', '90%', '95%', '99%']

    # Layout index 6 is the blank layout in python-pptx's default template.
    slide = prs.slides.add_slide(prs.slide_layouts[6])

    def add_stats_box(heading, stats, left, width):
        # Place one statistics text box below the pictures and fill it in.
        box = slide.shapes.add_textbox(left=left, top=Inches(4.5), width=width, height=Inches(3))
        frame = box.text_frame
        frame.word_wrap = True

        # Callers pass an empty DataFrame when a column has no statistics;
        # `.loc` on it would raise KeyError, so show a placeholder instead.
        if stats is None or stats.empty:
            body = "not available"
        else:
            body = stats.loc[stat_labels].to_string()
        frame.text = heading + "\n\n" + body

        # Assigning multi-line text creates several paragraphs/runs; size each one.
        for paragraph in frame.paragraphs:
            for run in paragraph.runs:
                run.font.size = Pt(12)

    # Title box across the top of the slide.
    title_box = slide.shapes.add_textbox(left=Inches(0), top=Inches(0), width=Inches(10), height=Inches(1))

    # A new text frame already contains one (empty) paragraph. Write the title
    # into it instead of appending a second paragraph, which would leave a
    # blank line above the title.
    title = title_box.text_frame.paragraphs[0]
    title.text = f"Analysis for column: {target_column}\nModel: {model_name}"
    title.font.size = Pt(18)
    title.font.bold = True

    # Main plot on the left, histogram on the right, both 4 inches tall.
    # NOTE(review): top=0.5 in overlaps the 1 in tall title box - confirm this is intended.
    pictures_top = Inches(0.5)
    slide.shapes.add_picture(image_path, Inches(0.1), pictures_top, width=Inches(5), height=Inches(4))
    slide.shapes.add_picture(histogram_path, Inches(5), pictures_top, width=Inches(4.5), height=Inches(4))

    # Main statistics on the left, additional statistics to their right.
    add_stats_box("Interpolated Statistics:", stats_df, left=Inches(1), width=Inches(9))
    add_stats_box("Original Statistics:", additional_stats_df, left=Inches(4.5), width=Inches(5))


# Function to create slides
def create_slides_interpolation(coords_images_dirs, histogram_images_dirs, stats_dfs, target_columns, output_pptx):
    """
    Creates a PowerPoint presentation (pptx) with one interpolation slide per
    (target column, model) pair, each showing the model statistics next to the
    original statistics, and saves it.

    Every image is expected at `<directory>/<target_column>/<target_column>.png`.

    Parameters:
    - coords_images_dirs: Dictionary with model names as keys and directories containing main images as values.
      Its keys decide which models get slides.
    - histogram_images_dirs: Dictionary with model names as keys and directories containing histogram images as values.
    - stats_dfs: Dictionary with model names as keys and DataFrames containing statistics as values.
      It must also contain the key 'Original' holding the original statistics.
    - target_columns: List of target columns.
    - output_pptx: Path to the output pptx file. Missing parent folders are created.

    Returns:
    - None

    Raises:
    - KeyError: if a model from `coords_images_dirs` is missing in `histogram_images_dirs`
      or `stats_dfs`, or if `stats_dfs` has no 'Original' entry.
    """
    # Initialize the presentation
    prs = Presentation()

    # Add slides for each column and model
    for target_column in target_columns:
        image_name = f'{target_column}.png'
        for model_name, coords_images_dir in coords_images_dirs.items():
            # Build the image paths, normalising Windows backslashes to '/'
            image_path = os.path.join(coords_images_dir, target_column, image_name).replace('\\', '/')
            histogram_path = os.path.join(histogram_images_dirs[model_name], target_column, image_name).replace('\\', '/')

            # A column missing from a statistics table is passed on as an empty DataFrame
            stats_df = stats_dfs[model_name].get(target_column, pd.DataFrame())
            original_stats_df = stats_dfs['Original'].get(target_column, pd.DataFrame())

            # Add slide
            add_interpolation(prs, target_column, model_name, image_path, histogram_path, stats_df, original_stats_df)

    # Saving does not create missing folders, so make sure the target folder
    # exists. Only done for path-like targets; anything else (e.g. a file-like
    # object) is handed to save() untouched.
    if isinstance(output_pptx, (str, os.PathLike)):
        output_dir = os.path.dirname(output_pptx)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Save the presentation
    prs.save(output_pptx)
    
# Full interpolation results: image folders and statistics per model.
coords_images_dirs = {'UK': f'images/{map_name}/coords'}

histograms_images_dirs = {
    'UK': f'images/{map_name}/histograms/{interpolator_method}/interpolation',
    # 'UK': 'images/{map_name}/histograms/comparing_interpolation_and_original',
}

# The 'Original' entry supplies the second statistics column on each slide.
stats_dfs = {
    'UK': uk_interpolation_stats,
    # 'UK': uk_closest_points_stats,
    'Original': original_stats,
}

# Create slides for interpolation analysis
create_slides_interpolation(
    coords_images_dirs=coords_images_dirs,
    histogram_images_dirs=histograms_images_dirs,
    stats_dfs=stats_dfs,
    target_columns=columns_of_interest,
    output_pptx=f'slides/{map_name}/{map_name}_{interpolator_method}_full_interpolation_results.pptx',
)


In [None]:
# Closest-points results: load the statistics and build the comparison deck.
uk_closest_points_stats = pd.read_csv(
    f'data/{map_name}/closest_points/universal_interpolation_closest_points_stats.csv',
    index_col=0,
)

coords_images_dirs = {'UK': f'images/{map_name}/coords'}

histograms_images_dirs = {
    # 'UK': f'images/{map_name}/histograms/interpolation',
    'UK': f'images/{map_name}/histograms/{interpolator_method}/comparing_interpolation_and_original',
}

# The 'Original' entry supplies the second statistics column on each slide.
stats_dfs = {
    # 'UK': uk_interpolation_stats,
    'UK': uk_closest_points_stats,
    'Original': original_stats,
}

create_slides_interpolation(
    coords_images_dirs=coords_images_dirs,
    histogram_images_dirs=histograms_images_dirs,
    stats_dfs=stats_dfs,
    target_columns=columns_of_interest,
    output_pptx=f'slides/{map_name}/{map_name}_{interpolator_method}_closest_points_interpolation_results.pptx',
)

# Models metrics

In [None]:
# from pptx import Presentation
# from pptx.util import Inches, Pt
# import os

# # Function to add a simple slide with MSE Test, MSE Training, and MAPE graphs
# def add_simple_slide(prs, target_column, model_name, mse_test_path, mse_training_path, mape_path):
#     """
#     Adds a slide to a PowerPoint presentation (pptx) containing three graphs: MSE Test, MSE Training, and MAPE
#     for a specific target column and model.

#     Parameters:
#     - prs: PowerPoint presentation (Presentation) where the slide will be added.
#     - target_column: Name of the target column.
#     - model_name: Name of the model.
#     - mse_test_path: Path to the MSE Test graph.
#     - mse_training_path: Path to the MSE Training graph.
#     - mape_path: Path to the MAPE graph.

#     Returns:
#     - None
#     """
#     # Choose a blank slide layout (index 6) 
#     blank_slide_layout = prs.slide_layouts[6]

#     # Add slide with the chosen layout
#     slide = prs.slides.add_slide(blank_slide_layout)

#     # Add title (Analysis for column: {target_column}\nModel: {model_name})
#     left = Inches(0)
#     top = Inches(0)
#     width = Inches(10)
#     height = Inches(1)
    
#     txBox = slide.shapes.add_textbox(left=left, top=top, width=width, height=height)
    
#     title_shape = txBox.text_frame
    
#     p = title_shape.add_paragraph()
#     p.text = f"Analysis for column: {target_column}\nModel: {model_name}"
    
#     # Format the title
#     p.font.size = Pt(18)
#     p.font.bold = True
    
#     # Add MSE Test graph
#     top = Inches(2)
#     left = Inches(0.1)
#     slide.shapes.add_picture(mse_test_path, left, top, width=Inches(3.2), height=Inches(3.2))

#     # Add MSE Training graph
#     left = Inches(3)
#     slide.shapes.add_picture(mse_training_path, left, top, width=Inches(3.2), height=Inches(3.2))

#     # Add MAPE graph
#     left = Inches(6)
#     slide.shapes.add_picture(mape_path, left, top, width=Inches(3.2), height=Inches(3.2))


# def create_simple_slides(mse_test_images_dirs, mse_training_images_dirs, mape_images_dirs, target_columns, output_pptx):
#     """
#     Creates slides in a PowerPoint presentation (pptx) for the analysis of MSE Test, MSE Training, and MAPE
#     for specific target columns and models.

#     Parameters:
#     - mse_test_images_dirs: Dictionary with model names as keys and directories containing MSE Test images as values.
#     - mse_training_images_dirs: Dictionary with model names as keys and directories containing MSE Training images as values.
#     - mape_images_dirs: Dictionary with model names as keys and directories containing MAPE images as values.
#     - target_columns: List of target columns.
#     - output_pptx: Path to the output pptx file.

#     Returns:
#     - None
#     """
#     # Initialize the presentation
#     prs = Presentation()

#     # Add slides for each column and model
#     for target_column in target_columns:
#         for model_name in mse_test_images_dirs:  # Assuming mse_test_images_dirs contains only the desired model directories
#             # Find paths to the images
#             mse_test_path = os.path.join(mse_test_images_dirs[model_name], target_column, f'{target_column}.png')
#             mse_training_path = os.path.join(mse_training_images_dirs[model_name], target_column, f'{target_column}.png')
#             mape_path = os.path.join(mape_images_dirs[model_name], target_column, f'{target_column}.png')

#             # Correct backslashes
#             mse_test_path = mse_test_path.replace('\\', '/')
#             mse_training_path = mse_training_path.replace('\\', '/')
#             mape_path = mape_path.replace('\\', '/')

#             # Add slide with the three images
#             add_simple_slide(prs, target_column, model_name, mse_test_path, mse_training_path, mape_path)

#     # Save the presentation
#     prs.save(output_pptx)
    
# mse_test_images_dirs = {
#     'All Models': f'images/{map_name}/metrics/mse'
# }

# mse_training_images_dirs = {
#     'All Models': f'images/{map_name}/metrics/training/mean'
# }

# mape_images_dirs = {
#     'All Models': f'images/{map_name}/metrics/mape'
# }


# target_columns = ['N', 'P', 'K']

# create_simple_slides(mse_test_images_dirs, mse_training_images_dirs, mape_images_dirs, target_columns, f'slides/{map_name}/map2_metrics_results.pptx')