# About the notebook
The purpose of this Jupyter Notebook is to extract image features that are relevant for distinguishing between two different groups, save those features to Excel (.xlsx) files, and plot a selected pair of features for both groups.

# 01 - Loading dependencies
Before running any other code, several libraries and modules need to be imported so that the notebook runs smoothly. These libraries and modules contain pre-written code that performs specific tasks, such as reading and processing images and extracting features.

In [None]:
import os
# Remember the notebook's working directory the first time this cell runs.
# The `globals()` check makes the step a no-op when the cell is re-executed
# in the same kernel session, because `workbookDir` is then already defined.
if 'workbookDir' not in globals():
    print('Updating working directory')
    workbookDir = os.getcwd()
    os.chdir(workbookDir)
print(os.getcwd())

# Modules that must be importable before the rest of the notebook runs.
libraries = ["hdaf_filter"]
for library in libraries:
    try:
        # Import by name only to check that the module is available.
        __import__(library)
    except ImportError:
        # Not installed: install it from GitHub through the IPython shell
        # escape, then retry the import so a failed install surfaces here.
        # NOTE(review): the install URL is the same for every entry of
        # `libraries`; adding another library would need its own source.
        !pip install git+https://github.com/paul-hernandez-herrera/hdaf_filtering
        __import__(library)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import core_code.features as class_features
# Automatically reload imported modules so that updates to their functions take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

# 02 - Auxiliary functions
Auxiliary functions that make the code clearer and more compact.

In [None]:
def create_table(features_dict):
    """Flatten per-channel feature dictionaries into table rows and column names.

    Parameters
    ----------
    features_dict : dict
        Maps a channel name (e.g. ``'nucleus'``) to a dictionary holding the
        per-file sequences ``'distance'``, ``'img_mean'``,
        ``'intensity_mean_objects'``, ``'mean_tamano_objetos'`` and
        ``'num objects'``. The channel that supplies the file identifiers
        must also hold ``'list_files'``. All channels are assumed to
        describe the same files in the same order.

    Returns
    -------
    combined_features : list of list
        One row per file: the file identifier followed by the five feature
        values of every channel, in the iteration order of ``features_dict``.
    features_keys : list of str
        Flat column names matching the rows: ``'file_id'`` followed by
        ``F_<channel>_<feature>`` for every channel and feature.
    """
    # (key inside a channel dictionary, suffix used in the column name).
    # A single definition keeps the row values and the header in the same order.
    feature_columns = (
        ('distance', 'distance'),
        ('img_mean', 'img_mean_intensity'),
        ('intensity_mean_objects', 'objects_mean_intensity'),
        ('mean_tamano_objetos', 'objects_mean_size'),
        ('num objects', 'number_objects'),
    )

    # Column names must be plain strings (not one-element lists) so they can
    # be used directly as a DataFrame header.
    features_keys = ['file_id']
    for channel in features_dict:
        features_keys.extend(
            f"F_{channel}_{suffix}" for _, suffix in feature_columns
        )

    # No channels: nothing to tabulate.
    if not features_dict:
        return [], features_keys

    # File identifiers come from the 'nucleus' channel when it exists;
    # otherwise fall back to the first channel.
    if 'nucleus' in features_dict:
        reference_channel = features_dict['nucleus']
    else:
        reference_channel = next(iter(features_dict.values()))

    combined_features = []
    for i, file_id in enumerate(reference_channel['list_files']):
        row = [file_id]
        for channel_features in features_dict.values():
            row.extend(channel_features[key][i] for key, _ in feature_columns)
        combined_features.append(row)

    return combined_features, features_keys
    
def save_to_excel(data, filename, columns):
    """Write tabular data to an Excel file.

    Parameters
    ----------
    data : row-oriented data accepted by ``pd.DataFrame``.
    filename : destination path of the Excel file.
    columns : column labels used as the header row.
    """
    # The DataFrame index is omitted so only the labelled columns are written.
    pd.DataFrame(data, columns=columns).to_excel(filename, index=False)


# 03 - Setting required parameters
In this section, users can specify the necessary parameters to extract the features from images. The following parameters are required:

**Folder path group 1**: The path to the folder containing the input images for group 1. \
**Folder path group 2**: The path to the folder containing the input images for group 2. 

**radius channel 1**: The approximate size (radius) of the objects to detect for images in channel 1. \
**radius channel 2**: The approximate size (radius) of the objects to detect for images in channel 2.

In [None]:
# Input folders, one per group of images to compare.
# These are absolute paths on the author's machine; edit them before running.
folder_group_1 = r"C:\Users\paulh\Downloads\Claudina-1 20x un campo\Claudina-1 20x un campo\20X SANOS"
folder_group_2 = r"C:\Users\paulh\Downloads\Claudina-1 20x un campo\Claudina-1 20x un campo\20X PACIENTES"

# Alternative parameter set ("3 fields of view"), kept for reference:
# radius_c0 = [4, 7, 10]
# radius_c1 = [1]
# noise_size = 4

# Active parameters. `radius_c0` is passed as `radius` for channel 0 and
# `radius_c1` for channel 1; `noise_size` is forwarded unchanged to
# `get_features` for both channels.
radius_c0 = [3.5285892540708317, 5.202566786447311, 6.875301098101214]
radius_c1 = [0.8319221980780127]
noise_size = 8

# 04 - Run code
This section provides the main code to extract the features, save them to Excel files, and create a figure from the data.

In [None]:
def _output_filename(group_number):
    """Build the Excel file name of a group from the current radius and noise settings."""
    r1 = '_'.join(f'{x:.2f}' for x in radius_c0)
    r2 = '_'.join(f'{x:.2f}' for x in radius_c1)
    return f"group_{group_number}_r1={r1}_r2={r2}_noise={noise_size}.xlsx"


# (feature-set name, image channel index, object radii) for every channel analysed
channel_settings = (('nucleus', 0, radius_c0), ('claudina', 1, radius_c1))

# Group 1: extract the features of every channel, then export them
features_group1 = {}
for channel_name, channel_index, channel_radius in channel_settings:
    features_group1[channel_name] = class_features.get_features(
        folder_group_1, channel = channel_index, radius = channel_radius, noise_size = noise_size)

table_group1, key_group1 = create_table(features_group1)
save_to_excel(table_group1, _output_filename(1), key_group1)

# Group 2: same processing on the second folder
features_group2 = {}
for channel_name, channel_index, channel_radius in channel_settings:
    features_group2[channel_name] = class_features.get_features(
        folder_group_2, channel = channel_index, radius = channel_radius, noise_size = noise_size)

table_group2, key_group2 = create_table(features_group2)
save_to_excel(table_group2, _output_filename(2), key_group2)

# Pair of features to plot, each given as [channel, feature name]: x axis first, y axis second
f = [['claudina','num objects'], ['claudina', 'distance']]
(x_channel, x_feature), (y_channel, y_feature) = f

# Two-column arrays (x values, y values) with the selected features of each group
F_group1 = np.column_stack((features_group1[x_channel][x_feature], features_group1[y_channel][y_feature]))
F_group2 = np.column_stack((features_group2[x_channel][x_feature], features_group2[y_channel][y_feature]))

# Scatter both groups on the same axes
plt.figure()
for points, marker, group_label in ((F_group1, 'b*', 'Group 1'), (F_group2, 'r*', 'Group 2')):
    plt.plot(points[:, 0], points[:, 1], marker, label=group_label)
plt.xlabel(f'{x_channel} --- {x_feature}')
plt.ylabel(f'{y_channel} --- {y_feature}')
plt.legend()
plt.savefig('features_plot.png')  # Save the figure to disk before displaying it
plt.show()

print("\n\n Program has finished...")