# Import libraries

In [None]:
import sigma
from sigma.utils import normalisation as norm 
from sigma.utils.load import IMAGEDataset
from sigma.src.utils import same_seeds
from sigma.src.dim_reduction import Experiment
from sigma.models.autoencoder import AutoEncoder
from sigma.src.segmentation import PixelSegmenter
from sigma.gui import gui

# Load files

Load the image files from `chemical_maps_dir` and the intensity map file using the path `intensity_map_path`, creating an object of `IMAGEDataset`.

In [None]:
# Location of the chemical (elemental) map images and of the intensity map file.
chemical_maps_dir='xrf_elemental_maps'
intensity_map_path='intensity.tif'
# Build the IMAGEDataset from both inputs; `im` is the dataset object used by the cells below.
im = IMAGEDataset(chemical_maps_dir,intensity_map_path)

# Dataset preprocessing

In [None]:
# View the loaded dataset `im` through the GUI helper.
gui.view_im_dataset(im)

## Normalisation

In [None]:
# Normalise the dataset by applying the listed methods in sequence (optional step).
# Two methods are used here: z-score, then softmax.

im.normalisation([norm.zscore, 
                   norm.softmax])

Use `gui.view_pixel_distributions` to view the intensity distributions after each sequential normalisation process.

In [None]:
# View the intensity distributions after each normalisation step.
# `norm_list` holds the same methods, in the same order, as the list passed to `im.normalisation`.
gui.view_pixel_distributions(dataset=im, 
                             norm_list=[norm.zscore,
                                        norm.softmax], 
                             cmap='inferno')

## (Optional) Assign RGB to elemental peaks

In [None]:
# Optional: assign RGB to elemental peaks of `im` through the GUI helper.
gui.view_rgb(im)

## Check elemental distribution after normalisation

In [None]:
# Check the elemental distribution after normalisation: the normalised data is passed
# together with the dataset's feature list as the list of elements.
print('After normalisation:')
gui.view_intensity_maps(edx=im.normalised_elemental_data, element_list=im.feature_list)

# Dimensionality reduction: Autoencoder

## Initialise experiment / model

In [None]:
# The integer passed to this function determines the initialised parameters of the model
# (it controls the pseudo-randomness). A different value can influence the result of the
# dimensionality reduction and change the latent space.
same_seeds(1)

# Folder in which the model is saved (the model is saved automatically in this folder).
result_folder_path='./' 

# Set up the experiment, e.g. the model structure and the dataset used for training.
ex = Experiment(descriptor='xrf',
                general_results_dir=result_folder_path,
                model=AutoEncoder,
                model_args={'hidden_layer_sizes':(512,256,128)}, 
                chosen_dataset=im.normalised_elemental_data,
                save_model_every_epoch=True)

## Training

**Parameters for `ex.run_model`**<br>
> `num_epochs`: *int*. The number of entire passes through the training dataset. 50-100 is recommended for this value. A rule of thumb is that if the loss value stops decreasing, that epoch may be a good point to stop. <br>

> `batch_size`: *int*. The number of data points per gradient update. Values between 32-128 are recommended. A smaller batch size means more updates within an epoch, but makes the optimisation process more stochastic.<br>

> `learning_rate`: *float* in the range 0-1. The learning rate controls how quickly the model is adapted to the problem. 1e-4 is recommended. A higher learning rate may yield faster convergence but risks getting stuck in an undesirable local minimum.<br>

> `task`: *str*. If 'train_all', all data points will be used for training the autoencoder. If 'train_eval', training will be conducted on a training set (85% of the dataset) and testing on a testing set (15%) for evaluation. The recommended procedure is to run 'train_eval' for hyperparameter selection, and 'train_all' for the final analysis.<br>

> `criterion`: *str*. If 'MSE', the criterion measures the mean squared error (squared L2 norm) between each element in the input x and target y. 'MSE' is currently the only option. Other criteria will be implemented in future versions.<br>

In [None]:
# Train the model.
# NOTE(review): num_epochs=5 and batch_size=4 are below the ranges recommended in the
# parameter description above (50-100 epochs, batch size 32-128) — presumably chosen for
# a quick demonstration run; confirm before using for a final analysis.
# NOTE(review): `weight_decay` is not covered by the parameter description above.
ex.run_model(num_epochs=5,
             batch_size=4,
             learning_rate=1e-4, 
             weight_decay=0.0, 
             task='train_all', 
             criterion='MSE'
            ) 

# Pixel segmentation: Gaussian mixture modelling (GMM) clustering

## (Optional) Load pre-trained Autoencoder

In [None]:
# Optional: load a pre-trained autoencoder into the experiment `ex`.
model_path = './' # path of the saved model (it should be located in the folder 'result_folder_path')
ex.load_trained_model(model_path)

## Measure Bayesian information criterion (BIC)

In [None]:
# Get the latent representation from the experiment and view the BIC for the
# 'GaussianMixture' model on it.
# NOTE(review): n_components=20 is presumably the largest number of components
# evaluated — confirm against gui.view_bic.
latent = ex.get_latent()
gui.view_bic(latent=latent,
             model='GaussianMixture',
             n_components=20,
             model_args={'random_state':6, 'init_params':'kmeans'})

## Run GMM

In [None]:
# Get the latent representation and create the pixel segmenter `ps` used by the cells below.
# NOTE(review): the method here is 'BayesianGaussianMixture', whereas the BIC cell
# uses 'GaussianMixture' — confirm this difference is intended.
latent = ex.get_latent()
ps = PixelSegmenter(latent=latent,
                    dataset=im,
                    method='BayesianGaussianMixture',
                    method_args={'n_components':12, 'random_state':5, 'init_params':'kmeans'} )
                    # random_state can be changed to a different integer (e.g. 10 or 0) to adjust the clustering result.

## Checking latent space

In [None]:
# Plot the latent space (2-dimensional) with the corresponding Gaussian models.
gui.view_latent_space(ps=ps, color=True)

In [None]:
# Visualise the latent space.
# NOTE(review): ratio_to_be_shown=1.0 presumably displays all points — confirm.
gui.check_latent_space(ps=ps,ratio_to_be_shown=1.0, show_map=True)

In [None]:
# Check the density of the latent space (plotted with bins=50).
gui.plot_latent_density(ps=ps, bins=50)

## Checking each cluster

In [None]:
# Show the cluster distribution for the segmentation held by `ps`.
gui.show_cluster_distribution(ps=ps)

## Checking cluster map

In [None]:
# Plot the phase map using the corresponding Gaussian mixture (GM) model.
gui.view_phase_map(ps=ps)