# Link G-drive

In [None]:
# Mount Google Drive into the Colab file system at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries

In [None]:
import sigma
from sigma.utils import normalisation as norm 
from sigma.utils import visualisation as visual
from sigma.utils.load import SEMDataset
from sigma.src.utils import same_seeds
from sigma.src.dim_reduction import Experiment
from sigma.models.autoencoder import AutoEncoder
from sigma.src.segmentation import PixelSegmenter
from sigma.gui import gui

# Load files

Load the *.bcf* file and create an object of `SEMDataset` (which uses hyperspy as backend.)

In [None]:
!gdown --id '1woNRlyrBbUDIClYp_HNldzA2evdpArsi' -O 'test.bcf'

file_path = 'test.bcf'
sem = SEMDataset(file_path)
sem.set_feature_list(['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La'])

Upload the file with GUI

In [None]:
from google.colab import files

# Open the Colab upload widget; the keys of the returned mapping are used
# as file paths below.
uploaded = files.upload()

# Load the first uploaded file as the dataset.
file_path = list(uploaded)[0]
sem = SEMDataset(file_path)

# Dataset preprocessing

## View the dataset

Use `gui.view_bcf_dataset(sem)` to check the BSE image, the sum spectrum, and the elemental maps. Here we can use the small widgets to search for energy peaks and determine the elements for further analyses. 

After setting the `Feature list`, we obtain the elemental maps hyperspectral imaging dataset (HSI) with the dimension of 279 x 514 x 9 (for the test file).

In [None]:
# Open the viewer for the loaded dataset (BSE image, sum spectrum and elemental maps).
gui.view_bcf_dataset(sem)

In addition to the GUI, we can view the dataset with the `sem` object:

1. `sem.bse`: access the back-scattered electron (BSE) image (in hyperspy format).

2. `sem.edx`: access the edx dataset (as a hyperspy format).

3. `visual.plot_sum_spectrum(sem.edx)`: view the sum spectrum (or use hyperspy built-in function `sem.edx.sum().plot(xray_lines=True)`).

4. `sem.feature_list`: view the default chosen elemental peaks in the edx dataset.

5. `sem.set_feature_list`: set new elemental peaks. 

## Process the dataset

### Several (optional) functions to process the dataset:
1. `sem.rebin_signal(size=(2,2))`: rebin the edx signal with the size of 2x2. After rebinning the dataset, we can access the binned edx or bse data using `sem.edx_bin` or `sem.bse_bin`.

2. `sem.peak_intensity_normalisation()`: normalise the x-ray intensity along energy axis.

3. `sem.remove_fist_peak(end=0.1)`: remove the first x-ray peak (most likely noise) by calling the function with the argument `end`.

4. `sem.peak_denoising_PCA`: denoise the spectrum using *Principal Component Analysis (PCA)*.

    > `n_components_to_reconstruct`: specify how many components to use to reconstruct the EDX intensity profile.  
    > `plot_results`: True to plot all results.

5. `visual.plot_intensity_maps`: Plot the elemental intensity maps.

In [None]:
# Rebin both the edx and bse datasets with a bin size of 2x2.
sem.rebin_signal(size=(2,2))

# Normalise so that the spectrum of each pixel sums to 1.
sem.peak_intensity_normalisation()

# Remove the first peak, up to an energy of 0.1 keV.
sem.remove_fist_peak(end=0.1) 

# Denoise the X-ray profile using PCA, reconstructing from 10 components
# without plotting the results.
sem.peak_denoising_PCA(n_components_to_reconstruct=10, plot_results=False)

In [None]:
# View the dataset (BSE, EDX etc.) again to check what has changed.
gui.view_bcf_dataset(sem)

The pre-processing steps yield an HSI datacube with the dimension of 139 x 257 x 9 (due to the 2x2 binning).

## Normalisation

Before dimensionality reduction, we normalise the elemental maps using `sem.normalisation()`, to which we can pass a list of (optional) sequential normalisation steps.

In [None]:
# The three (optional) normalisation methods, listed in the order in which
# they are passed on for sequential normalisation.
normalisation_steps = [
    norm.neighbour_averaging,
    norm.zscore,
    norm.softmax,
]
sem.normalisation(normalisation_steps)

Use `gui.view_pixel_distributions` to view the intensity distributions after each sequential normalisation process.

In [None]:
# Show the intensity distributions of the Fe_Ka peak for the sequence of
# normalisation steps, drawn with the 'inferno' colour map.
gui.view_pixel_distributions(
    sem,
    peak='Fe_Ka',
    cmap='inferno',
    norm_list=[norm.neighbour_averaging, norm.zscore, norm.softmax],
)

## (Optional) Assign RGB to elemental peaks

In [None]:
# Open the widget for assigning RGB colours to elemental peaks.
gui.view_rgb(sem)

## Check elemental distribution after normalisation

In [None]:
# Show the intensity maps of the normalised elemental data, one per peak in
# the feature list.
print('After normalisation:')
gui.view_intensity_maps(edx=sem.normalised_elemental_data, element_list=sem.feature_list)

# Dimensionality reduction: Autoencoder

## Initialise experiment / model

In [None]:
# The integer passed here selects the pseudo-random state, and with it the
# initial parameters of the model. A different value can change the result of
# the dimensionality reduction and therefore the latent space.
same_seeds(1)

# Set up the experiment: model structure, training dataset, results directory.
general_results_dir = './'
ex = Experiment(
    descriptor='softmax',
    general_results_dir=general_results_dir,
    model=AutoEncoder,
    # Number of hidden layers and the neurons in each of them.
    model_args={'hidden_layer_sizes': (512, 256, 128)},
    chosen_dataset=sem.normalised_elemental_data,
    save_model_every_epoch=True,
)

## Training

In [None]:
# Train the autoencoder.
ex.run_model(
    num_epochs=100,
    patience=50,
    batch_size=64,
    learning_rate=1e-4,
    weight_decay=0.0,
    # 'train_all' trains on the whole dataset. Change to 'train_eval' to train
    # on a training set (85% of the dataset) and evaluate on a testing set (15%).
    task='train_all',
    noise_added=0.0,
    KLD_lambda=0.0,
    criterion='MSE',
    # Settings for the learning-rate scheduler.
    lr_scheduler_args={
        'factor': 0.5,
        'patience': 5,
        'threshold': 1e-2,
        'min_lr': 1e-6,
        'verbose': True,
    },
)

# Pixel segmentation: Gaussian mixture modelling (GMM) clustering

## (Optional) Load pre-trained Autoencoder

In [None]:
# Location handed to `load_trained_model` ('./' is the current directory).
# NOTE(review): whether a directory or a model file is expected is not visible
# here - confirm before changing the path.
model_path = './' # model path
ex.load_trained_model(model_path)

## Measure Bayesian information criterion (BIC)

The `gui.view_bic` iteratively calculates the BIC for Gaussian mixture models using the number of Gaussian components `n_components`, e.g. if `n_components=20`, it shows the BIC values for GMM using n_components from 1 to 20.

In [None]:
# Latent representation obtained from the experiment.
latent = ex.get_latent()

# Show the BIC for Gaussian mixture models with 1 to 20 components.
gui.view_bic(
    latent,
    n_components=20,
    model_args={'random_state': 6, 'init_params': 'kmeans'},
)

## Run GMM

In [None]:
# Latent representation obtained from the experiment.
latent = ex.get_latent()

# Segment the pixels with a 12-component model. `random_state` can be changed
# to a different integer (e.g. 10 or 0) to adjust the clustering result.
ps = PixelSegmenter(
    latent,
    sem.normalised_elemental_data,
    sem,
    method_args={'n_components': 12, 'random_state': 6, 'init_params': 'kmeans'},
)

## Checking latent space

In [None]:
# Plot the latent space (2-dimensional) with the corresponding Gaussian models.
gui.view_latent_space(ps, color=True)

In [None]:
# Visualise the latent space together with the map (show_map=True).
# NOTE(review): ratio_to_be_shown=0.5 presumably displays half of the points - confirm.
gui.check_latent_space(ps,ratio_to_be_shown=0.5, show_map=True)

In [None]:
# Check the density of the latent space, using 50 bins.
gui.plot_latent_density(ps, bins=50)

## Checking each cluster

In [None]:
# Set the elemental peaks on the segmenter, then show the distribution of
# each cluster.
ps.set_feature_list(['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La'])
gui.show_cluster_distribution(ps)

## Checking cluster map

In [None]:
# Plot the phase map using the corresponding Gaussian mixture model.
gui.view_phase_map(ps)

In [None]:
# View the normalised sum spectrum of each cluster over the range (0, 8).
# NOTE(review): the range is presumably in keV - confirm.
gui.view_clusters_sum_spectra(ps, normalisation=True, spectra_range=(0,8))

# Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# Unmix the EDX profiles with NMF (initialised with 'nndsvd'), passing 'All'
# for both the clusters and the number of components, without normalisation.
weights, components = ps.get_unmixed_edx_profile(
    clusters_to_be_calculated='All',
    n_components='All',
    normalised=False,
    method='NMF',
    method_args={'init': 'nndsvd'},
)

In [None]:
# Show the weights and components returned by `ps.get_unmixed_edx_profile`.
gui.show_unmixed_weights_and_compoments(ps, weights, components)

# Statistics info from clusters

In [None]:
# Show the statistics of the clusters held by the segmenter.
gui.show_cluster_stats(ps)