# Link G-drive

In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Install SIGMA using pip

In [None]:
# Install SIGMA with pip; the package name passed to pip is `emsigma`.
!pip install emsigma

# Import libraries

In [None]:
import sigma
from sigma.utils import normalisation as norm 
from sigma.utils import visualisation as visual
from sigma.utils.loadtem import TEMDataset
from sigma.src.utils import same_seeds
from sigma.src.dim_reduction import Experiment
from sigma.models.autoencoder import AutoEncoder
from sigma.src.segmentation import PixelSegmenter
from sigma.gui import gui

import plotly.io as pio
# Make "colab" the default plotly renderer.
pio.renderers.default = "colab"

# Load files

Now `TEMDataset` can load either `.emi`/`.ser` or `.emd` files. Note: please put both `.emi` and the corresponding `.ser` files in the same directory.

In [None]:
# Path handed to TEMDataset; replace './' with the location of the
# `.emi`/`.ser` or `.emd` data to load.
file_path = './'
tem = TEMDataset(file_path)

Or upload file with GUI

In [None]:
from google.colab import files

# Open the Colab upload widget; the result is a dict whose keys are the
# names of the uploaded files.
uploaded = files.upload()

# A cancelled/empty upload would otherwise fail below with an opaque
# IndexError, so report the actual problem instead.
if not uploaded:
    raise ValueError('No file was uploaded; run this cell again and select a file.')

# Load the first uploaded file.
file_path = next(iter(uploaded))
tem = TEMDataset(file_path)

# 1. Dataset preprocessing

## 1.1 View the dataset

Use `gui.view_dataset(tem)` to check the intensity image, the sum spectrum, and the elemental maps. Here we can use the small widgets to search the energy peaks and determine the elements for further analyses.

After setting the `Feature list`, we obtain the elemental maps hyperspectral imaging dataset (HSI) with the dimension of 279 x 514 x 9 (for the test file).

In [None]:
# X-ray lines to use as the feature list of the dataset.
xray_lines = [
    'Mn_Ka', 'C_Ka', 'O_Ka',
    'Mg_Ka', 'Si_Ka', 'S_Ka',
    'Ca_Ka', 'Fe_Ka', 'Co_Ka',
]
tem.set_xray_lines(xray_lines)

In [None]:
# Open the dataset viewer (intensity image, sum spectrum and elemental maps).
gui.view_dataset(dataset=tem)

In addition to the GUI, we can view the dataset with the `tem` object:

1. `tem.nav_img`: access the HAADF image (as a hyperspy format).

2. `tem.spectra`: access the spectral dataset (as a hyperspy format).

3. `visual.plot_sum_spectrum(tem.spectra)`: view the sum spectrum (or use hyperspy built-in function `tem.spectra.sum().plot(xray_lines=True)`).

4. `tem.feature_list`: view the default chosen elemental peaks in the spectral dataset.

5. `tem.set_feature_list`: set new elemental peaks. 

## 1.2 Process the dataset

### Several (optional) functions to process the dataset:
1. `tem.rebin_signal(size=(2,2))`: rebin the spectra signal with the size of 2x2. After rebinning the dataset, we can access the binned spectra or nav_img data using `tem.spectra_bin` or `tem.nav_img_bin`.

2. `tem.peak_intensity_normalisation()`: normalise the x-ray intensity along energy axis.

3. `tem.remove_fist_peak(end=0.1)`: remove the first x-ray peak (most likely noise) by calling the function with the argument `end`.

4. `tem.peak_denoising_PCA`: denoise the spectrum using *Principal Component Analysis (PCA)*.

    > `n_components_to_reconstruct`: specify how many components to use to reconstruct the spectra intensity profile.  
    > `plot_results`: True to plot all results.

5. `visual.plot_intensity_maps`: Plot the elemental intensity maps.

In [None]:
# Remove the first x-ray peak (most likely noise), up to an energy of 0.1 keV.
tem.remove_fist_peak(end=0.1)

# Normalise the x-ray intensity along the energy axis so that the spectrum
# of each pixel sums to 1.
tem.peak_intensity_normalisation()

## 1.3 Normalisation

Before dimensionality reduction, we normalise the elemental maps using `tem.normalisation()`, to which we can pass a list of (optional) normalisation steps that are applied sequentially.

In [None]:
# The three (optional) normalisation methods, applied sequentially in this order.
normalisation_steps = [norm.neighbour_averaging, norm.zscore, norm.softmax]
tem.normalisation(normalisation_steps)

Use `gui.view_pixel_distributions` to view the intensity distributions after each sequential normalisation process.

In [None]:
# Show the intensity distributions after each sequential normalisation step.
gui.view_pixel_distributions(
    dataset=tem,
    norm_list=[norm.neighbour_averaging, norm.zscore, norm.softmax],
    cmap='inferno',
)

## 1.4 Check elemental distribution after normalisation

In [None]:
# Show the elemental intensity maps of the normalised data.
print('After normalisation:')
gui.view_intensity_maps(
    spectra=tem.normalised_elemental_data,
    element_list=tem.feature_list,
)

# 2. Dimensionality reduction

## 2.1 Method 1: Autoencoder

### 2.1.1 Initialise experiment / model

In [None]:
# Fix the random seed. The integer determines the (pseudo-random) initial
# parameters of the model, so changing it can influence the result of the
# dimensionality reduction and change the latent space.
same_seeds(1)

# Set up the experiment, e.g. the model structure and the dataset for training.
general_results_dir = './'
# Number of hidden layers and the corresponding number of neurons.
autoencoder_args = {'hidden_layer_sizes': (512, 256, 128)}
ex = Experiment(
    descriptor='softmax',
    general_results_dir=general_results_dir,
    model=AutoEncoder,
    model_args=autoencoder_args,
    chosen_dataset=tem.normalised_elemental_data,
    save_model_every_epoch=True,
)

### 2.1.2 Training

In [None]:
# Training settings, passed to the experiment as keyword arguments.
training_options = dict(
    num_epochs=1,
    batch_size=4,
    learning_rate=1e-4,
    weight_decay=0.0,
    task='train_all',
    criterion='MSE',
)

# Train the model, then fetch the latent representation from the experiment.
ex.run_model(**training_options)
latent = ex.get_latent()

### 2.1.3 (Optional) Load pre-trained Autoencoder

In [None]:
# Path of the pre-trained model to load.
# NOTE(review): the original note says the model path should be stored in the
# folder 'result_folder_path' - confirm where Experiment saves its models.
model_path = './' # model path (the model path should be stored in the folder 'result_folder_path')
ex.load_trained_model(model_path)
# Fetch the latent representation from the loaded model.
latent = ex.get_latent()

## 2.2 Method 2: UMAP

In [None]:
from umap import UMAP

# Parameter tuning can be found https://umap-learn.readthedocs.io/en/latest/parameters.html
# Flatten the normalised elemental maps into a 2-D array with one column per
# entry of the feature list. The dataset object in this notebook is `tem`
# (the previous `sem` name was never defined and raised a NameError).
data = tem.normalised_elemental_data.reshape(-1, len(tem.feature_list))
umap = UMAP(
        n_neighbors=10,
        min_dist=0.1,
        n_components=2,
        metric='euclidean'
    )
# 2-component embedding used as the latent space by the following sections.
latent = umap.fit_transform(data)

# 3. Pixel segmentation: 

## 3.1 Method 1: Gaussian mixture modelling (GMM) clustering

### 3.1.1 Measure Bayesian information criterion (BIC)

The `gui.view_bic` iteratively calculates the BIC for Gaussian mixture models using the number of Gaussian components `n_components`, e.g. if `n_components=20`, it shows the BIC values for GMM using n_components from 1 to 20.

In [None]:
# NOTE: this re-fetches the latent space from the autoencoder experiment and
# therefore replaces any `latent` computed with UMAP in section 2.2; skip this
# line to evaluate the BIC on the UMAP embedding instead.
latent = ex.get_latent()
# Show the BIC values for GMMs with 1 to `n_components` Gaussian components.
gui.view_bic(latent,
             n_components=10,
             model_args={'random_state':6, 'init_params':'kmeans'})

### 3.1.2 Run GMM

In [None]:
# NOTE: this re-fetches the latent space from the autoencoder experiment and
# therefore replaces any `latent` computed with UMAP in section 2.2; skip this
# line to cluster the UMAP embedding instead.
latent = ex.get_latent()
# Segment the pixels with a Gaussian mixture model of 12 components.
ps = PixelSegmenter(latent, 
                    dataset=tem,
                    method_args={'n_components':12, 'random_state':6, 'init_params':'kmeans'} )
                    # can change random_state to different integer i.e. 10 or 0 to adjust the clustering result.

## 3.2 Method 2: HDBSCAN clustering

In [None]:
# Hyperparameter tuning can be found https://scikit-learn.org/stable/auto_examples/cluster/plot_hdbscan.html#hyperparameter-robustness
# Segment the pixels with HDBSCAN on the current `latent` array. The dataset
# object in this notebook is `tem` (the previous `sem` name was never defined
# and raised a NameError).
ps = PixelSegmenter(latent=latent,
                    dataset=tem,
                    method="HDBSCAN",
                    method_args=dict(min_cluster_size=25, min_samples=25,
                                     # limit a cluster to one tenth of the number of latent points
                                     max_cluster_size=len(latent) // 10,
                                     cluster_selection_epsilon=4e-2))

## 3.3 Visualisation

### 3.3.1 Checking latent space

In [None]:
# Plot the latent space (2-dimensional) with the corresponding Gaussian models
gui.view_latent_space(ps, color=True)

In [None]:
# Visualise the latent space.
# NOTE(review): ratio_to_be_shown=1.0 presumably displays all points and
# show_map=True presumably adds the corresponding map - confirm against the
# gui.check_latent_space documentation.
gui.check_latent_space(ps,ratio_to_be_shown=1.0, show_map=True)

In [None]:
# Check the density of the latent space (bins=50 is passed to the plot).
gui.plot_latent_density(ps, bins=50)

### 3.3.2 Quantifying compositions in clusters using Cliff-Lorimer

In [None]:
# Plot the ternary composition for the clusters held by `ps`.
gui.plot_ternary_composition(ps)

### 3.3.3 Checking each cluster

In [None]:
# Inspect the clusters one by one.
# NOTE(review): spectra_range=(0,10) presumably limits the displayed spectrum
# to 0-10 keV - confirm the unit.
gui.show_cluster_distribution(ps, spectra_range=(0,10))

### 3.3.4 Checking cluster map

In [None]:
# Plot the phase map from the clustering result stored in `ps`.
gui.view_phase_map(ps)

In [None]:
# View the sum spectra of the clusters, with normalisation enabled.
# NOTE(review): spectra_range=(0,10) presumably limits the displayed spectrum
# to 0-10 keV - confirm the unit.
gui.view_clusters_sum_spectra(ps, normalisation=True, spectra_range=(0,10))