# (Optional) Link G-drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import libraries

In [1]:
!pip install emsigma

from IPython.display import clear_output 
clear_output()

In [1]:
from sigma.utils import normalisation as norm 
from sigma.utils import visualisation as visual
from sigma.utils.load import SEMDataset
from sigma.src.utils import same_seeds
from sigma.src.dim_reduction import Experiment
from sigma.models.autoencoder import AutoEncoder
from sigma.src.segmentation import PixelSegmenter
from sigma.gui import gui



# Load files

## Test file

In [2]:
# !gdown 'https://drive.google.com/uc?id=1woNRlyrBbUDIClYp_HNldzA2evdpArsi' -O 'test.bcf'

file_path = 'test.bcf'
sem = SEMDataset(file_path)
sem.set_feature_list(['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La'])

Set feature_list to ['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La']


## Upload file

In [None]:
from google.colab import files
uploaded = files.upload()

file_path=list(uploaded.keys())[0]
sem = SEMDataset(file_path)

# Dataset preprocessing

## View the dataset
Load the .bcf file and create an object of `SEMDataset` (which uses hyperspy as backend.)

Something you can do with the `sem` object:
1. `sem.bse`: access the back-scattered electron (as a hyperspy file).

2. `sem.edx`: access the edx dataset (as a hyperspy file).

3. `plot_sum_spectrum(sem.edx)`: view the sum spectrum (or use hyperspy built-in function `sem.edx.sum().plot(xray_lines=True)`).

4. `sem.feature_list`: view the default chosen elemental peaks in the edx dataset.

5. `sem.set_feature_list`: set new elemental peaks. 


In [3]:
gui.view_bcf_dataset(sem)

HBox(children=(BoundedFloatText(value=1.4898, continuous_update=True, description='Energy (keV):', step=0.1), …

Output()

HBox(children=(Text(value='Al_Ka, C_Ka, Ca_Ka, Fe_Ka, K_Ka, O_Ka, Si_Ka, Ti_Ka, Zn_La', description='Feature l…

Output()

Tab(children=(Output(), Output(), Output()), _titles={'0': 'BSE image', '1': 'EDX sum spectrum', '2': 'Element…

## Process the dataset
Some useful functions:
1. `sem.rebin_signal(size=(2,2))`: rebin the edx signal with the size of 2x2. After rebinning the dataset, we can access the binned edx or bse data using `sem.edx_bin` or `sem.bse_bin`.

2. `peak_intensity_normalisation(sem.edx_bin)`: normalise the x-ray intensity along energy axis.

3. `remove_fist_peak`: remove the first x-ray peak by calling the function with the argument `end`.

4. `peak_denoising_PCA`: denoise the spectrum using *Principle Component Analysis (PCA)*.

    > `n_components_to_reconstruct`: specify how many components to reconstruct the the EDX intensity profile.

    > `spectrum.plot_results`: True to plot all results.

5. `plot_intensity_maps`: Plot the elemental intensity maps.

In [4]:
# Rebin both edx and bse dataset
sem.rebin_signal(size=(2,2))

# normalisation to make the spectrum of each pixel summing to 1.
sem.peak_intensity_normalisation()

# Remove the first peak until the energy of 0.1 keV
sem.remove_fist_peak(end=0.1) 

# Denoise the X-ray profile using PCA.
sem.peak_denoising_PCA(n_components_to_reconstruct=10, plot_results=False)

Rebinning the intensity with the size of (2, 2)
Normalising the chemical intensity along axis=2, so that the sum is wqual to 1 along axis=2.
Removing the fisrt peak by setting the intensity to zero until the energy of 0.1 keV.
Peak denoising using PCA.
Decomposition info:
  normalize_poissonian_noise=True
  algorithm=SVD
  output_dimension=10
  centre=None


<EDSSEMSpectrum, title: EDX, dimensions: (139, 257|1547)>

In [5]:
# View the dataset (bse, edx etc.) at one time.
gui.view_bcf_dataset(sem)

HBox(children=(BoundedFloatText(value=1.4898, continuous_update=True, description='Energy (keV):', step=0.1), …

Output()

HBox(children=(Text(value='Al_Ka, C_Ka, Ca_Ka, Fe_Ka, K_Ka, O_Ka, Si_Ka, Ti_Ka, Zn_La', description='Feature l…

Output()

Tab(children=(Output(), Output(), Output(), Output()), _titles={'0': 'BSE image', '1': 'EDX sum spectrum', '2'…

## Normalisation

In [6]:
# Normalise the dataset using these three methods.
sem.normalisation([norm.neighbour_averaging, 
                   norm.zscore, 
                   norm.softmax])

Set feature_list to ['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La']
Normalise dataset using:
    1. neighbour_averaging
    2. zscore
    3. softmax


In [7]:
gui.view_pixel_distributions(sem, 
                             norm_list=[norm.neighbour_averaging,
                                        norm.zscore,
                                        norm.softmax], 
                             peak='Fe_Ka', 
                             cmap='inferno')

Box(children=(Output(),))

HBox(children=(Text(value='results', description='Folder name:', layout=Layout(width='auto'), placeholder='Typ…

Output()

## Assign RGB to elemental peaks

In [8]:
gui.view_rgb(sem)

HBox(children=(Dropdown(description='Data:', options=('normalised', 'binned', 'raw'), value='normalised'), VBo…

Output()

In [9]:
print('After normalisation:')
gui.view_intensity_maps(edx=sem.normalised_elemental_data, element_list=sem.feature_list)

After normalisation:


VBox(children=(HBox(children=(VBox(children=(HBox(children=(ColorPicker(value='#ff0000', description='Al_Ka', …

# Dimensionality reduction: Autoencoder

## Initialise experiment / model

In [14]:
# The integer in this function can determine different initialised parameters of model (tuning sudo randomness)
# This can influence the result of dimensionality reduction and change the latent space.
same_seeds(1)

# Set up the experiment, e.g. determining the model structure, dataset for training etc.
general_results_dir='./' 
ex = Experiment(descriptor='softmax',
                general_results_dir=general_results_dir,
                model=AutoEncoder,
                model_args={'hidden_layer_sizes':(512,256,128)}, # number of hidden layers and corresponding neurons
                chosen_dataset=sem.normalised_elemental_data,
                save_model_every_epoch=True)

model_name: Model-softmax
size_dataset: (257, 139, 9)
device: cpu
num_parameters: 342795


## Training

In [15]:
# Train the model
ex.run_model(num_epochs=1,
             patience=50, 
             batch_size=64,
             learning_rate=1e-4, 
             weight_decay=0.0, 
             task='train_all', # Change to 'train_all' to train all the dataset without separate some parts for evaluation
             noise_added=0.0,
             KLD_lambda=0.0,
             criterion='MSE',
             lr_scheduler_args={'factor':0.5,
                                'patience':5, 
                                'threshold':1e-2, 
                                'min_lr':1e-6,
                                'verbose':True}) 

num_epochs: 1
batch_size: 64
task: train_all
optimizer: lr=0.0001 and weight_decay=0.0

Start training ...



  0%|          | 0/559 [00:00<?, ?batch/s]

Epoch 1 ----> model saved, train_loss=0.002598 | test_loss = 0.002598


# Pixel segmentation: Gaussian mixture modelling (GMM) clustering

## (Optional) Load pre-trained Autoencoder

In [None]:
from google.colab import files
uploaded = files.upload()

ex.load_trained_model(list(uploaded.keys())[0])

## Measure Baysian information criterion (BIC)

In [None]:
latent = ex.get_latent()
gui.view_bic(latent,
             n_components=20,
             model_args={'random_state':6, 'init_params':'kmeans'} )

## Run GMM

In [16]:
latent = ex.get_latent()
ps = PixelSegmenter(latent, 
                    sem.normalised_elemental_data, 
                    sem,
                    method_args={'n_components':12, 'random_state':6, 'init_params':'kmeans'} )
                    # can change random_state to different integer i.e. 10 or 0 to adjust the clustering result.

## Checking latent space

In [17]:
 # Plot latent sapce (2-dimensional) with corresponding Gaussian models
gui.view_latent_space(ps, color=True)

VBox(children=(VBox(children=(VBox(children=(HBox(children=(ColorPicker(value='#000000', description='cluster_…

In [18]:
# visualise the latent space
gui.check_latent_space(ps,ratio_to_be_shown=0.5, show_map=True)

In [19]:
# check the density of latent space
gui.plot_latent_density(ps, bins=50)

AttributeError: module 'sigma.gui.gui' has no attribute 'plot_latent_density'

## Checking each clusters

In [22]:
ps.set_feature_list(['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La'])
gui.show_cluster_distribution(ps)

Set feature_list to ['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La']


SelectMultiple(options=('All', 'cluster_0', 'cluster_1', 'cluster_2', 'cluster_3', 'cluster_4', 'cluster_5', '…

HBox(children=(Text(value='results', description='Folder name:', layout=Layout(width='auto'), placeholder='Typ…

Output()

Output()

## Checking cluster map

In [23]:
# Plot phase map using the corresponding GM model
gui.view_phase_map(ps)

VBox(children=(VBox(children=(VBox(children=(HBox(children=(ColorPicker(value='#000000', description='cluster_…

In [24]:
gui.view_clusters_sum_spectra(ps, normalisation=True, spectra_range=(0,8))

SelectMultiple(options=('cluster_0', 'cluster_1', 'cluster_2', 'cluster_3', 'cluster_4', 'cluster_5', 'cluster…

HBox(children=(Text(value='results', description='Folder name:', layout=Layout(width='auto'), placeholder='Typ…

Output()

Tab(children=(Output(), Output()), _titles={'0': 'clusters + edx', '1': 'edx'})

# Unmixing cluster spectrums using Non-negative Matrix Fatorization (NMF)

In [25]:
weights, components = ps.get_unmixed_edx_profile(clusters_to_be_calculated='All', 
                                                 n_components='All',
                                                 normalised=False, 
                                                 method='NMF', 
                                                 method_args={'init':'nndsvd'})

In [26]:
gui.show_unmixed_weights_and_compoments(ps, weights, components)

HBox(children=(SelectMultiple(options=('cluster_0', 'cluster_1', 'cluster_2', 'cluster_3', 'cluster_4', 'clust…

Tab(children=(Output(), Output(), Output(), Output()), _titles={'0': 'All weights', '1': 'Single weight', '2':…

# Statistics infro from clusters

In [27]:
gui.show_cluster_stats(ps)

HBox(children=(SelectMultiple(description='cluster:', options=('cluster_0', 'cluster_1', 'cluster_2', 'cluster…

Output()