# Link G-drive

In [None]:
# Mount Google Drive at /content/drive for this Colab session.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries

In [2]:
!pip install hyperspy[all]
!git clone --branch final https://github.com/poyentung/unmix.git

from IPython.display import clear_output 
clear_output()

In [None]:
import sys
import os

# Make the cloned `unmix` repository importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
unmix_dir = os.path.join(os.getcwd(), 'unmix')
if unmix_dir not in sys.path:
    sys.path.append(unmix_dir)

from utils import normalisation as norm 
from utils import visualisation as visual
from utils.load import SEMDataset
from src.utils import same_seeds
from src.dim_reduction import Experiment
from models.autoencoder import AutoEncoder
from src.segmentation import PixelSegmenter
from py_widgets import widgets

# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Load files

## Test file

In [None]:
!gdown --id '1woNRlyrBbUDIClYp_HNldzA2evdpArsi' -O 'test.bcf'

file_path = 'test.bcf'
sem = SEMDataset(file_path)
sem.set_feature_list(['Al_Ka', 'C_Ka', 'Ca_Ka', 'Fe_Ka', 'K_Ka', 'O_Ka', 'Si_Ka', 'Ti_Ka', 'Zn_La'])

## Upload file

In [None]:
from google.colab import files

# Ask the user to upload a file from the local machine. `files.upload()`
# returns a dict keyed by file name, which is empty when the dialog is cancelled.
uploaded = files.upload()
if not uploaded:
    raise ValueError('No file was uploaded; please select a .bcf file to continue.')

# Use the first uploaded file as the dataset.
file_path = next(iter(uploaded))
sem = SEMDataset(file_path)

# Dataset preprocessing

## View the dataset


In [5]:
# Interactive viewer for the loaded dataset. The widget output shows an energy (keV)
# input, a feature-list text box, and tabs including the BSE image and EDX sum spectrum.
widgets.view_bcf_dataset(sem)

HBox(children=(BoundedFloatText(value=1.4898, continuous_update=True, description='Energy (keV):', step=0.1), …

Output()

HBox(children=(Text(value='Al_Ka, C_Ka, Ca_Ka, Fe_Ka, K_Ka, O_Ka, Si_Ka, Ti_Ka, Zn_La', description='Feature l…

Output()

Tab(children=(Output(), Output(), Output()), _titles={'0': 'BSE image', '1': 'EDX sum spectrum', '2': 'Element…

## Process the dataset

In [None]:
# NOTE(review): these calls appear to modify `sem` in place, so re-running this
# cell would presumably apply the processing a second time — confirm.

# Rebin both the EDX and BSE datasets with size=(2, 2).
sem.rebin_signal(size=(2,2))

# Normalise so that the spectrum of each pixel sums to 1.
sem.peak_intensity_normalisation()

# Remove the first peak, up to an energy of 0.1 keV.
# NOTE(review): the method name is spelled `remove_fist_peak` in this call; presumably
# that matches the library's API — confirm before renaming it.
sem.remove_fist_peak(end=0.1) 

# Denoise the X-ray profile using PCA (10 components to reconstruct, plot_results=False).
sem.peak_denoising_PCA(n_components_to_reconstruct=10, plot_results=False)

In [None]:
# View the whole dataset (BSE, EDX, etc.) in one widget, here after the
# processing cell in this section.
widgets.view_bcf_dataset(sem)

## Normalisation

In [None]:
# The three normalisation methods handed to the dataset.
normalisation_steps = [
    norm.neighbour_averaging,
    norm.zscore,
    norm.softmax,
]
sem.normalisation(normalisation_steps)

In [None]:
# Show the pixel distributions of the Fe_Ka peak for the listed normalisation
# methods, drawn with the 'inferno' colour map.
widgets.view_pixel_distributions(
    sem,
    norm_list=[norm.neighbour_averaging, norm.zscore, norm.softmax],
    peak='Fe_Ka',
    cmap='inferno',
)

## Assign RGB to elemental peaks

In [None]:
# Widget for assigning RGB channels to the elemental peaks of `sem`.
widgets.view_rgb(sem)

In [None]:
# Intensity maps of the normalised elemental data, one per entry of the feature list.
print('After normalisation:')
widgets.view_intensity_maps(
    edx=sem.normalised_elemental_data,
    element_list=sem.feature_list,
)

# Dimensionality reduction: Autoencoder

## Initialise experiment / model

In [None]:
# The integer passed here selects a different set of initial model parameters
# (it tunes the pseudo-randomness). This can influence the result of the
# dimensionality reduction and change the latent space, so it is set before
# the model is created.
same_seeds(1)

# Set up the experiment, e.g. the model structure and the dataset used for training.
# NOTE(review): '/content' is an absolute path (presumably the Colab working
# directory) — change it when running outside Colab.
general_results_dir='/content' 
ex = Experiment(descriptor='softmax',
                general_results_dir=general_results_dir,
                model=AutoEncoder,
                model_args={'hidden_layer_sizes':(512,256,128)}, # number of hidden layers and corresponding neurons
                chosen_dataset=sem.normalised_elemental_data,
                save_model_every_epoch=True)

## Training

In [None]:
# Train the model with the hyperparameters below.
# NOTE(review): the lr_scheduler_args keys look like those of a reduce-on-plateau
# learning-rate scheduler — confirm against Experiment.run_model.
ex.run_model(num_epochs=100,
             patience=50, 
             batch_size=64,
             learning_rate=1e-4, 
             weight_decay=0.0, 
             task='train_all', # 'train_all' trains on the whole dataset without setting part of it aside for evaluation
             noise_added=0.0,
             KLD_lambda=1e-4,
             criterion='MSE',
             lr_scheduler_args={'factor':0.5,
                                'patience':5, 
                                'threshold':1e-2, 
                                'min_lr':1e-6,
                                'verbose':True}) 

# Pixel segmentation: Gaussian mixture modelling (GMM) clustering

## (Optional) Load Autoencoder

In [None]:
from google.colab import files

# Ask the user to upload a previously trained autoencoder. `files.upload()`
# returns a dict keyed by file name, which is empty when the dialog is cancelled.
uploaded = files.upload()
if not uploaded:
    raise ValueError('No file was uploaded; please select a trained model file to continue.')

# Load the first uploaded file into the existing experiment `ex`.
ex.load_trained_model(next(iter(uploaded)))

## Measure Bayesian information criterion (BIC)

In [None]:
# Fetch the latent representation from the experiment and view the BIC
# for n_components=20 with the given mixture-model settings.
latent = ex.get_latent()
widgets.view_bic(
    latent,
    n_components=20,
    model='BayesianGaussianMixture',
    model_args=dict(random_state=6, init_params='kmeans'),
)

## Run GMM

In [79]:
# Build the pixel segmenter from the latent representation and the normalised data.
# random_state can be changed to a different integer (e.g. 10 or 0) to adjust
# the clustering result.
latent = ex.get_latent()
ps = PixelSegmenter(
    latent,
    sem.normalised_elemental_data,
    sem,
    method='BayesianGaussianMixture',
    method_args=dict(n_components=12, random_state=6, init_params='kmeans'),
)

## Checking latent space

In [None]:
 # Plot the latent space (2-dimensional) with the corresponding Gaussian models, in colour.
widgets.view_latent_space(ps, color=True)

In [None]:
# Visualise the latent space with ratio_to_be_shown=0.5 and the map shown.
# NOTE(review): presumably the ratio is the fraction of points displayed — confirm.
widgets.check_latent_space(ps,ratio_to_be_shown=0.5, show_map=True)

In [None]:
# Check the density of the latent space held by the pixel segmenter `ps`.
ps.plot_latent_density()

## Checking each cluster

In [None]:
# Set the elemental peaks on the segmenter, then show the cluster distribution.
cluster_features = [
    'Al_Ka', 'C_Ka', 'Ca_Ka',
    'Fe_Ka', 'K_Ka', 'O_Ka',
    'Si_Ka', 'Ti_Ka', 'Zn_La',
]
ps.set_feature_list(cluster_features)
widgets.show_cluster_distribution(ps)

## Checking cluster map

In [None]:
# Plot the phase map using the corresponding Gaussian mixture model held by `ps`.
widgets.view_phase_map(ps)

In [None]:
# View the sum spectra of the clusters, with normalisation enabled, over spectra_range=(0, 8).
# NOTE(review): the range is presumably in keV — confirm.
widgets.view_clusters_sum_spectra(ps, normalisation=True, spectra_range=(0,8))

# Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [85]:
# Unmix the EDX profiles of all clusters with NMF ('nndsvd' initialisation),
# keeping the returned weights and components for the viewer cell.
weights, components = ps.get_unmixed_edx_profile(
    clusters_to_be_calculated='All',
    n_components='All',
    normalised=False,
    method='NMF',
    method_args=dict(init='nndsvd'),
)

In [None]:
# Display the unmixed weights and components computed by the NMF cell.
# NOTE(review): the function name is spelled `..._compoments` in this call; presumably
# that matches the library's API — confirm before renaming it.
widgets.show_unmixed_weights_and_compoments(ps, weights, components)

# Statistics information from clusters

In [None]:
# Show statistics for the clusters held by the pixel segmenter `ps`.
widgets.show_cluster_stats(ps)