# Notebook script for generating the training dataset (supports single- and two-phase materials)

In [1]:
## Import modules used for this Notebook
import os

## Preferred: LaueToolsNN is installed as a package
try:
    from lauetoolsnn.utils_lauenn import (
        generate_classHKL,
        generate_dataset,
        rmv_freq_class,
        get_material_detail,
        prepare_LP_NB,
    )
except ImportError:
    # Fallback: import straight from a folder containing the LaueToolsNN files.
    # Only ImportError is caught so that unrelated failures (or a
    # KeyboardInterrupt) are not silently turned into the fallback path.
    import sys
    sys.path.append(r"USER_PATH_HERE")
    from utils_lauenn import (
        generate_classHKL,
        generate_dataset,
        rmv_freq_class,
        get_material_detail,
        prepare_LP_NB,
    )

## Step 1: Define the material and other parameters for simulating Laue patterns

In [2]:
# =============================================================================
## User Input dictionary with parameters
## In case of only one phase/material, keep same value for material_ and material1_ key
# =============================================================================
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization
import keras
from keras.regularizers import l2
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.constraints import maxnorm

# Metric objects handed to `model.compile` by model_arch_general_optimized.
# The list is created once at module level, so every model compiled in this
# notebook receives these same metric instances.
metricsNN = [
            keras.metrics.FalseNegatives(name="fn"),
            keras.metrics.FalsePositives(name="fp"),
            keras.metrics.TrueNegatives(name="tn"),
            keras.metrics.TruePositives(name="tp"),
            keras.metrics.Precision(name="precision"),
            # NOTE(review): this is a Recall metric reported under the name
            # "accuracy"; the name is left as-is because other code may look
            # the metric up by that key -- confirm before renaming.
            keras.metrics.Recall(name="accuracy"),
            ]

def model_arch_general_optimized(n_bins, n_outputs, kernel_coeff = 0.0005, bias_coeff = 0.0005, lr=None, verbose=1,
                       write_to_console=None):
    """
    Build and compile a simple fully connected classifier with three hidden layers.

    Architecture: Input(n_bins) -> [Dense(relu) + Dropout(0.3)] x 3 -> Dense(n_outputs, softmax),
    trained with categorical cross-entropy to identify the HKL class.
    Every hidden Dense layer carries L2 regularisation on both kernel and bias;
    l2(c) adds c * weight_coefficient_value**2 to the total loss for every
    coefficient of the regularised tensor.

    Notes kept from earlier experiments:
      - BatchNormalization --> no significant impact
      - weighted approach --> not better for HCP
      - regularisation coefficients tried: 1e-3, 1e-5, 1e-6

    Parameters
    ----------
    n_bins : int
        Size of the input vector; also the width of the first hidden layer.
    n_outputs : int
        Number of output classes (width of the softmax layer).
    kernel_coeff : float
        L2 coefficient applied to the kernels of the hidden layers.
    bias_coeff : float
        L2 coefficient applied to the biases of the hidden layers.
    lr : float or None
        Learning rate for Adam. When None, the string "adam" is passed to
        `compile`, i.e. Keras' default Adam configuration is used.
    verbose, write_to_console :
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The compiled Sequential model.
    """
    # Base width from which hidden layers 2 and 3 are derived.
    if n_outputs >= n_bins:
        param = n_bins
        if param*15 < (2*n_outputs): ## quick hack; make Proper implementation
            param = (n_bins + n_outputs)//2
    else:
        param = n_outputs*2 ## More reasonable ???

    # Hidden layer widths: layer 2 sits halfway between layer 1 and layer 3.
    hidden_units = (n_bins, (param*15 + n_bins)//2, param*15)

    model = Sequential()
    model.add(keras.Input(shape=(n_bins,)))
    for units in hidden_units:
        model.add(Dense(units, kernel_regularizer=l2(kernel_coeff),
                        bias_regularizer=l2(bias_coeff), activation='relu'))
        ## Adding dropout as we introduce some uncertain data with noise
        model.add(Dropout(0.3))
    ## Output layer
    model.add(Dense(n_outputs, activation='softmax'))

    ## Compile model
    # `is None` instead of `!= None`; a single compile call for both cases.
    optimizer = "adam" if lr is None else tf.keras.optimizers.Adam(learning_rate=lr)
    # metricsNN is already a list of metric objects, so it is passed directly
    # rather than wrapped in another list.
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=metricsNN)
    return model

## Step 2: Get material parameters 
### Generates a folder with material name and gets material unit cell parameters and symmetry object from the get_material_detail function

In [13]:

# Imports hoisted out of the loop body so they run once per cell execution.
import _pickle as cPickle

import numpy as np

# Materials and their crystal symmetries, paired by position.
mat = ["Cu", "Ti", "Sn", "ZrO2", "Li2VOPO4"]
sym = ["cubic","hexagonal","tetragonal","monoclinic","triclinic"]
assert len(mat) == len(sym), "every material needs a matching symmetry entry"

n = 5  # first argument of generate_classHKL -- presumably the hkl index bound; confirm in utils_lauenn
# Angular binning shared by generate_classHKL and the input-size computation
# below, so the two can no longer drift apart.
ANG_MAXX = 120
ANG_STEP = 0.1

for material_, symm_ in zip(mat, sym):
    # Single-phase case: both material slots are given the same material.
    material1_ = material_
    symm1_ = symm_
    SG = None
    SG1 = None
    save_directory = os.path.join(os.getcwd(), material_)
    os.makedirs(save_directory, exist_ok=True)
    ## get unit cell parameters and other details required for simulating Laue patterns
    rules, symmetry, lattice_material, \
        crystal, SG, rules1, symmetry1,\
        lattice_material1, crystal1, SG1 = get_material_detail(material_, SG, symm_,
                                                               material1_, SG1, symm1_)
    ## procedure for generation of GROUND TRUTH classes
    # general_diff_cond = True will eliminate the hkl index that does not satisfy the general reflection conditions
    generate_classHKL(n, rules, lattice_material, symmetry, material_, crystal=crystal, SG=SG, general_diff_cond=True,
              save_directory=save_directory, write_to_console=print, ang_maxx = ANG_MAXX,
              step = ANG_STEP)
    # The pickle read here is the file written by generate_classHKL just above;
    # never point this at a pickle from an untrusted source.
    classhkl_path = os.path.join(save_directory, "classhkl_data_"+material_+".pickle")
    with open(classhkl_path, "rb") as input_file:
        classhkl, _, _, _, _, _, _, _, _ = cPickle.load(input_file)
    # Number of angular bins = network input size; number of HKL classes = output size.
    angbins = np.arange(0, ANG_MAXX+ANG_STEP, ANG_STEP)
    n_bins = len(angbins)-1
    n_outputs = len(classhkl)
    print(material_, symm_)
    model = model_arch_general_optimized(n_bins, n_outputs)
    print("output_class", n_outputs)
    print(model.count_params())

Generating HKL objects
Removing harmonics and building equivalent HKL objects
Finalizing the HKL objects
Saved class HKL data in : C:\Users\purushot\Anaconda3\envs\laueNN\Lib\site-packages\lauetoolsnn\example_notebook_scripts//Cu//classhkl_data_Cu.pickle
Cu cubic
output_class 13
2711518
Generating HKL objects
Removing harmonics and building equivalent HKL objects
Finalizing the HKL objects
Saved class HKL data in : C:\Users\purushot\Anaconda3\envs\laueNN\Lib\site-packages\lauetoolsnn\example_notebook_scripts//Ti//classhkl_data_Ti.pickle
Ti hexagonal
output_class 98
10304228
Generating HKL objects
Removing harmonics and building equivalent HKL objects
Finalizing the HKL objects
Saved class HKL data in : C:\Users\purushot\Anaconda3\envs\laueNN\Lib\site-packages\lauetoolsnn\example_notebook_scripts//Sn//classhkl_data_Sn.pickle
Sn tetragonal
output_class 98
10304228
Generating HKL objects
Removing harmonics and building equivalent HKL objects
Finalizing the HKL objects
Saved class HKL data

In [14]:
# `model` is the variable left over from the loop in the previous cell, i.e.
# the network built for the last material that loop processed.
model.summary()

Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_96 (Dense)            (None, 1200)              1441200   
                                                                 
 dropout_73 (Dropout)        (None, 1200)              0         
                                                                 
 dense_97 (Dense)            (None, 9255)              11115255  
                                                                 
 dropout_74 (Dropout)        (None, 9255)              0         
                                                                 
 dense_98 (Dense)            (None, 17310)             160221360 
                                                                 
 dropout_75 (Dropout)        (None, 17310)             0         
                                                                 
 dense_99 (Dense)            (None, 577)             

In [3]:
import numpy as np
from tqdm import trange
# Material(s) to analyse in this cell. Replace with another list (e.g. the
# five-material list used earlier in the notebook) to compare several materials.
mat = ["Al2TiO5"]
sym = ["orthorhombic"]

N_PATTERNS = 100  # number of random Laue patterns simulated per material

for ij in trange(len(mat)):
    # Single-phase case: both material slots are given the same material.
    material_= mat[ij]
    material1_= mat[ij]

    # Both perturbations are disabled here.
    noisy_data = False
    remove_peaks = False

    nbgrains = 1 ## material0
    nbgrains1 = 0 ## material1
    verbose = 0

    spot_counts = []
    for _ in range(N_PATTERNS):
        # Integer upper bound: randint is documented for ints, not the float 1e6.
        seednumber = np.random.randint(1000000)
        tabledistancerandom, hkl_sol, \
                s_posx, s_posy, s_I, s_tth, s_chi, g, g1  = prepare_LP_NB(nbgrains, nbgrains1,
                                                                        material_, verbose,
                                                                        material1_ = material1_,
                                                                        seed = seednumber,sortintensity=True,
                                                                        detectorparameters=[79.553,979.32,932.31,0.37,0.447],
                                                                        pixelsize=0.0734,
                                                                        dim1=2018, dim2=2016,
                                                                        emin=5, emax=23,
                                                                        flag = 10, noisy_data=noisy_data,
                                                                        remove_peaks = remove_peaks)
        # One entry per simulated pattern: how many values s_posx holds.
        spot_counts.append(len(s_posx))

    length = np.array(spot_counts)

    # Mean and standard deviation of the per-pattern counts for this material.
    print(material_, np.average(length), np.std(length))
    

100%|██████████| 1/1 [00:03<00:00,  3.95s/it]

Al2TiO5 490.87 6.183292003455765



