# Create simualted datasets

For both the training and testing datasets.

The processing steps include:
- Domain randomisation (relrod, spot_spread)
- Multiple phases
- Corruption of data

In [55]:
# Packages
%matplotlib qt
import numpy as np
import hyperspy.api as hs
import pyxem as pxm
import diffpy.structure
from matplotlib import pyplot as plt
from tempfile import TemporaryFile
from diffsims.libraries.structure_library import StructureLibrary
from diffsims.generators.diffraction_generator import DiffractionGenerator
from diffsims.generators.library_generator import DiffractionLibraryGenerator, VectorLibraryGenerator
from pyxem.utils.sim_utils import sim_as_signal
import tqdm
import gc
import os

In [56]:
### Variables

# Paths
root = r'C:\Users\Sauron\Documents\jf631\SED_scripts'
structures_path = os.path.join(root, 'nn_models/crystal_phases')
phase_files = ['cubic_fapbi_scaled.cif', 'pbi2.cif', 'pbbr2.cif', 'pb.cif']

# Calibration values
calibration = 0.0046

# Domain amplification
simulated_direct_beam_bool = [False,]# True,]
relrod_list = [0.002, 0.01, 0.02, 0.03]
spot_spread_list = [0.006, 0.01, 0.014, 0.018, 0.022]

# Simulation microscope values
detector_size = 515 #px
beam_energy = 200.0 #keV
wavelength = 2.5079e-12 #m
detector_pix_size = 55e-6 #m
from pyxem.detectors import Medipix515x515Detector
detector = Medipix515x515Detector()

# Processing values
n_angle_points = 150
corrupt_n_times = 2

cropping_start = 0.11
cropping_stop = 1.30
sqrt_signal = False

## Simulate data for each phase

In [57]:
phase_dict = {}
for phase in phase_files:
    name = phase.split(".")[0]
    phase_dict[name] = diffpy.structure.loadStructure(os.path.join(structures_path, phase))

print('n_phases = {}'.format(len(phase_dict)))

n_phases = 4


In [58]:
def get_random_euler(npoints):
    radius = 1
    np.random.seed(1)
    u = np.random.randint(-100,100+1,size=(npoints,))/100 
    u2 = 2*np.pi*np.random.random(size=(npoints,))
    theta = 2*np.pi*np.random.random(size=(npoints,))
    x = radius*np.sqrt(1-u**2)*np.cos(theta)
    y = radius*np.sqrt(1-u**2)*np.sin(theta)
    z = radius*u 
    phi = np.arccos(z/radius)
    eulerAlpha = u2
    eulerBeta = phi
    eulerGamma = theta
    return np.array([np.rad2deg(eulerAlpha),np.rad2deg(eulerBeta),np.rad2deg(eulerGamma)]).T 


def get_reciprocal_radius(detector_size, calibration):
    half_pattern_size = detector_size // 2
    reciprocal_radius = calibration * half_pattern_size
    return reciprocal_radius


def create_diffraction_library(phase_dict, euler_list,
                                       beam_energy, relrod_length,
                                       calibration, detector_size,
                                       with_direct_beam):

    phase_names = list(phase_dict.keys())
    phases = list(phase_dict.values())
    euler_list_n = [euler_list, ] * len(phase_names)

    sample_lib = StructureLibrary(phase_names, phases, euler_list_n)
    ediff = DiffractionGenerator(beam_energy, relrod_length)
    diff_gen = DiffractionLibraryGenerator(ediff)

    reciprocal_radius = get_reciprocal_radius(detector_size, calibration)
    library = diff_gen.get_diffraction_library(sample_lib,
                                               calibration=calibration,
                                               reciprocal_radius=reciprocal_radius,
                                               half_shape=(detector_size//2, detector_size//2),
                                               with_direct_beam=with_direct_beam)
    return library

In [59]:
data = {}
for key, val in phase_dict.items():
    data[key] = []
for with_direct_beam in simulated_direct_beam_bool:
    for relrod_length in tqdm.tqdm(relrod_list):
        for spot_spread in spot_spread_list:

            euler_list = get_random_euler(n_angle_points)

            library = create_diffraction_library(phase_dict, euler_list,
                                                 beam_energy, relrod_length,
                                                 calibration, detector_size,
                                                 with_direct_beam)

            reciprocal_radius = get_reciprocal_radius(detector_size, calibration)
            for euler in euler_list:
                for phase in library.keys():
                    pattern = sim_as_signal(library.get_library_entry(phase=phase,
                                                                      angle=euler)['Sim'],
                                            detector_size, spot_spread, reciprocal_radius)

                    data[phase].append(pattern)

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/150 [00:00<?, ?it/s][A
 10%|█         | 15/150 [00:00<00:00, 148.75it/s][A
 21%|██        | 31/150 [00:00<00:00, 149.90it/s][A
 32%|███▏      | 48/150 [00:00<00:00, 153.38it/s][A
 43%|████▎     | 65/150 [00:00<00:00, 155.91it/s][A
 54%|█████▍    | 81/150 [00:00<00:00, 156.73it/s][A
 65%|██████▌   | 98/150 [00:00<00:00, 159.21it/s][A
 76%|███████▌  | 114/150 [00:00<00:00, 159.05it/s][A
 87%|████████▋ | 131/150 [00:00<00:00, 160.64it/s][A
 99%|█████████▊| 148/150 [00:00<00:00, 162.02it/s][A
                                                  [A
  0%|          | 0/150 [00:00<?, ?it/s][A
 13%|█▎        | 20/150 [00:00<00:00, 198.34it/s][A
 27%|██▋       | 41/150 [00:00<00:00, 199.50it/s][A
 41%|████▏     | 62/150 [00:00<00:00, 200.31it/s][A
 55%|█████▌    | 83/150 [00:00<00:00, 202.63it/s][A
 69%|██████▉   | 104/150 [00:00<00:00, 202.52it/s][A
 83%|████████▎ | 125/150 [00:00<00:00, 203.61it/s][A
 97%|█████████▋| 145/1

This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no longer takes a maximum_excitation_error
This class changed in v0.3 and no long

In [60]:
# Stack data
import dask.array as da

for i, value in enumerate(data.values()):
    list_data = da.from_array([x.data for x in value], chunks=(10, detector_size, detector_size))

    if i ==0:
        #list_data = np.expand_dims(list_data, 1)
        training_data = list_data
    else:
        #list_data = np.expand_dims(list_data, 1)
        training_data = da.vstack([training_data, list_data],)

del data
del library
del list_data
gc.collect()

shape = (len(phase_dict.keys()),
         n_angle_points*len(relrod_list)*len(spot_spread_list)*len(simulated_direct_beam_bool),
         detector_size,
         detector_size)

training_data = training_data.reshape(shape)
training_data = pxm.LazyElectronDiffraction2D(training_data)
training_data.set_diffraction_calibration(calibration)
print(training_data)

<LazyElectronDiffraction2D, title: , dimensions: (3000, 4|515, 515)>


## Recenter

In [63]:
shiftList = np.zeros((np.size(training_data.data,0),
                      np.size(training_data.data,1),
                      2,)
                     )

shiftList[:,:,0]=0.5
shiftList[:,:,1]=0.5

shiftList = shiftList.reshape(-1, shiftList.shape[-1]) # Flatten the 2D navigtion axis

training_data.compute()
training_data.align2D(shifts=shiftList,crop=False,fill_value=0., parallel=True)

name = '2D_simulated_data_{}classes_{}neuler_domainrand_centered_{}cal.hspy'.format(np.size(training_data.data,0),  n_angle_points, calibration)
#training_data.save(os.path.join('2d_simulated_data', name))
print(training_data)



[########################################] | 100% Completed | 29.9s
<ElectronDiffraction2D, title: , dimensions: (3000, 4|515, 515)>


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12000.0), HTML(value='')))

## Integrate radially

In [64]:
camera_length = detector_pix_size / (wavelength * calibration * 1e10)

In [65]:
training_data.unit = "k_A^-1"
training_data.set_experimental_parameters(beam_energy=beam_energy)
radial_steps = np.ceil((int(detector_size/2) - 1)/2)*2
training_data_1D = training_data.get_azimuthal_integral1d(npt_rad=radial_steps,
                                                          center=([detector_size/2,detector_size/2]),
                                                          detector=detector,
                                                          detector_dist=camera_length,
                                                          )
print(training_data_1D)


[0, 10.497834414598954]
<ElectronDiffraction1D, title: , dimensions: (3000, 4|256)>


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12000.0), HTML(value='')))

## Corrupt data several times

In [66]:
def create_random_dampening_profile(signal_array):
    sig_len = len(signal_array)
    dumb = np.repeat(np.random.choice([0,1,1],38),(sig_len//50))
    dumb1 = np.append(dumb,np.zeros([sig_len-len(dumb),]))
    dumbrnd = np.repeat(np.random.rand(15,),sig_len//15)
    dumbrnd1 = np.append(dumbrnd,np.zeros([sig_len-len(dumbrnd),]))
    dempening_profile = dumb1 * dumbrnd1
    return dempening_profile

def dampen_signal(signal_array):
    dampening_profile = create_random_dampening_profile(signal_array)
    return signal_array * dampening_profile


In [67]:
training_data_1D_corrupted = training_data_1D.data

for i in range(corrupt_n_times):
    damped = training_data_1D.map(dampen_signal, inplace=False, parallel=True)
    training_data_1D_corrupted = np.append(training_data_1D_corrupted, damped, axis=1)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12000.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12000.0), HTML(value='')))

## Crop and normalise and sqrt

In [68]:
training_data_1D_corrupted = pxm.ElectronDiffraction1D(training_data_1D_corrupted,)
training_data_1D_corrupted.axes_manager.signal_axes[0].scale = training_data_1D.axes_manager.signal_axes[0].scale
training_data_1D_corrupted.axes_manager.signal_axes[0].offset = training_data_1D.axes_manager.signal_axes[0].offset


training_data_1D_corrupted.crop_signal1D(cropping_start, cropping_stop)

if sqrt_signal:
    training_data_1D_corrupted.data = np.sqrt(training_data_1D_corrupted.data)

dpmax = training_data_1D_corrupted.data.max(2)
training_data_1D_norm = training_data_1D_corrupted.data/dpmax[:,:,np.newaxis]

print(training_data_1D_norm.shape)

(4, 9000, 182)


  if sys.path[0] == '':


## NN requirements: reshape and labelling

In [69]:
training_data_1D_norm = training_data_1D_norm.reshape(-1, training_data_1D_norm.shape[-1])

print(training_data_1D_norm.shape)

(36000, 182)


In [71]:
# Create labels
n_phases = len(phase_dict)
labels = np.zeros((n_phases, int(training_data_1D_norm.shape[0]/n_phases)))
for i in range(n_phases):
    labels[i,:] = i

training_labels = labels.flatten()

In [None]:
# Check for outliers and nan values
where_nan = np.argwhere(np.isnan(training_data_1D_norm))
training_data_1D_norm = np.delete(training_data_1D_norm, where_nan[:,0], axis = 0)
training_labels = np.delete(training_labels, where_nan[:,0], axis = 0)
print(training_data_1D_norm.shape, training_labels.shape)

In [72]:
store_train_data = TemporaryFile()
x = training_data_1D_norm
y = training_labels
phase_names = list(phase_dict.keys())

In [73]:
np.savez('1D_simulated_data_{}classes_{}neuler_domainrand_{}ncorrupted'.format(n_phases,  n_angle_points, corrupt_n_times), x=x, y=y, phases=phase_names)

In [1]:
print(training_data_1D_corrupted.data.shape)


NameError: name 'training_data_1D_corrupted' is not defined