In [1]:
# Environment Variables: 

YES = 1
NO = 0

DATA_DIR = '/home/pujan/Research/RHEED/Data/' # Change to your DATA PATH

In [2]:
# Imports for Training

import os
import numpy as np
import matplotlib.pyplot as plt
import h5py
from scipy.optimize import curve_fit
from multiprocessing import Pool
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

%matplotlib inline

In [3]:
# Read H5 Data File:

RHEED_data_file = DATA_DIR + 'RHEED_4848_test6.h5'
spot = 'spot_2'
h5 = h5py.File(RHEED_data_file, 'r')

raw_data = []
for growth in h5.keys():
    raw_data.extend(h5[growth][spot])
raw_data = np.array(raw_data).astype(np.float32)
raw_data = np.expand_dims(raw_data, axis=-1).astype(np.float32) # if (batch_size, height, width, channels)

print(f'[Raw Images Shape]: {raw_data.shape}')

[Raw Images Shape]: (150985, 48, 48, 1)


In [4]:
# Normalize w/ image max

normalized_images = []
normalized_factor = []
for image in tqdm(raw_data):
    normalized_images.append(image / np.max(image))
    normalized_factor.append(np.max(image))
normalized_images = np.array(normalized_images).astype(np.float32)
normalized_factor = np.array(normalized_factor).astype(np.float32)


print(f'[Normalized Images Shape]: {normalized_images.shape}')

100%|██████████| 150985/150985 [00:01<00:00, 110845.19it/s]


[Normalized Images Shape]: (150985, 48, 48, 1)


In [5]:
# Estimate Labels Function
x, y = (48, 48) # image shape

x_range = np.arange(x)
y_range = np.arange(y)
X_coord, Y_coord = np.meshgrid(x_range, y_range)
X_coord = X_coord.astype(np.float64)
Y_coord = Y_coord.astype(np.float64)

sigma_x_guess = sigma_y_guess = 10 # arbitrary
theta_guess = 0 # arbitrary

# https://en.wikipedia.org/wiki/Gaussian_function
def gaussian_2d(independent_vars, x0, y0, sigma_x, sigma_y, theta):
    x, y = independent_vars

    a = np.power(np.cos(theta), 2) / (2 * np.power(sigma_x, 2)) + np.power(np.sin(theta), 2) / (2 * np.power(sigma_y, 2))
    b = -1 * np.sin(theta) * np.cos(theta) / (2 * np.power(sigma_x, 2)) + np.sin(theta) * np.cos(theta) / (2 * np.power(sigma_y, 2))
    c = np.power(np.sin(theta), 2) / (2 * np.power(sigma_x, 2)) + np.power(np.cos(theta), 2) / (2 * np.power(sigma_y, 2))
    
    return np.ravel(np.exp(-1 * (a * np.power(x - x0, 2) + 2 * b * (x - x0) * (y - y0) + c * np.power(y - y0, 2)))).astype(np.float64)

def estimate_label(image):
    image = np.squeeze(image).astype(np.float64)

    x0_guess, y0_guess = np.unravel_index(np.argmax(image), (x, y))

    popt, pcov = curve_fit(f=gaussian_2d, 
                    xdata=(X_coord, Y_coord),
                    ydata=np.ravel(image).astype(np.float64),
                    p0 = np.array((x0_guess, y0_guess, sigma_x_guess, sigma_y_guess, theta_guess)).astype(np.float64))

    return popt

In [6]:
# Estimate Labels: Clipping may be necessary!
p_count = 100 # CPU COUNT (100 is Large)
save_file = NO
RHEED_label_file = DATA_DIR + 'Estimated_Labels.npy'

# Generate
with Pool(p_count) as p:
    estimated_labels = np.array(list(tqdm(p.imap(estimate_label, normalized_images),
                                    total=len(normalized_images),
                                    position=0))).astype(np.float32)
np.save(RHEED_label_file, estimated_labels)
print(f'[Estimated Labels Shape]: {estimated_labels.shape}')

100%|██████████| 150985/150985 [00:38<00:00, 3962.66it/s]


[Estimated Labels Shape]: (150985, 5)
