# Fast Post Merging Baselining Data Processing with Python

## Adapted from Beth's pyFAI Multibaselines script; the overall workflow is the same

## Import modules

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os
import matplotlib as mpl
import pandas as pd        
import math

import sys
!{sys.executable} -m pip install pyFAI
import pyFAI
from pyFAI.gui import jupyter

!{sys.executable} -m pip install pybaselines
from pybaselines import Baseline
from pybaselines.utils import gaussian

pi = math.pi

# Load calibration file

The detector geometry was calibrated using the pyFAI-calib2 tool.

pyFAI: https://pyfai.readthedocs.io/en/v2023.1/

In [None]:
# Location of the calibration .poni file.
poni_path = 'D:/I11 Beamtime July/calib_kap.poni'  #('C:/poni/file/location/calibration.poni')

# Load the calibration; the resulting object provides the integrate1d calls used below.
calibration = pyFAI.load(poni_path)
calibration  # last expression of the cell, so the notebook displays it

In [None]:
# Location of the detector-frame mask (.npy); it is passed as `mask=` to integrate1d.
mask_path = 'D:/I11 Beamtime July/calib_kap_mask.npy'  #("C:/mask/file/location/mask.npy")
oneD_mask = np.load(mask_path)

# Specify folder of frames to read in

In [None]:
# Folder of raw .nxs frames to read in, and the run label used in output file names.
collection_dir = "D:/I11 Beamtime July/RAW_2D/Run_8_X2_0.5VF/" #"C:/collection/set/of/raw/nxs/files/location/"
run_id = "Run_8_X2_0.5VF"

# Collect every .nxs file in the folder. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep plot order, legends and
# list indices reproducible between runs and machines.
file_nxs = sorted(
    entry for entry in os.listdir(collection_dir) if entry.endswith(".nxs")
)
count = len(file_nxs)

print('File count:', count)
print(file_nxs)

In [None]:
# scarlett adaptations - output directory made
# Build the path of the folder that receives the processed patterns.
processing_folder_dir = "D:/I11 Beamtime July/RAW_2D/Run_8_X2_0.5VF/"
processing_folder = "05_PostM_Baselining_Processed_Patterns/"
processing_path = os.path.join(processing_folder_dir, processing_folder)

# exist_ok=True makes this a no-op when the folder already exists, without a
# separate existence check that another process could invalidate in between.
os.makedirs(processing_path, exist_ok=True)

print("Processed pattern directory:", processing_path)

## Integration using pyFAI module
#### pyFAI: https://pyfai.readthedocs.io/en/v2023.1/

In [None]:
# Import each frame and 1D-integrate it with pyFAI.
#
# Frames are integrated one at a time. Staging them all in a pre-allocated
# (count, 2881, 2880) float64 array would cost roughly 66 MB per file and
# hard-code the detector shape, and the stacked frames are not needed once
# each pattern has been integrated.
int_patterns = []

for nxs_name in file_nxs:
    with h5py.File(collection_dir + nxs_name, 'r') as dat:
        # [()] reads the whole dataset into memory before the file is closed.
        frame = np.array(dat["/entry1/pixium_hdf/data"][()])

    # Drop the leading axis of the dataset (assumes one image per file) and
    # hand pyFAI a double-precision 2D image.
    image = frame.reshape(frame.shape[1:]).astype(np.float64, copy=False)

    int_patterns.append(
        calibration.integrate1d(
            image,
            1000,
            unit=pyFAI.units.TTH_DEG,
            radial_range=[1, 30],
            mask=oneD_mask,
        )
    )

if not int_patterns:
    raise RuntimeError(f"No .nxs files were found in {collection_dir}")

# Every pattern uses the same radial range and bin count, so the 2-theta axis
# of the first one is used for all of them.
two_theta = int_patterns[0][0]

In [None]:
# Display all raw integrated patterns, stacked with a constant vertical offset.

large = []  # maximum of each offset curve

# Create a new figure
plt.figure(figsize=(14, 14))
plt.xlabel('$2\\theta$ ($^{o}$)')
plt.xlim(1, 30)
plt.ylabel('Intensity')

for i, (nxs_name, pattern) in enumerate(zip(file_nxs, int_patterns)):
    offset = i * 20  # Adjust the offset as needed
    shifted = pattern[1] + offset  # element [1] of an integration result is the intensity
    plt.plot(two_theta, shifted, label=nxs_name)
    # np.max reduces inside NumPy instead of iterating the array in Python.
    large.append(np.max(shifted))

# Highest point over all offset curves; usable as an upper y-limit.
huge = max(large)
#plt.ylim(100,huge+20)

# One legend call is enough; a second call simply replaces the first legend.
plt.legend(loc='upper right')

In [None]:
# List the scan numbers of any patterns that show no diffraction so they are
# left out of further processing. Entries must be integers: they are compared
# with the integer parsed from each file name (the text after the last '-',
# up to the first '.').

# Example:
# remove = [61557]

# or remove none

remove = []

In [None]:
# Drop the files flagged for removal before any further processing.

remove_set = set(remove)

filtered_file_list = []
for file_name in file_nxs:
    # Scan number: text after the last '-' up to the first '.', as an int.
    scan_number = int(file_name.split('-')[-1].split('.')[0])
    if scan_number in remove_set:
        continue
    filtered_file_list.append(file_name)

filtered_no = len(filtered_file_list)
print("Number of filtered files:", filtered_no)
print("Filtered file list:", filtered_file_list)

In [None]:
# Re-integrate the chosen files.
#
# Each frame is integrated as soon as it has been read, so only one image is
# held in memory at a time. A pre-allocated (count, 2881, 2880) float64
# staging array would cost roughly 66 MB per file and hard-code the detector
# shape without being needed after integration.
filtered_int_patterns = []

for nxs_name in filtered_file_list:
    with h5py.File(collection_dir + nxs_name, 'r') as dat:
        # [()] reads the whole dataset into memory before the file is closed.
        frame = np.array(dat["/entry1/pixium_hdf/data"][()])

    # Drop the leading axis of the dataset (assumes one image per file) and
    # hand pyFAI a double-precision 2D image.
    image = frame.reshape(frame.shape[1:]).astype(np.float64, copy=False)

    # Alternative kept from the original notebook: integrate without the mask
    # over 4-30 degrees by dropping mask= and using radial_range=[4, 30].
    filtered_int_patterns.append(
        calibration.integrate1d(
            image,
            1000,
            unit=pyFAI.units.TTH_DEG,
            radial_range=[1, 30],
            mask=oneD_mask,
        )
    )

if not filtered_int_patterns:
    raise RuntimeError("No files left to integrate after filtering")

# Every pattern uses the same radial range and bin count, so the 2-theta axis
# of the first one is used for all of them.
two_theta2 = filtered_int_patterns[0][0]

In [None]:
# Use this cell if the array allocation in the previous cell is too large.
# --- Re-integrate the chosen files (memory-safe version) ---

filtered_int_patterns = []

for file_index in range(filtered_no):
    nxs_path = collection_dir + filtered_file_list[file_index]

    with h5py.File(nxs_path, 'r') as dat:
        raw = dat["/entry1/pixium_hdf/data"][()][:]

    # float32 halves the per-frame footprint compared with float64
    frame = np.array(raw, dtype=np.float32)

    # drop the leading axis in place of copying into a preallocated stack
    frame = frame.reshape(frame.shape[1:])

    pattern = calibration.integrate1d(
        frame,
        1000,
        unit=pyFAI.units.TTH_DEG,
        radial_range=[1, 30],
        mask=oneD_mask
    )
    filtered_int_patterns.append(pattern)

# the 2-theta axis is element [0] of the first integrated pattern
first_pattern = filtered_int_patterns[0]
two_theta2 = first_pattern[0]


In [None]:
# Display the final files to merge and process, stacked with a constant
# vertical offset.

large2 = []  # maximum of each offset curve

# Create a new figure
plt.figure(figsize=(14, 14))
plt.xlabel('$2\\theta$ ($^{o}$)')
plt.xlim(1, 30)
plt.ylabel('Intensity')

for j, (nxs_name, pattern) in enumerate(zip(filtered_file_list, filtered_int_patterns)):
    offset = j * 20  # Adjust the offset as needed
    shifted = pattern[1] + offset  # element [1] of an integration result is the intensity
    plt.plot(two_theta2, shifted, label=nxs_name)
    # np.max reduces inside NumPy instead of iterating the array in Python.
    large2.append(np.max(shifted))

# Upper y-limit: highest point over all offset curves plus a small margin.
huge2 = max(large2)
plt.ylim(10, huge2 + 20)

# One legend call is enough; a second call simply replaces the first legend.
plt.legend(loc='upper right')

In [None]:
# Merge the raw patterns: add them bin by bin, then divide by how many there are.

# Intensity column (element [1]) of every filtered pattern.
raw_intensities = [filtered_int_patterns[k][1] for k in range(filtered_no)]

added = np.sum(raw_intensities, axis=0)
averaged_intensities = added / filtered_no

In [None]:
# Show the merged (averaged) raw pattern.

plt.figure(figsize=(14, 14))
plt.plot(two_theta2, averaged_intensities)

# Axis labels
plt.xlabel('$2\\theta$ ($^{o}$)')
plt.ylabel('Intensity')

# Fixed viewing window
plt.xlim(1, 30)
plt.ylim(0, 1450)


# Baseline correction 

### Using the `mor()` (morphological) baseline correction from pybaselines: https://pybaselines.readthedocs.io/en/latest/

In [None]:
# Baseline correction set-up: x is the 2-theta axis, y the averaged intensity.
x, y = two_theta2, averaged_intensities

# Fitter bound to the x axis; the mor() calls below all go through it.
baseline_fitter = Baseline(x_data=x)

In [None]:
# Display the baseline fit; edit half_window until the fit is satisfactory.

half_window = 2

# [0] selects the baseline array from mor()'s return value. It is computed
# once here so the plotted curve and its label use the same half_window.
baseline_preview = baseline_fitter.mor(y, half_window=half_window)[0]

# Only one figure is created: a bare plt.figure() before this call would just
# open an additional, empty figure.
plt.figure(figsize=(14, 14))
plt.plot(x, y, label='data')
plt.plot(x, baseline_preview, label=f'half_window={half_window}')
plt.xlabel('$2\\theta$ ($^{o}$)')
plt.xlim(1, 20)
#plt.ylim(80, 200)
plt.ylabel('Intensity')
plt.legend()

In [None]:
# Baseline correction: subtract the fitted mor() baseline from the averaged pattern.

fitted_baseline = baseline_fitter.mor(y, half_window=half_window)[0]
corrected_data = y - fitted_baseline

In [None]:
# Display all baseline correction stages: data, fitted baseline, corrected pattern.

# Fit the baseline once for plotting instead of recomputing it inline.
baseline_curve = baseline_fitter.mor(y, half_window=half_window)[0]

# Only one figure is created: a bare plt.figure() before this call would just
# open an additional, empty figure.
plt.figure(figsize=(14, 14))
plt.plot(x, y, label='data')
plt.plot(x, baseline_curve, label=f'half_window={half_window}')
# The corrected pattern is drawn at 20x scale; corrected_data itself is unchanged.
plt.plot(x, corrected_data * 20, label='corrected_baseline')
plt.xlabel('$2\\theta$ ($^{o}$)')
plt.ylabel('Intensity /a.u.')
plt.xlim(1, 20)
plt.ylim(-5, 200)
plt.legend()

# os.path.join keeps the path valid even if processing_path has no trailing
# separator, and matches how the final .xy/.png paths are built.
plt.savefig(os.path.join(processing_path, run_id + "_baseline_corr" + ".png"))

In [None]:
# scarlett addition
# Shift the corrected pattern so that its minimum value sits at 0.
lowest = np.min(corrected_data)
final_corrected = corrected_data - lowest

# --- Output locations ---
# Both outputs share one stem inside the processing folder.
output_stem = os.path.join(processing_path, f"{run_id}_final_baselinecorr")
xy_filename = output_stem + ".xy"
png_filename = output_stem + ".png"

# --- Write the .xy data file ---
# Two columns: 2-theta (x) and shifted intensity. comments='' writes the
# header line without a leading comment character.
output_data = np.column_stack([x, final_corrected])
np.savetxt(
    xy_filename,
    output_data,
    fmt="%.6f",
    header="2Theta  Intensity",
    comments='',
)
print(f"Saved XY data: {xy_filename}")

# --- Draw the final corrected pattern ---
plt.figure(figsize=(12, 6))
plt.plot(x, final_corrected, color='blue', linewidth=1.2)
plt.title(f"{run_id} Final Baseline-Corrected PXRD Pattern", fontsize=14)
plt.xlabel('$2\\theta$ ($^{o}$)', fontsize=12)
plt.ylabel('Intensity / a.u.', fontsize=12)
plt.xlim(1, 30)
plt.tight_layout()

# Save before show(), then display the figure.
plt.savefig(png_filename, dpi=300)
plt.show()

print(f"Saved plot: {png_filename}")
