# DESY depth profiling data analysis workbook [2020]

This is a workbook used to integrate synchrotron data from DESY then generate the bg-spline and peak-index files required for running CMWP.

## Import stuff

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib qt
import os
import glob
import pyFAI, pyFAI.azimuthalIntegrator
import csv
from shutil import copyfile

from src.cmwp_tools import load_tifs
from src.xrd_tools import getReflections
from src.desy_tools import load_fio
from src.cmwp_tools import getBaseline, getPeaks

## Define settings

In [2]:

#name = '02_0.15dpa_Zr4'
#dpa_val = 0.15

#lookup_val = 'y1'
#zlookup = [[-0.4, -0.2, 0, 0.2, 0.4],[13.635, 13.633, 13.632, 13.631, 13.630]]

In [5]:
### Experiment specific settings

# Base directory where the data is stored
base = "/mnt/manchester_rds/201910_DESY/"

# Calibration files (.poni) for the detectors, one per detector
calib_file = [base + 'processed/25Oct_PE1_calib.poni', base + 'processed/25Oct_PE2_calib.poni']

# One pyFAI integrator per calibration file, in the same order as calib_file
ais = [pyFAI.load(calib) for calib in calib_file]

# Scaled by 1e10 so the value is reported in Angstrom (pyFAI stores the wavelength in metres)
wavelength = ais[0].wavelength * 1e10
print('Wavelength: {0:.8f} A'.format(wavelength))

# How many integration points to use for pyFAI
intpoints = 2000

## Zr peaks
# Use the calibrated wavelength rather than a hard-coded number, so that the expected peak
# positions are on the same 2theta scale as the integrated data and the baseline points below.
peak_name, peak_pos = getReflections(crystalType='hcp', a=3.232, c=5.147, wavelength=wavelength,
                                     printReflections=False)

## Background
# What 2theta points to calculate the background spline from.
# NOTE(review): the last three points (17.88, 8.5, 8.8) are out of order; presumably
# getBaseline sorts them - confirm.
baseline = [5.734, 5.929, 6.112, 6.289, 6.42, 6.973, 7.962, 8.164, 9.246, 10.378, 10.7, 11, 11.912,
            12.21, 12.42, 13.15, 14.107, 14.2182, 14.783, 14.968, 15.588, 16.236,
            16.595, 16.927, 17.5007, 18.329, 18.9106, 19.65, 17.88, 8.5, 8.8]

# What 2theta points to add in extra point (passed straight to getBaseline)
baseline_interpolate = []
baseline_interpolate_factor = []

# 2theta bounds of integration
limits = [5.5, 20]

# How many data points each side of the approximate 2theta peak position to search for the true peak
searchrange = int(0.005 * intpoints)

# pyFAI integration method, and optional list of azimuthal ranges (None = full ring)
method = 'full_csr'
azimuth_range = None


Wavelength: 0.32541784 A


In [21]:
### Sample specific settings

# Label used when reporting the scan ranges
name = '0.1 dpa'

# Scan directories (relative to base + 'raw/') that make up this sample
directories = [
    "00_BESTDATA/02_Zr4_0p1_realScan2_top_loop1",
    "00_BESTDATA/02_Zr4_0p1_realScan2_top_loop2",
    "00_BESTDATA/02_Zr4_0p1_realScan2_top_loop3",
    "00_BESTDATA/02_Zr4_0p1_realScan2_bottom_loop1",
    "00_BESTDATA/02_Zr4_0p1_realScan2_bottom_loop2",
]

# This is the output directory (normally a sub-folder in your CMWP dir)
outputdir = "/home/rhys/Documents/CMWP-211102/2019_DESY/0p1/"

# Folder holding the dark images
darks = base + "raw/close_detector/hyd/dark"

# This is where the template files are stored. These are copied for each integration.
templates = base + "templates/hyd/"

# Optional position limits (None = not set)
z_range = None
y_range = None

# Switches for writing extra phases into the peak-index file
spp = False
hyd = False

In [13]:
## Integrate lab6

In [14]:
#lab6 = glob.glob('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6/LaB6_00*.tif')[2:]
#lab6_dark = glob.glob('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6/dark*.tif')

#ai = pyFAI.load(calib_file)

#data=load_tifs(lab6)-load_tifs(lab6_dark)

In [31]:
# Disabled LaB6 integration; every line of the call must stay commented out together,
# otherwise the dangling continuation line is an IndentationError.
#output = ai.integrate1d(data, npt=intpoints, radial_range=[2.2,11.3],
#        correctSolidAngle=True, method='full_csr',polarization_factor=0.99, unit='2th_deg')
#np.savetxt('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6.dat', np.array(output).T)

In [35]:
#output = ai.integrate1d(data, npt=intpoints, radial_range=[2.2,11.3], azimuth_range=[90,110],
#        method='IntegrationMethod(2d int, pseudo split, histogram, cython)', correctSolidAngle=True, polarization_factor=0.99, unit='2th_deg')
#np.savetxt('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6_limazim.dat', np.array(output).T)

## Read log file and make pandas table

The .fio file contains a table of motor positions (i.e. idtz2, idty1), image names, and whether each image was a clearing frame or an actual exposure. This section reads the file into a pandas DataFrame for ease of use.

In [25]:
# Build one table row per detector exposure from each directory's log.log and the .fio files
# it points to, skipping clearing frames.

# Initialise lists (one entry per kept exposure; all four must stay the same length)
idtz2_list = []
idty1_list = []
file_list = []
channel_list = []

# Get darks (taken from the first directory only)
pe1_darks_list = glob.glob(base + 'raw/' + directories[0] + '/_PE1/dark*.tif')
pe2_darks_list = glob.glob(base + 'raw/' + directories[0] + '/_PE2/dark*.tif')

pe1_dark_image = load_tifs(pe1_darks_list)
pe2_dark_image = load_tifs(pe2_darks_list)

# Image sub-folder and filename stem for each detector channel number
detector_folder = {1: "/_PE1/sample_", 2: "/_PE2/sample_"}

# Loop over directories
for directory in directories:

    # Read in log file: numeric columns first, then the .fio path column as text.
    # np.atleast_1d: loadtxt returns 0-d arrays for a one-row file, which cannot be looped over.
    log_path = base + 'raw/' + directory + "/log.log"
    idtz2, curr = np.loadtxt(fname=log_path, delimiter=" ", skiprows=7, usecols=(0, 4), unpack=True)
    idtz2 = np.atleast_1d(idtz2)
    fio_filename2 = np.atleast_1d(np.loadtxt(fname=log_path, delimiter=" ", skiprows=7, usecols=(5),
                                             dtype=str, unpack=True))

    # Keep the last 25 characters of each logged path and re-root them under base
    # (presumably the part of the path relative to base - confirm against the log files)
    fio_filename = [base + logged_path[-25:] for logged_path in fio_filename2]

    # Read in each fio file and write to lists
    # Loop over idtz2 (one .fio file per idtz2 entry in the log)
    for z_position, fio_path in zip(idtz2, fio_filename):
        idty1, channel = np.loadtxt(fname=fio_path, skiprows=29, usecols=(1, 2), unpack=True)
        data_filename, clearing = np.loadtxt(fname=fio_path, skiprows=29, usecols=(3, 4),
                                             dtype=str, unpack=True)
        idty1, channel, data_filename, clearing = (
            np.atleast_1d(column) for column in (idty1, channel, data_filename, clearing))

        # Loop over idty1
        for y_position, chan, image_name, frame_type in zip(idty1, channel, data_filename, clearing):
            if frame_type == "clearing":
                continue

            # Skip rows from an unknown channel entirely; appending only some of the
            # columns would leave the lists with different lengths.
            folder = detector_folder.get(chan)
            if folder is None:
                continue

            # Correct filenames and add full path: the part after the first "_" is
            # left-padded with zeroes to 9 characters (number plus extension).
            padded_name = image_name.split("_")[1].zfill(9)

            idtz2_list.append(z_position)
            idty1_list.append(y_position)
            file_list.append(base + 'raw/' + directory + folder + padded_name)
            channel_list.append(chan)

# Convert to pandas dataframe
df = pd.DataFrame(data={'channel': channel_list, 'idtz2': idtz2_list, 'idty1': idty1_list, 'filename': file_list})

z_values = df['idtz2'].unique()
y_values = df['idty1'].unique()

pref = '\n'
print(pref+'name: {0}\tz from {1:.3f} to {2:.3f}\ty from {3:.3f} to {4:.3f}'.format(name, np.min(z_values), np.max(z_values), np.min(y_values), np.max(y_values)))

# Group image names by idtz2 \ idty1 \ channel
new = df.groupby(by=['idtz2', 'idty1', 'channel'])['filename'].apply(list)


name: 0.1 dpa	z from 13.620 to 13.700	y from -0.400 to 0.400


In [26]:
# Display the dataframe (the notebook renders a cell's final bare expression as a table)
df

Unnamed: 0,channel,idtz2,idty1,filename
0,1.0,13.62,-0.4,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
1,1.0,13.62,-0.4,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
2,1.0,13.62,-0.2,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
3,1.0,13.62,-0.2,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
4,1.0,13.62,0.0,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
...,...,...,...,...
2855,2.0,13.70,0.2,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
2856,2.0,13.70,0.4,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
2857,2.0,13.70,0.4,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...
2858,2.0,13.70,0.4,/mnt/manchester_rds/201910_DESY/raw/00_BESTDAT...


## Integration, bg-spline and peak-index creation

This section takes all the images in the above table and integrates them according to the calibrations defined previously for all 4 detectors. 
This is saved as a .dat file with a prefix containing the motor positions. Then, the files in the template directory are copied with the same prefix. 
A background spline is created from the baseline points specified above and saved with the .bg-spline.dat suffix.
Then a peak-index.dat file is made based on the Zr indices specified above.

In [41]:
# For every row of df: integrate the image, write the .dat / .bg-spline.dat / .peak-index.dat
# files plus template copies into outputdir, save linear and log plots, and record the
# background-subtracted integrated intensity in int_list (one entry per row of df).
#
# NOTE(review): this cell reads the column 'idty2(encoder)', uses a single integrator `ai`
# and prepends `directory` to each filename, whereas the log-reading cell of this workbook
# builds df with an 'idty1' column and absolute file paths, and the settings cell defines a
# list `ais`. Confirm which dataframe and integrator this cell is meant to run against.

# Get list of unique motor position values
z_values = df['idtz2'].unique()
y_values = df['idty2(encoder)'].unique()

# exist_ok replaces the separate existence checks
os.makedirs(outputdir, exist_ok=True)
os.makedirs(outputdir + '/0plots', exist_ok=True)

# Dark image; the first two files returned by glob are skipped
pe1_darks_list = glob.glob(darks + '/*.cbf')[2:]
dark = load_tifs(pe1_darks_list)

print('z from {0:.3f} to {1:.3f}'.format(np.min(z_values), np.max(z_values)))
print('y from {0:.3f} to {1:.3f}'.format(np.min(y_values), np.max(y_values)))

int_list = []
num_error = 0

# Loop invariants: template file list, integration range, and non-interactive plotting
template_files = glob.glob(templates + '*')
radial_range = (limits[0], limits[1])
plt.ioff()

for index, row in df.iterrows():
    y = row['idty2(encoder)']
    z = row['idtz2']

    prefix = 'y_{1:.3f}_z_{0:.3f}'.format(z, y)

    # First file recorded at this (z, y) position; rows sharing a position resolve to the same file
    file = (df[(df['idtz2']==z) & (df['idty2(encoder)']==y)])['filename'].values[0]
    print('\rCurrent:   {0} / {1}\ty = {2:.3f} / {3:.3f}\tz = {4:.3f} / {5:.3f}     [{6}]       {7} file error(s)       '
          .format(index+1, len(df.index), y, np.max(y_values), z, np.max(z_values), file, num_error), end='')

    file = directory + file

    ################### Integrate #############################################

    try:
        data = load_tifs(file) - dark
    except Exception:
        # Count the failure and keep int_list aligned with df using a NaN placeholder;
        # nothing is written or plotted for this row.
        num_error += 1
        int_list.append(np.nan)
        continue

    if azimuth_range is None:
        output = ai.integrate1d(data, npt=intpoints, correctSolidAngle=True, method=method,
                                polarization_factor=0.99, unit='2th_deg', radial_range=radial_range)
        xvals = output[0]
        yvals = output[1]
    else:
        ## If azimuth range is specified: integrate each range and sum the intensities
        outputs = [ai.integrate1d(data, npt=intpoints, correctSolidAngle=True, method=method,
                                  azimuth_range=azim, polarization_factor=0.99, unit='2th_deg',
                                  radial_range=radial_range)
                   for azim in azimuth_range]
        xvals = outputs[0][0]
        yvals = np.sum([sector[1] for sector in outputs], axis=0)

    # Shift the pattern so that its minimum intensity is 20
    yvals = (yvals - np.min(yvals)) + 20

    # Copy each template next to the data, replacing the first 8 characters of its name with the prefix
    for templateName in template_files:
        copyfile(templateName, outputdir + prefix + templateName.split('/')[-1][8:])

    ####################### Save integrated data ######################

    with open(outputdir + prefix + '.dat', 'w+') as f:
        np.savetxt(fname=f, X=np.transpose([xvals, yvals]), fmt=('%1.5f'))

    ########################### Make figure ###########################

    fig, (ax2) = plt.subplots(1, 1, figsize=(16,8))
    ax2.set_title('Integrated data')
    ax2.set_xlabel('2theta (deg)')
    ax2.set_ylabel('Intensity')
    ax2.plot(xvals, yvals)
    ax2.set_xlim(np.min(xvals)+0.01, np.max(xvals)-0.01)

    ########################### Make bg-spline ###########################

    baseline_pos, baseline_int, cs = getBaseline(xvals, yvals, baseline, baseline_interpolate, baseline_interpolate_factor,
                                                 write_to = outputdir + prefix + '.bg-spline.dat')

    ax2.plot(xvals, cs(xvals))
    ax2.plot(baseline_pos, baseline_int, 'o', c='r')

    ########################### Make Zr peak-index ###########################

    peak_pos_new, peak_name_new, peak_int_new = getPeaks(xvals, yvals,
                                                         peak_pos, peak_name, cs, searchrange,
                                                         ax=ax2, plotcolour='r',
                                                         write_to = outputdir + prefix + '.peak-index.dat')

    ########################
    # Optional extra phases appended (mode='a') to the same peak-index file.
    # NOTE(review): spp_peak_pos / spp_peak_name / delta_peak_pos / delta_peak_name are not
    # defined in the visible part of this workbook, and the `hyd` branch uses the spp_* lists
    # while the `spp` branch uses the delta_* lists - confirm this is intended.
    if hyd is True:

        peak_pos_new, peak_name_new, peak_int_new = getPeaks(xvals, yvals,
                                                             spp_peak_pos, spp_peak_name, cs, searchrange,
                                                             mode='a', phasenumber=2,
                                                             ax=ax2, plotcolour='g',
                                                             write_to = outputdir + prefix + '.peak-index.dat')
        peak_pos_new, peak_name_new, peak_int_new = getPeaks(xvals, yvals,
                                                             np.array([4.1956]), np.array(['110']), cs, 1,
                                                             mode='a', phasenumber=2,
                                                             ax=ax2, plotcolour='g',
                                                             write_to = outputdir + prefix + '.peak-index.dat')
    if spp is True:
        peak_pos_new, peak_name_new, peak_int_new = getPeaks(xvals, yvals,
                                                             delta_peak_pos, delta_peak_name, cs, 3,
                                                             mode='a', phasenumber=3,
                                                             ax=ax2, plotcolour='k',
                                                             write_to = outputdir + prefix + '.peak-index.dat')
    ########################

    # Append the background-subtracted integrated intensity for this row
    int_list.append(np.sum(yvals - cs(xvals)))

    ## Save plots (linear, then log y-axis) #################################

    fig.savefig(outputdir + '/0plots/plot_' + prefix + '.pdf')
    ax2.set_yscale('log')
    fig.savefig(outputdir + '/0plots/log_' + prefix + '.pdf')
    plt.close(fig)


z from 14.917 to 14.975
y from 20.695 to 21.695
Current:   118 / 118	y = 20.695 / 21.695	z = 14.975 / 14.975     [hyd_00533.cbf]       0 file error(s)       

## Plot integrated intensities

In [24]:
# Plot the integrated intensity against z for each y position and report the z at which the
# intensity rises fastest (labelled "edge" in the legend).
plt.close()

# Convert to arrays first so that the comparisons below are element-wise;
# comparing a Python list with a scalar yields a single False instead of a mask.
y_positions = np.asarray(df['idty2(encoder)'].tolist())
z_positions = np.asarray(df['idtz2'].tolist())
intensities = np.asarray(int_list)

for val in df['idty2(encoder)'].unique():
    at_this_y = y_positions == val
    yval = intensities[at_this_y]
    zval = z_positions[at_this_y]

    # Take differential and calculate edge position (np.gradient needs at least two points)
    if yval.size > 1:
        edge = zval[np.argmax(np.gradient(yval))]
    else:
        edge = np.nan

    plt.plot(zval, yval, label = 'y = {0:.3f}   edge = {1:.3f}'.format(val, edge))

plt.xlabel('Z position (mm)')
plt.ylabel('Integrated intensity')
plt.legend(loc='lower right')

# Save before showing: a blocking show() would leave nothing to add the legend to or save
plt.savefig(outputdir + '/0plots/0integrated_intensity.pdf')
plt.show()

## Make a bash script

Save this text into a .sh file and run it - this will execute CMWP for each file sequentially

In [36]:
# Print one "./evaluate <file> auto" line per row of df, with the data file path given
# relative to the CMWP folder.
cmwpfolder = "/home/rhys/Documents/CMWP-211102/"

# Part of outputdir that follows the CMWP folder
relative_output = outputdir.split(cmwpfolder)[-1]

for _, scan_point in df.iterrows():
    dat_argument = 'y_{1:.3f}_z_{0:.3f}.dat auto'.format(scan_point['idtz2'],
                                                         scan_point['idty2(encoder)'])
    print('./evaluate ' + relative_output + dat_argument)

./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.917.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_20.695_z_14.917.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.918.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.695_z_14.918.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.919.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_20.695_z_14.919.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.920.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.695_z_14.920.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.921.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_20.695_z_14.921.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.922.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.695_z_14.922.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.923.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_20.695_z_14.923.dat auto
./evaluate 2020_11_DESY/hyd_3p_80_100/y_21.195_z_14.924.dat auto
./evaluate 2020_11_DESY/h

## 2D integration

In [None]:
# Load one image per entry of `files`.
# NOTE(review): `files` is not defined in the visible part of this workbook - confirm where it comes from.
data = [load_tifs(file) for file in files]

# 2D (radial x azimuthal) and 1D integrations of the same images.
# NOTE(review): the settings cell creates `ais` as a plain list of integrators, and a list
# has no integrate2d / integrate1d methods; presumably this cell expects a combined
# multi-detector integrator object - confirm before running.
output2d = ais.integrate2d(data, npt_rad=intpoints, npt_azim=3600, correctSolidAngle=True, polarization_factor=0.99)
output1d = ais.integrate1d(data, npt=intpoints, correctSolidAngle=True, polarization_factor=0.99)

In [None]:
# Show the 2D integration (top) above the 1D pattern (bottom) and mark every expected
# reflection that falls inside the 1D radial range on both axes.
fig, (ax1, ax2) = plt.subplots(2,1, figsize=(14,12))

# Zero-valued bins are replaced by NaN so they are not coloured
ax1.imshow(np.where(output2d[0]==0, np.nan, output2d[0]), vmin=0, vmax=0.5e11)

# Radial limits and label height are loop invariants, so compute them once
radial_min = np.min(output1d[0])
radial_max = np.max(output1d[0])
label_height = np.max(output1d[1])

ax2.plot(output1d[0], output1d[1])
ax2.set_xlim(radial_min, radial_max)
ax2.set_yscale('log')

# The loop variable is deliberately not called `name`, so the sample-level `name`
# setting is not overwritten by the last reflection label.
for reflection, pos in zip(peak_name, peak_pos):

    if radial_min < pos < radial_max:

        # Horizontal position of the reflection on the image axis.
        # NOTE(review): the 1.42 factor and the division by the maximum (rather than the
        # range) look like an empirical mapping - confirm.
        frac = 1.42*intpoints*(pos-radial_min)/(radial_max)

        # draw line and print name
        ax2.axvline(pos, alpha=0.3, c='r')
        ax1.axvline(frac, alpha=0.3, c='r')

        ax1.text(frac, 3500, reflection, horizontalalignment = 'center', c='r')
        ax2.text(pos, label_height, reflection, horizontalalignment = 'center', c='r')


plt.show()