# DESY depth profiling data analysis workbook [2020]

This is a workbook used to integrate synchrotron data from DESY then generate the bg-spline and peak-index files required for running CMWP.

## Import stuff

In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib qt
import os
import glob
import pyFAI, pyFAI.azimuthalIntegrator
from pyFAI.multi_geometry import MultiGeometry
import csv
from scipy.interpolate import CubicSpline
from shutil import copyfile
from src.cmwp_tools import load_tifs
from src.xrd_tools import getReflections

## Define settings

In [116]:
### Experiment specific settings

base = "/mnt/manchester_rds/202011_DESY/"                            # This is the base directory where the data is stored

calib_file = base + 'raw/close_detector/calib_new.poni'             # These are the calibration files (.poni) for the detectors

templates = base + "templates/"                                      # This is where the template files are stored. These are copied for each integration.

intpoints = 4000                                                  # This is how many integration points to use for pyFAI

a=3.232                                                              # a lattice paramater in angstrom
c=5.147                                                              # c lattice paramater in angstrom

baseline=[3.1, 3.4, 4.7, 5.2, 6, 7, 9.3, 9.5, 11.5, 12.55]  # What 2theta points to calculte the background spline from

limits=[3, 12.6]                                                       # 2theta bounds of integration

searchrange = int((25 / 10000) * intpoints)                         # How many data points each side of the approximate 2theta peak position to search for the true peak

idtz2_min = 14.912

In [117]:
### Sample specific settings
               
directory = base + "raw/close_detector/hyd/hyd/hyd/"                                  

outputdir = "/home/rhys/Documents/CMWP-211102/2020_11_DESY/hyd/"       # This is the output directory (normally a sub-folder in your CMWP dir)

fio_file = base + "raw/close_detector/hyd/eh3scan1_00144.fio"

darks = base + "raw/close_detector/hyd/dark"

In [118]:
#lab6 = glob.glob('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6/LaB6_00*.tif')[2:]
#lab6_dark = glob.glob('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6/dark*.tif')
#ai = pyFAI.load(calib_file)
#output = ai.integrate1d(load_tifs(lab6)-load_tifs(lab6_dark), npt=intpoints, radial_range=[2.2,11.3], correctSolidAngle=True, method='full_csr',polarization_factor=0.99, unit='2th_deg')
#np.savetxt('/mnt/manchester_rds/202011_DESY/raw/close_detector/LaB6.dat', np.array(output).T)

## Read log file and make pandas table

The .fio file contains a table of motor positions (ie idtz2, idty1), image names and whether the image was a clearing frame or actual exposure. The section reads in the file into a Pandas dataframe for ease of use.

In [119]:
# Read in column names and data type
colnames = []; formats = [];
with open(fio_file) as input_data:
    for i, line in enumerate(input_data):
        if ' Col' in line:
            colnames.append(' '.join(line.split(' ')[3:-1]))
            skip = i+1
            if 'DOUBLE' in line.split(' ')[-1]: formats.append('f4')
            if 'INTEGER' in line.split(' ')[-1]: formats.append('i4')
            if 'STRING' in line.split(' ')[-1]: formats.append('str')

# Read in log file into dataframe and remove clearing frames
df = pd.read_csv(fio_file, names = colnames, skiprows=skip, sep=' ', skipinitialspace=True) 
df = df[df.type != 'clearing']

# Get list of unique motor position values
z_values = df['idtz2'].unique()
y_values = df['idty2(encoder)'].unique()
    
df.reset_index(inplace=True)
df.drop('index', axis=1, inplace=True)

if not os.path.exists(outputdir):
    os.makedirs(outputdir)
if not os.path.exists(outputdir + '/0plots'):
    os.makedirs(outputdir + '/0plots')

if idtz2_min is not None:
    df = df[df['idtz2']>idtz2_min]
    df.reset_index(inplace=True)

In [120]:
# Print the dataframe
df

Unnamed: 0,index,idty2(encoder),end pos,idtz2,channel,filename,type,unix time
0,26,21.19546,21.19546,14.913,2,hyd_00346.cbf,exposure,1.606126e+09
1,27,20.69546,20.69546,14.913,2,hyd_00347.cbf,exposure,1.606126e+09
2,28,21.19546,21.19546,14.914,2,hyd_00349.cbf,exposure,1.606126e+09
3,29,21.69546,21.69546,14.914,2,hyd_00350.cbf,exposure,1.606126e+09
4,30,21.19546,21.19546,14.915,2,hyd_00352.cbf,exposure,1.606126e+09
...,...,...,...,...,...,...,...,...
171,197,21.69546,21.69546,14.998,2,hyd_00602.cbf,exposure,1.606126e+09
172,198,21.19546,21.19546,14.999,2,hyd_00604.cbf,exposure,1.606126e+09
173,199,20.69546,20.69546,14.999,2,hyd_00605.cbf,exposure,1.606126e+09
174,200,21.19546,21.19546,15.000,2,hyd_00607.cbf,exposure,1.606126e+09


## Integration, bg-spline and peak-index creation

This section takes all the images in the above table and integrates them according to the calibrations defined previously for all 4 detectors. 
This is saved as a .dat file with a prefix containing the motor positions. Then, the files in the template directory are copied with the same prefix. 
A background spline is created from the baseline points specified above and saved with the .bg-spline.dat suffix.
The a peak-index.dat file is made based on the Zr indexes specifed above.

In [121]:
# Read wavelength
with open(calib_file, 'r') as f:
    textList = f.readlines()
    for line in textList:
        if 'Wavelength' in line:
            wavelength = float(line.split(':')[1])*1e10

peak_name, peak_pos = getReflections(crystalType='hcp', a=a, c=c, wavelength=wavelength, printReflections=False)

ai = pyFAI.load(calib_file)


print('z from {0:.3f} to {1:.3f}'.format(np.min(z_values), np.max(z_values)))
print('y from {0:.3f} to {1:.3f}'.format(np.min(y_values), np.max(y_values)))

int_list = []
for index, row in df.iterrows():
    y=row['idty2(encoder)']
    z=row['idtz2']
    
    prefix = 'y_{1:.3f}_z_{0:.3f}'.format(z,y)

    file = (df[(df['idtz2']==z) & (df['idty2(encoder)']==y)])['filename'].values[0]
    print('\rCurrent:   {0} / {1}\ty = {2:.3f} / {3:.3f}\tz = {4:.3f} / {5:.3f}     [{6}] '
          .format(index+1, len(df.index), y, np.max(y_values), z, np.max(z_values), file), end='')

    file = directory + file

    
    ################### Load darks #############################################
    pe1_darks_list = glob.glob(darks + '/*.cbf')[2:]
    
    dark = load_tifs(pe1_darks_list)
    data = load_tifs(file) - dark

    output = ai.integrate1d(data, npt=intpoints, correctSolidAngle=True, method='full_csr',
                            polarization_factor=0.99, unit='2th_deg', radial_range=(limits[0], limits[1]))

    xvals=output[0]; yvals=output[1];
    #yvals = yvals / 100000
    #yvals = yvals - np.min(yvals)
    #yvals += 10000

    for templateName in glob.glob(templates + '*'):
        copyfile(templateName, outputdir + prefix + templateName.split('/')[-1][8:])

    ####################### Save integrated data ######################

    with open(outputdir + prefix + '.dat', 'w+') as f:
         np.savetxt(fname = f, X=np.transpose([xvals, yvals]), fmt = ('%1.5f'))

    ########################### Save figure ###########################

    plt.ioff()
    fig, (ax2) = plt.subplots(1, 1, figsize=(16,8))
    ax2.set_title('Integrated data');
    ax2.set_xlabel('2theta (deg)'); 
    ax2.set_ylabel('Intensity');
    ax2.plot(xvals, yvals)
    ax2.set_xlim(np.min(xvals)+0.01, np.max(xvals)-0.01)
    #ax2.set_ylim(np.min(yvals)-2, np.max(yvals)*1.1)

    x_plot_list = []
    y_plot_list = []

    ########################### Make bg-spline ###########################
    baseline_int = []
    for j in baseline:
        num_index=np.argmin(np.abs(xvals-j))

        baseline_int.append(np.mean(yvals[num_index-5:num_index+5]))

    baseline, baseline_int = (list(t) for t in zip(*sorted(zip(baseline, baseline_int))))

    with open(outputdir + prefix + '.bg-spline.dat', 'w+') as f:
        np.savetxt(fname = f, X=np.transpose([baseline, baseline_int]), fmt = ('%1.5f'))
        
    cs = CubicSpline(baseline, baseline_int)

    ax2.plot(xvals, cs(xvals))
    ax2.plot(baseline, baseline_int, 'o',c='r')

    ########################### Make peak-index ###########################

    if len(peak_pos) != len(peak_name):
        raise ValueError('peak_pos and peak_name arrays should be the same size')

    with open(outputdir + prefix + '.peak-index.dat', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        
        for name, pos in zip(peak_name, peak_pos):

            if np.min(xvals) < pos < np.max(xvals):         # if the peak is within the data
                
                # approx peak postion
                approx_peak_index = np.argmin(np.abs(xvals-pos))                    # get the index of the peak

                # get actual peak position
                peak_index = np.argmax(yvals[approx_peak_index-searchrange:approx_peak_index+searchrange])+approx_peak_index-searchrange
                yval = yvals[peak_index]

                # draw line and print name
                ax2.axvline(xvals[peak_index], alpha=0.1, c='r')
                ax2.text(xvals[peak_index], yval+10, name, horizontalalignment = 'center', c='r')

                intensity = yval - cs(xvals)[peak_index]

                writer.writerow(['{0:.4f} {1:.1f} {2} 0'.format(xvals[peak_index], intensity, name)])

    #Append to list
    int_list.append(np.sum(yvals - cs(xvals)))

    ## Save plots #################################

    plt.savefig(outputdir + '/0plots/plot_' + prefix + '.pdf')
    plt.yscale('log')
    plt.savefig(outputdir + '/0plots/log_' + prefix + '.pdf')
    plt.close()


z from 14.900 to 15.000
y from 20.695 to 21.695
Current:   176 / 176	y = 21.695 / 21.695	z = 15.000 / 15.000     [hyd_00608.cbf] 

## Plot integrated intensities

In [None]:
for val in df['idty1(encoder)'].unique():
    yval = np.array(int_list)[np.array(df['idty1(encoder)'].tolist() == val)]
    zval = np.array(df['idtz2'].tolist())[np.array(df['idty1(encoder)'].tolist() == val)]
    
    # Take differential and calculate edge position
    edge = zval[np.argmax(np.gradient(yval))]
    
    plt.plot(zval, yval, label = 'y = {0:.3f}   edge = {1:.3f}'.format(val, edge))
    
plt.xlabel('Z position (mm)')
plt.ylabel('Integrated intensity')
plt.show()
plt.legend(loc='lower right')

plt.savefig(outputdir + '0integrated_intensity.pdf')

## Make a bash script

Save this text into a .sh file and run it - this will execute CMWP for each file sequentially

In [None]:
cmwpfolder = "/home/rhys/Documents/CMWP-210315/"

for index, row in df.iterrows():
    y=row['idty1(encoder)']; z=row['idtz2'];    
    
    print('./evaluate ' + outputdir.split(cmwpfolder)[-1] + 'y_{1:.3f}_z_{0:.3f}.dat auto'.format(z,y))

## 2D integration

In [None]:
data = [load_tifs(file) for file in files]

output2d = ais.integrate2d(data, npt_rad=intpoints, npt_azim=3600, correctSolidAngle=True, polarization_factor=0.99)
output1d = ais.integrate1d(data, npt=intpoints, correctSolidAngle=True, polarization_factor=0.99)

In [None]:
fig, (ax1, ax2) = plt.subplots(2,1, figsize=(14,12))
ax1.imshow(np.where(output2d[0]==0, np.nan, output2d[0]), vmin=0, vmax=0.5e11)

ax2.plot(output1d[0], output1d[1])
ax2.set_xlim(np.min(output1d[0]),np.max(output1d[0]))
ax2.set_yscale('log')

for name, pos in zip(peak_name, peak_pos):
    
    if np.min(output1d[0]) < pos < np.max(output1d[0]):

        frac = 1.42*intpoints*(pos-np.min(output1d[0]))/(np.max(output1d[0]))
        
        # draw line and print name
        ax2.axvline(pos, alpha=0.3, c='r')
        ax1.axvline(frac, alpha=0.3, c='r')
        
        ax1.text(frac, 3500, name, horizontalalignment = 'center', c='r')
        ax2.text(pos, np.max(output1d[1]), name, horizontalalignment = 'center', c='r')


plt.show()