In [1]:
# Only for development: reload modified modules automatically before each cell is run,
# so that edits to imported packages take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from diffractem import io, tools
from diffractem.stream_parser import StreamParser, make_substream
import numpy as np
import pandas as pd
import os
import matplotlib
import seaborn as sns
bin_path = '/opts/crystfel_latest/bin/' # might be different than standard
from glob import glob
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

# Merging and analysis of dose-fractionated sets
...works essentially as in `merging.ipynb`, but additionally makes use of `partialator`'s `custom-split` capability to make sub-sets with the individual frame numbers.

To make this work, we first need a `hits_allframe_split.txt` file containing the labels for the custom split: for each Event in the stream file, the frame number within the dose-fractionation series of its crystal.

In [3]:
stream = StreamParser('streams/hits_allframe.stream')
print(f'Stream file contains {stream.num_crystals} crystals in {stream.num_shots} shots')
split_data = stream.shots[['file', 'Event', 'header/int//%/shots/frame']]
split_data.to_csv(stream.filename.rsplit('.', 1)[0] + '_split.txt', index=False, header=False, sep=' ')

!head -n 11 streams/hits_allframe_split.txt # have a look

Stream file contains 12750 crystals in 13220 shots
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//0 0
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//1 1
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//2 2
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//3 3
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//4 4
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//5 5
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//6 6
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//7 7
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//8 8
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//9 9
proc_data/LysoS1_001_00000_allframe_hit.h5 entry//10 0


## Preparation of a partialator script
...just as in `merging.ipynb`, except that we do not scan `stop-after`, but instead set `split=True`.

In [4]:
# get a list of stream files
stream_list = glob('streams/hits_allframe.stream')

popts = {'no-polarisation': True, 'no-Bscale': False, 'no-scale': False, 
        'force-bandwidth': 2e-5,  'force-radius': False, 'force-lambda': 0.0251,
            'push-res': 1.4,  'min-measurements': [3, ], 'model': ['unity', 'xsphere'],
            'symmetry': '422', 'no-logs': False, 'iterations': 3, 'j': 10}

# you need to set those if you want to use slurm to submit merging runs
slurm_opts = {'C': 'scratch', 
                'partition': 'medium', 
                'time': '"04:00:00"',
                'nodes': 1}

settings = tools.call_partialator(stream_list, popts, par_runs=4, 
                       split=True, out_dir='merged_frac',
                       slurm=False, cache_streams=False, 
                       slurm_opts=slurm_opts)

!chmod +x partialator_run.sh

Please run partialator_run.sh to start merging.


In [None]:
# Example of how to send data to a cluster (adapt user name, host and target folder).
# Uncomment the next line to transfer the stream files as well:
# !scp -r streams rbuecke1@transfer.gwdg.de:~/SHARED/EDIFF/temp
!scp partialator_run.sh rbuecke1@transfer.gwdg.de:~/SHARED/EDIFF/temp

In [None]:
# example how to get it back from a cluster
# %mkdir merged
# !scp 'rbuecke1@transfer.gwdg.de:~/SHARED/EDIFF/temp/merged/*.hkl*' merged/

## Analyze and validate results
...again, the only difference from `merging.ipynb` is that `custom_split=True` is set in the first command.

In [5]:
# check what hkls we have available....
settings = tools.get_hkl_settings('merged_frac/hits_allframe*-*.hkl', unique_only=True, custom_split=True)

if 'input' in settings.columns:
    settings['input'] = settings['input'].str.rsplit('/', 1, expand=True).iloc[:,-1]

default_symmetry = '422'
highres = 1.75 # highest shell, in A
nshells = 10

# tools.analyze_hkl() #...is used using ProcessPoolExecutor

ftrs = {}
with ProcessPoolExecutor() as exc:
    for _, s in settings.iterrows():
        ftrs[s.hklfile] = exc.submit(tools.analyze_hkl, fn=s.hklfile, cell='refined.cell', 
                              point_group=s.symmetry if 'symmetry' in s else default_symmetry, 
                              highres=highres, nshells=nshells, bin_path='/opts/crystfel_master/bin')    

err = {lbl: v.exception() for lbl, v in ftrs.items() if v.exception()}
if err:
    print('Analysis gave errors!', str(err))
out = {lbl: v.result() for lbl, v in ftrs.items() if not v.exception()}

sd = pd.concat([v.result()[0].assign(hklfile=lbl) 
                for lbl, v in ftrs.items() 
                    if not v.exception()], axis=0).merge(
    settings, on='hklfile')

overall = pd.concat([pd.DataFrame(v.result()[1], index=[lbl])
                for lbl, v in ftrs.items() 
                     if not v.exception()], axis=0).merge(
    settings, right_on='hklfile', left_index=True).rename(
    columns={'<snr>': 'SNR', 'redundancy': 'Red', 'completeness': 'Compl', 'CC*': 'CCstar'})

# write out results
%rm -f shell/*
for ident, grp in sd.groupby(['hklfile']):
    grp.sort_values('Center 1/nm')[['Center 1/nm', 'nref', 'Possible', 'Compl', 'Meas', 'Red', 'SNR',
           'Mean', 'd/A', 'Min 1/nm', 'Max 1/nm', 'CC', 'CCstar',
           'Rsplit']].to_csv(f'shell/{ident.rsplit("/",1)[-1]}.csv', index=False, float_format='%.2f')

#### Example to show results
...using the result DataFrame's `pivot` function.

In [6]:
# Convenient way to tabulate a figure of merit (FOM) per resolution shell (rows)
# and hkl file (columns). Put the FOM you want into `values`.
(
    sd
    .pivot(index='d/A', columns='hklfile', values=['CC'])
    .sort_index(ascending=False)
)

Unnamed: 0_level_0,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC,CC
hklfile,merged_frac/hits_allframe__3__unity-0.hkl,merged_frac/hits_allframe__3__unity-1.hkl,merged_frac/hits_allframe__3__unity-2.hkl,merged_frac/hits_allframe__3__unity-3.hkl,merged_frac/hits_allframe__3__unity-4.hkl,merged_frac/hits_allframe__3__unity-5.hkl,merged_frac/hits_allframe__3__unity-6.hkl,merged_frac/hits_allframe__3__unity-7.hkl,merged_frac/hits_allframe__3__unity-8.hkl,merged_frac/hits_allframe__3__unity-9.hkl,merged_frac/hits_allframe__3__xsphere-0.hkl,merged_frac/hits_allframe__3__xsphere-1.hkl,merged_frac/hits_allframe__3__xsphere-2.hkl,merged_frac/hits_allframe__3__xsphere-3.hkl,merged_frac/hits_allframe__3__xsphere-4.hkl,merged_frac/hits_allframe__3__xsphere-5.hkl,merged_frac/hits_allframe__3__xsphere-6.hkl,merged_frac/hits_allframe__3__xsphere-7.hkl,merged_frac/hits_allframe__3__xsphere-8.hkl,merged_frac/hits_allframe__3__xsphere-9.hkl
d/A,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
6.8,0.894263,0.89057,0.8982,0.881846,0.904125,0.905594,0.904968,0.898064,0.894174,0.918648,0.953118,0.965225,0.959272,0.951108,0.959749,0.952103,0.959978,0.941939,0.929941,0.948518
3.34,0.863738,0.892459,0.873871,0.862829,0.877579,0.864587,0.840571,0.819184,0.779658,0.711205,0.932509,0.948722,0.946435,0.936658,0.942141,0.934908,0.915394,0.896652,0.856425,0.831651
2.79,0.727442,0.809905,0.784169,0.744783,0.680304,0.61031,0.511153,0.38561,0.250364,0.159529,0.8363,0.887924,0.852896,0.82309,0.76352,0.698603,0.63115,0.471586,0.325904,0.25371
2.49,0.569118,0.682201,0.652567,0.534936,0.39566,0.342205,0.201389,0.095184,0.11179,0.021958,0.658808,0.756171,0.714873,0.658713,0.523855,0.331639,0.275017,0.164414,0.14918,0.030346
2.29,0.540939,0.675665,0.585916,0.505474,0.38344,0.174699,0.163945,0.085382,0.02945,-0.00908,0.603564,0.731453,0.70407,0.616669,0.502047,0.241041,0.190163,0.121846,-0.001913,0.033664
2.14,0.373143,0.491047,0.451207,0.326829,0.225954,0.160447,0.048699,0.007928,0.024893,0.018319,0.446855,0.580275,0.534136,0.426494,0.262028,0.141423,0.083689,0.081256,-0.040357,0.032716
2.02,0.310046,0.448413,0.304287,0.215154,0.110463,0.082001,0.012723,0.028468,-0.03158,-0.049409,0.36609,0.482705,0.368866,0.28393,0.202974,0.071181,0.069529,0.017684,0.037963,-0.061568
1.93,0.252844,0.371027,0.244866,0.124577,0.059729,0.040893,0.027477,-0.006687,0.0445,0.055252,0.352049,0.394984,0.269039,0.146659,0.033141,0.143501,-0.048009,-0.019047,0.08651,-0.058591
1.85,0.182227,0.212516,0.035432,0.126743,0.007111,0.041096,-0.00934,-0.001123,0.01505,0.039448,0.250663,0.294675,0.127213,0.189508,0.022266,-0.038238,0.058157,-0.160349,-0.109467,0.062246
1.78,0.036999,0.005975,0.055575,-0.031129,0.084117,-0.069578,0.007875,0.010234,0.043916,0.051098,0.139678,0.146471,0.090546,0.185249,0.126675,-0.030745,0.276936,-0.022099,0.051171,-0.12212


## Analysis Plot
...as Fig. 1 in `merging.ipynb`.

In [8]:
# %matplotlib inline
%matplotlib widget

model = 'unity' # 'unity' or 'xsphere'

# SETTINGS ---

fh, axs = plt.subplots(2, 2, figsize=(18/2.54,15/2.54), dpi=120, sharex=True)
lsp, lrow = 0.85, 3 # space near top left for legend, and # of legend columns

# pick your FOMs and their y ranges
FOMs = [('CC', 0, 1), ('Mean', 0, 40), ('Compl', 0, 100), ('Red', 0, 100)]
sdsel = sd.query(f'model == "{model}"')
angstrom = False # if True, show x axis in A, instead of 1/nm

# ------

try:
    import seaborn as sns
    sns.set('notebook','whitegrid') # optional. Set a style...
except:
    print('Seaborn not installed, it seems.')

axs = axs.ravel()

# ids = get_id_table(sdsel['identifier'])
idcols = [cn for cn, col in sdsel[settings.columns].iteritems() 
          if len(col.unique()) > 1 and (cn != 'hklfile')]
print('Legend is', ' '.join(idcols))

for ident, grp in sdsel.groupby(['hklfile']):
    
    ls = '-'
     
    lbl = tuple(grp[idcols].drop_duplicates().values.astype(str).ravel())
    
    for ax, (fom, ymin, ymax) in zip(axs, FOMs):
        ax.plot(grp['d/A'] if angstrom else grp['Center 1/nm'], grp[fom], 
                label=' '.join(lbl), ls=ls)
        ax.set_title(fom)
        ax.set_ylim((ymin, ymax))
        if angstrom:
            ax.set_xlim(sorted(ax.get_xlim(), reverse=True))
        ax.grid(True)
        if fom in ['CC', 'CCstar']:
            ax.axhline(0.143 if fom == 'CC' else 0.5,ls=':')
        
lg = fh.legend(*ax.get_legend_handles_labels(), ncol=lrow, 
               fontsize='xx-small', loc='lower center', 
               bbox_to_anchor=(0.5, lsp), frameon=True)
axs[-1].set_xlabel(r'Resolution shell/Å' if angstrom else r'Resolution shell/nm$^{-1}$')
plt.draw()
# lpos = lg.get_window_extent()

fh.subplots_adjust(wspace=0.3, top=lsp-0.05)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Legend is split_label
