In [101]:
# Read the whole decay file into memory as a single string (`dectxt`).
with open('DECAY_BELLE2.DEC') as f:
    dectxt = f.read()

Useful resources:
* https://tomassetti.me/parsing-in-python/
* https://github.com/lark-parser/lark/blob/master/docs/lark_cheatsheet.pdf
* https://github.com/lark-parser/lark/blob/9f666a74595ff8ac0f550abbb687a517fe5d495d/lark/grammars/common.lark
* https://gitlab.cern.ch/lhcb-datapkg/Gen/DecFiles/tree/master/dkfiles
* https://github.com/PMunkes/evtgen/blob/master/DECAY.DEC


In [141]:
def strip_comments(dec):
    """
    Remove '#' comments from the text of a decay file.

    Parameters
    ----------
    dec : str or list of str
        Either the full file content (split on '\\n' after stripping the
        surrounding whitespace) or an already split list of lines.

    Returns
    -------
    list of str
        One entry per input line that is not a pure comment line. Leading and
        trailing spaces are removed, and everything from the first '#' onwards
        is dropped. Blank lines are kept as empty strings.

    Raises
    ------
    TypeError
        If `dec` is neither a str nor a list.
    """
    if isinstance(dec, list):
        lines = dec
    elif isinstance(dec, str):
        lines = dec.strip().split('\n')
    else:
        # TypeError is still an Exception, so existing handlers keep working.
        raise TypeError('Parameter given is neither a str or a list')

    res = []
    for line in lines:
        line = line.strip(' ')
        if line.startswith('#'):
            # The whole line is a comment: drop it entirely.
            continue
        pos = line.find('#')
        if pos >= 0:
            # Keep the statement in front of the comment (not the comment
            # itself) and trim the spaces that separated the two.
            line = line[:pos].rstrip(' ')
        res.append(line)
    return res

def find_all_particles(dec, printline = False):
    """
    Collect the names that follow a 'Decay' keyword at the start of a line.

    Parameters
    ----------
    dec : str or list of str
        Decay-file content, passed through `strip_comments` first.
    printline : bool, optional
        When True, print the index (within the comment-stripped lines) and
        the name for every 'Decay' line found.

    Returns
    -------
    set of str
        The distinct second tokens of all lines starting with 'decay'
        (case-insensitive).

    Raises
    ------
    ValueError
        If a line starting with 'enddecay' (case-insensitive) is seen while
        no 'Decay' block is open.
    """
    particles = set()
    # Must exist before the loop: an 'Enddecay' may be the first keyword seen.
    within_decay = False
    for i, line in enumerate(strip_comments(dec)):
        lowered = line.lower()
        if lowered.startswith('decay'):
            name = line.split()[1]
            particles.add(name)
            if printline:
                print(f"{i} Decay {name}")
            within_decay = True
        elif lowered.startswith('enddecay'):
            if not within_decay:
                raise ValueError("A possible EndDecay without a matching Decay!")
            # Close the block so that a second, stray 'Enddecay' is detected.
            within_decay = False
    return particles

In [143]:
#print('\n'.join(strip_comments(dectxt)))

In [119]:
# Collect the distinct names found after a 'Decay' keyword and report how many there are.
particles = find_all_particles(dectxt)
print(f"{len(particles)} particles in this dec file")

317 particles in this dec file


In [123]:
def find_possible_models(dec):
    """
    Gather candidate model keywords from a decay file.

    Every space-separated token except the first one on each comment-stripped
    line is a candidate; it is kept when `str.isupper()` is true for it.
    Returns the set of such tokens.
    """
    found = set()
    for stripped_line in strip_comments(dec):
        tokens = stripped_line.strip().split(' ')
        # The first token of a line is never treated as a model keyword.
        candidates = (token.strip() for token in tokens[1:])
        found.update(candidate for candidate in candidates if candidate.isupper())
    return found

In [144]:
#find_possible_models(dectxt)

In [145]:
#

In [31]:
from lark import Lark

In [15]:
# Print the sorted model names as double-quoted strings, each followed by "|".
# NOTE(review): `models` is not assigned in any visible cell (the
# find_possible_models call is commented out) - presumably left over from an
# earlier run; confirm before relying on Restart & Run All.
for m in sorted(models):
    print(f'"{m}"', end = "|")

"BCL"|"BGL"|"BTO3PI_CP"|"BTOSLLALI"|"BTOSLLBALL"|"BTOXSGAMMA"|"BTOXSLL"|"CB3PI-MPP"|"CB3PI-P00"|"D_DALITZ"|"ETAPRIME_DALITZ"|"ETA_FULLDALITZ"|"ETA_PI0DALITZ"|"GOITY_ROBERTS"|"HELAMP"|"HQET3"|"ISGW2"|"LLSW"|"OMEGA_DALITZ"|"PARTWAVE"|"PHSP"|"PI0_DALITZ"|"PYTHIA"|"SLN"|"SSD_CP"|"STS"|"SVP_HELAMP"|"SVS"|"SVV_CP"|"SVV_HELAMP"|"TAUHADNU"|"TAULNUNU"|"TAUSCALARNU"|"TAUVECTORNU"|"TSS"|"TVS_PWAVE"|"VLL"|"VSP_PWAVE"|"VSS"|"VSS_BMIX"|"VUB"|"VVP"|"VVPIPI"|"VVS_PWAVE"|"YMSTOYNSPIPICLEO"|"number"|

In [180]:
# Build a Lark LALR parser (`l`) for the decay-file format.
# The grammar in the raw string below accepts Define / Alias / ChargeConj /
# CDecay / SetLineshapePW / yesPhotos / noPhotos statements and
# Decay ... Enddecay blocks, optionally followed by "End". '#' comments and
# inline whitespace are ignored, and newlines are filtered out of the tree.
# NOTE(review): MODEL_NAME lists "GOITY_ROBERTS" twice, and contains "HQET" and
# "ETA_DALITZ" where the model list printed earlier in the notebook shows
# "HQET3" and no "ETA_DALITZ" - confirm the terminal matches the file's models.
# NOTE(review): lexer='standard' is presumably tied to the installed Lark
# version (newer releases are believed to call it 'basic') - confirm.
l = Lark(r'''
start : (line | _NEWLINE)+ (_END _NEWLINE*)?
line : (define | alias | chargeconj | commands | decay | cdecay | setlspw) _NEWLINE

_END : "End"

setlspw : "SetLineshapePW" label label label SIGNED_NUMBER

cdecay : "CDecay" label

define : "Define" label SIGNED_NUMBER

alias : "Alias" label label

chargeconj : "ChargeConj" label label

commands : yesphotos | nophotos
yesphotos : "yesPhotos" 
nophotos : "noPhotos"

decay : "Decay" particle _NEWLINE (decayline | _NEWLINE )+ "Enddecay"
decayline : (partdecay | wholedecay) ";"+ _NEWLINE
partdecay : SIGNED_NUMBER particle+ photos? model
wholedecay : SIGNED_NUMBER photos? model
photos : "PHOTOS"

label : LABEL
particle : LABEL // Add full particle parsing here


model : model_generic
model_generic : MODEL_NAME model_options?
model_options : (SIGNED_NUMBER | LABEL | _NEWLINE)+ 

// model_helamp : "HELAMP" (SIGNED_NUMBER SIGNED_NUMBER)+


// Terminal defintions
// To use a fast parser, we need to avoid conflicts

%import common.WS_INLINE
%import common.SIGNED_NUMBER
%import common.NEWLINE -> _NEWLINE  // _ means filter this out of the tree

// We must set priorities here to use lalr - match model name above label, and label above something else
MODEL_NAME.2 : "BTO3PI_CP"|"BTOSLLALI"|"BTOSLLBALL"|"BTOXSGAMMA"|"BTOXSLL"|"CB3PI-MPP"|"CB3PI-P00"|"D_DALITZ"|"ETA_DALITZ"|"GOITY_ROBERTS"|"HELAMP"|"HQET"|"ISGW2"|"OMEGA_DALITZ"|"PARTWAVE"|"PHSP"|"PI0_DALITZ"|"PYTHIA"|"SLN"|"STS"|"SVP_HELAMP"|"SVS"|"SVV_HELAMP"|"TAUHADNU"|"TAULNUNU"|"TAUSCALARNU"|"TAUVECTORNU"|"TSS"|"TVS_PWAVE"|"VLL"|"VSP_PWAVE"|"VSS"|"VSS_BMIX"|"VUB"|"VVP"|"VVPIPI"|"VVS_PWAVE"|"BGL"|"LLSW"|"BCL"|"SSD_CP"|"SVV_CP"|"GOITY_ROBERTS"|"ETA_PI0DALITZ"|"ETA_FULLDALITZ"|"ETAPRIME_DALITZ"|"YMSTOYNSPIPICLEO"
LABEL : /[a-zA-Z0-9\/\-+*_()']+/
COMMENT : /[#][^\n]*/

// We should ignore comments
%ignore COMMENT

// Disregard spaces in text
%ignore WS_INLINE
''' , debug=True, parser='lalr', lexer='standard')

In [178]:
# Re-join the line list into one newline-terminated string, parse it with `l`,
# and show the first 200 characters of the pretty-printed tree.
# NOTE(review): `dec_lines` is not assigned in any visible cell - presumably a
# list of decay-file lines produced earlier; confirm.
dec_text = '\n'.join(dec_lines) + '\n'

ll = l.parse(dec_text)
print(ll.pretty()[:200])

start
  line
    define
      label	qoverp_incohMix_B_s0
      1.0
  line
    define
      label	dm_incohMix_B_s0
      17.8e12
  line
    define
      label	qoverp_incohMix_B0
      1.0
  line
    de


In [179]:
# Display the entries of `dec_lines` from index 6581 to the end.
dec_lines[6581:]

['0.187    gamma  Upsilon       HELAMP 1. 0. 1.7320508 0. 2.4494897 0.',
 '2.4494897 0. 1.7320508 0. 1. 0.;  #best values from BBR 2014, using B(Y2S->gamma chib)',
 '0.813         g       g       PYTHIA         32;',
 'Enddecay',
 'Decay chi_b0(2P)',
 '0.0039    gamma  Upsilon             HELAMP 1. 0. 1. 0.;  #best values from BBR 2014, using B(Y3S->gamma chib)',
 '0.0139    gamma  Upsilon(2S)         HELAMP 1. 0. 1. 0.;',
 '0.9822          g       g       PYTHIA         32;',
 'Enddecay',
 'Decay chi_b1(2P)',
 '0.0995    gamma  Upsilon       HELAMP 1. 0. 1. 0. -1. 0. -1. 0.; #best values from BBR 2014, using B(Y3S->gamma chib)',
 '0.1997    gamma  Upsilon(2S)   HELAMP 1. 0. 1. 0. -1. 0. -1. 0.;',
 '0.0163     Upsilon omega      PHSP;',
 '0.00910    chi_b1  pi+  pi-                PHSP;',
 '0.00455    chi_b1  pi0  pi0                PHSP;',
 '0.167713         d       anti-d  PYTHIA         32;',
 '0.167713         u       anti-u  PYTHIA         32;',
 '0.167713         s       anti-s  

In [62]:
# Show the first 200 characters of `dec_text`.
print(dec_text[:200])

Define qoverp_incohMix_B_s0 1.0
Define dm_incohMix_B_s0 17.8e12
Define qoverp_incohMix_B0 1.0
Define dm_incohMix_B0 0.507e12
Define dm 0.507e12
Define alpha 1.365
Define beta  0.39
Define gamma 1.387



In [460]:
# Print the 1-based positions of every element of `txt` that contains 'Enddecay'.
# NOTE(review): `txt` is not assigned in any visible cell - presumably the raw
# lines of the decay file; confirm.
print(*[i+1 for i,t in enumerate(txt) if 'Enddecay' in t])

193 222 1485 2702 3709 4711 4727 4730 4733 4736 4739 4742 5215 5662 5680 5697 5763 5773 5778 5782 5786 5791 5795 5942 6084 6088 6276 6281 6462 6563 6662 6669 6673 6677 6681 6696 6700 6704 6708 6713 6717 6721 6725 6742 6748 6754 6760 6768 6773 6779 6785 6789 6793 6799 6805 6813 6817 6821 6825 6831 6837 6843 6849 6865 6872 6876 6882 6888 6894 6900 6908 6911 6914 6917 6920 6923 6929 6934 6939 6943 6948 6952 6964 6976 6988 7000 7010 7020 7030 7040 7053 7066 7079 7092 7105 7114 7123 7132 7144 7153 7162 7171 7175 7179 7183 7187 7192 7196 7206 7218 7222 7225 7228 7242 7254 7278 7283 7287 7291 7301 7311 7323 7328 7332 7336 7343 7347 7351 7359 7369 7389 7396 7409 7421 7431 7437 7444 7450 7455 7461 7484 7507 7530 7553 7576 7599 7622 7645 7682 7690 7842 7940 7965 7993 8028 8086 8131 8183 8196 8200 8207 8231 8253 8278 8289 8297 8314 8332 8341 8364 8403 8407 8416 8427 8459 8472 8489 8497 8512 8531 8541 8545 8548 8551 8554 8661 8666 8669 8676 8681 8684 8688 8691 8711 8730 8734 8737 8743 8748 8752 87

In [461]:
# List the SetLineshapePW statements: for each element of `txt`, keep the text
# before the first '#', strip it, and print its 0-based index and content when
# it is non-empty and mentions 'SetLineshapePW'.
for n, line in enumerate(txt):
    q = line.split("#")[0].strip()
    if q and 'SetLineshapePW' in q:
        print(n, q)

6683 SetLineshapePW D_1+ D*+ pi0 2
6684 SetLineshapePW D_1+ D*0 pi+ 2
6685 SetLineshapePW D_1- D*- pi0 2
6686 SetLineshapePW D_1- anti-D*0 pi- 2
6687 SetLineshapePW D_10 D*0 pi0 2
6688 SetLineshapePW D_10 D*+ pi- 2
6689 SetLineshapePW anti-D_10 anti-D*0 pi0 2
6690 SetLineshapePW anti-D_10 D*- pi+ 2
6727 SetLineshapePW D_2*+ D*+ pi0 2
6728 SetLineshapePW D_2*+ D*0 pi+ 2
6729 SetLineshapePW D_2*- D*- pi0 2
6730 SetLineshapePW D_2*- anti-D*0 pi- 2
6731 SetLineshapePW D_2*0 D*0 pi0 2
6732 SetLineshapePW D_2*0 D*+ pi- 2
6733 SetLineshapePW anti-D_2*0 anti-D*0 pi0 2
6734 SetLineshapePW anti-D_2*0 D*- pi+ 2


In [462]:
# Show the last five elements of `txt`, concatenated without a separator.
print(''.join(txt[-5:]))

#0.000000007 mu+     mu-     gamma   gamma                   PHSP;  #[New mode added] #[Reconstructed PDG2011]
Enddecay

End




In [530]:
%%time
# Parse all elements of `txt` concatenated into one string with the parser `l`;
# the cell's displayed value is the truthiness of the parse result.
parsed = l.parse(''.join(txt))
bool(parsed)

CPU times: user 1.21 s, sys: 17.9 ms, total: 1.23 s
Wall time: 1.25 s


In [531]:
# Inspect the last top-level child of the parse result.
parsed.children[-1]

Tree(line, [Tree(decay, [Tree(particle, [Token(LABEL, 'K_L0')])])])