## Preparing theoretical data

In [33]:
import os
from collections import OrderedDict
import pandas as pd

# Root directory of the theoretical data files (relative path). The files used
# below live in a per-element subdirectory of it, e.g. 'Y/'.
THEO_PATH = '../data/theoretical/'

In [34]:
%%bash
# Print the first lines of one theoretical file to inspect its column layout.
head ../data/theoretical/Y/SP1000.00_1.00e+11_0_0_4000_30.dat

  4.2564470    4.899e-02      30:  2p5 3d1     15 J=3.0    - 2s1 3d1     37   J=2.0   : 4.8991E-02 |
  4.2754430    3.217e-01      30:  2p5 3s1      3 J=1.0    - 2s1 3s1     29   J=0.0   : 3.2167E-01 |
  4.3282330    9.193e-02      30:  2p5 3d1     21 J=2.0    - 2s1 3d1     37   J=2.0   : 9.1932E-02 |
  4.3700320    8.582e-03      30:  2s1 4p1     81 J=1.0    - 2s1 5s1    141   J=0.0   : 8.5823E-03 |
  4.3714200    3.711e-02      30:  2p5 3d1     17 J=4.0    - 2s1 3d1     36   J=3.0   : 3.7106E-02 |
  4.3739900    9.262e-03      30:  2p5 3p1      4 J=1.0    - 2s1 3p1     30   J=0.0   : 9.2620E-03 |
  4.3832130    3.618e-02      30:  2p5 3p1      6 J=3.0    - 2s1 3p1     32   J=2.0   : 3.6178E-02 |
  4.3860360    8.521e-02      30:  2p5 3d1     22 J=3.0    - 2s1 3d1     37   J=2.0   : 8.5211E-02 |
  4.3905940    2.353e-02      30:  2p5 3s1      2 J=2.0    - 2s1 3s1     28   J=1.0   : 2.3527E-02 |
  4.4226470    3.382e-02      30:  2p5 3d1     15 J=3.0    - 2s1 3d1     35   J=2.0   : 3.3

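The output above shows the format of the theoretical files.
The energy and the charge state are encoded in the file name as its last two underscore-separated fields (e.g. `..._4000_30.dat` → energy 4000, charge state 30).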

In [37]:
def organizeFile(fname):
    """Parse one theoretical line-list file into a dict of parallel lists.

    Each data line is split on whitespace. A line such as

        4.2564470  4.899e-02  30:  2p5 3d1  15 J=3.0  - 2s1 3d1  37  J=2.0  : 4.8991E-02 |

    yields the tokens

        ['4.2564470', '4.899e-02', '30:', '2p5', '3d1', '15', 'J=3.0',
         '-', '2s1', '3d1', '37', 'J=2.0', ':', '4.8991E-02', '|']

    of which tokens 0-6 and 8-11 are used; the remaining ones are ignored.

    Parameters
    ----------
    fname : str
        Path of the file to read.

    Returns
    -------
    dict
        Maps each of 'Wavelength', 'Intensity', 'State', 'UpperLevel',
        'UpperLevelNr', 'UpperLevelJ', 'LowerLevel', 'LowerLevelNr' and
        'LowerLevelJ' to a list with one entry per parsed line, in file order.

    Raises
    ------
    IndexError, ValueError
        If a non-blank line does not follow the layout shown above.
    """
    wvls, ints, csts = [], [], []
    ulevel, ulevelnr, ulevelJ = [], [], []
    llevel, llevelnr, llevelJ = [], [], []

    with open(fname, 'r') as f:
        for line in f:
            # str.split() without arguments splits on any run of whitespace
            # (spaces, tabs, the trailing newline) and never yields empty
            # tokens, so no extra filtering is needed.
            spl = line.split()
            if not spl:
                # Blank or whitespace-only line (e.g. at the end of the file):
                # nothing to parse.
                continue

            wvls.append(float(spl[0]))
            ints.append(float(spl[1]))
            csts.append(int(spl[2][:-1]))          # '30:'   -> 30
            ulevel.append(spl[3] + '-' + spl[4])   # '2p5', '3d1' -> '2p5-3d1'
            ulevelnr.append(int(spl[5]))
            ulevelJ.append(float(spl[6][2:]))      # 'J=3.0' -> 3.0
            # spl[7] is the '-' separating the two levels.
            llevel.append(spl[8] + '-' + spl[9])
            llevelnr.append(int(spl[10]))
            llevelJ.append(float(spl[11][2:]))

    return {
        'Wavelength': wvls, 'Intensity': ints, 'State': csts,
        'UpperLevel': ulevel, 'UpperLevelNr': ulevelnr, 'UpperLevelJ': ulevelJ,
        'LowerLevel': llevel, 'LowerLevelNr': llevelnr, 'LowerLevelJ': llevelJ,
    }

In [42]:
# Try the parser on a single file; `dic` maps column names to parallel lists.
dic = organizeFile('../data/theoretical/Y/SP1000.00_1.00e+11_0_0_4000_30.dat')
# Optional preview of the parsed file as a DataFrame with a fixed column order
# (left disabled):
#pd.DataFrame(dic).reindex(['Wavelength','Intensity','State','UpperLevel', 'UpperLevelNr', 'UpperLevelJ',
#            'LowerLevel', 'LowerLevelNr', 'LowerLevelJ'], axis=1)

In [55]:
# Read every theoretical file in the 'Y' directory into a single DataFrame
# `df`, tagging each row with the energy taken from the file name and with
# the element symbol.
y_dir = '../data/theoretical/Y/'
column_order = ['Wavelength', 'Intensity', 'State',
                'UpperLevel', 'UpperLevelNr', 'UpperLevelJ',
                'LowerLevel', 'LowerLevelNr', 'LowerLevelJ']

frames = []
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of `df` reproducible across runs and machines.
for fname in sorted(os.listdir(y_dir)):
    fpath = os.path.join(y_dir, fname)
    if not os.path.isfile(fpath):
        # Skip sub-directories (e.g. notebook checkpoint folders).
        continue

    # The energy is the second-to-last '_'-separated field of the file name,
    # e.g. 'SP1000.00_1.00e+11_0_0_4000_30.dat' -> 4000.
    energy = int(fname.split('_')[-2])

    frame = pd.DataFrame(organizeFile(fpath)).reindex(column_order, axis=1)
    frame['Energy'] = energy
    frame['Element'] = 'Y'
    frames.append(frame)

if not frames:
    # Fail here rather than leaving `df` undefined (or stale from an earlier
    # kernel state) for the cells below.
    raise FileNotFoundError('No theoretical data files found in ' + y_dir)

# Concatenate once: calling pd.concat inside the loop re-copies the
# accumulated frame on every iteration.
df = pd.concat(frames, ignore_index=True)

In [60]:
# Show only the rows whose State column (parsed from the third field of each
# data line) equals 34.
df[df.State==34]

Unnamed: 0,Wavelength,Intensity,State,UpperLevel,UpperLevelNr,UpperLevelJ,LowerLevel,LowerLevelNr,LowerLevelJ,Energy,Element
0,4.104808,0.000060,34,0-2p2,3,2.0,2s1-2p3,14,2.0,9000,Y
1,4.538766,0.000053,34,2s1-2p3,8,2.0,0-2p4,18,1.0,9000,Y
2,4.558884,0.000299,34,2s1-2p3,5,2.0,0-2p4,16,2.0,9000,Y
3,4.785688,0.000594,34,0-2p2,4,2.0,2s1-2p3,15,1.0,9000,Y
4,4.793210,0.013670,34,0-2p2,2,1.0,2s1-2p3,13,1.0,9000,Y
5,4.889556,0.002091,34,2s1-2p3,7,1.0,0-2p4,17,0.0,9000,Y
6,4.962061,0.000455,34,0-2p2,2,1.0,2s1-2p3,12,2.0,9000,Y
7,5.015648,0.073140,34,0-2p2,3,2.0,2s1-2p3,13,1.0,9000,Y
8,5.154492,0.059150,34,0-2p2,2,1.0,2s1-2p3,11,1.0,9000,Y
9,5.200836,0.018540,34,0-2p2,3,2.0,2s1-2p3,12,2.0,9000,Y
