## Preparing theoretical data

In [1]:
import os
from collections import OrderedDict
import pandas as pd

# Directory that holds the theoretical spectrum files, relative to this notebook.
THEO_PATH = '../data/theoretical/'

In [2]:
%%bash
# List the contents of the theoretical data directory.
ls ../data/theoretical

Ba_21_29.dat
Dy_29_35.dat
Dy_32_39.dat
Er_34_41.dat
Mo_29_41.dat
Y


Folder structure

In [3]:
%%bash
# Show the first lines of one data file to illustrate the row layout.
head ../data/theoretical/Ba_21_29.dat

  1.0000130    8.127e+00      29:    0 3d*     1a J=0.0    - 3d9 5f1   159a   J=1.0   : 8.1270E+00 |
  1.0037440    2.569e-01      29:    0 3d*     1a J=0.0    - 3s1 4p1   144a   J=1.0   : 2.5689E-01 |
  1.0129800    5.429e-01      29:    0 3d*     1a J=0.0    - 3p5 4f1   137a   J=2.0   : 5.4286E-01 |
  1.0142860    3.211e+00      29:    0 3d*     1a J=0.0    - 3d9 5f1   136a   J=1.0   : 3.2110E+00 |
  1.0155400    4.444e-09      29:    0 3d*     1a J=0.0    - 3p5 4f1   134a   J=3.0   : 4.4437E-09 |
  1.0174100    5.172e-08      29:    0 3d*     1a J=0.0    - 3p5 4f1   129a   J=3.0   : 5.1725E-08 |
  1.0195420    1.426e-02      29:    0 3d*     1a J=0.0    - 3d9 5f1   121a   J=1.0   : 1.4259E-02 |
  1.0198820    6.556e-03      29:    0 3d*     1a J=0.0    - 3p5 4f1   120a   J=2.0   : 6.5564E-03 |
  1.0217320    2.067e-08      29:    0 3d*     1a J=0.0    - 3p5 4f1   118a   J=1.0   : 2.0668E-08 |
  1.0329960    1.656e-07      29:    0 3d*     1a J=0.0    - 3p5 4d1   115a   J=2.0   : 1.6

Format of the theoretical files.
Energy and charge state are in the file's name.

In [4]:
def organizeFile(fname):
    """Parse a theoretical spectrum file into a dict of column lists.

    Every row of the file is a sequence of ``|``-terminated segments. The
    first non-blank segment of a row starts with the wavelength and a second
    numeric column (not used here), followed by a transition record. Any
    further segments on the same row contain only a transition record and
    inherit that row's wavelength.

    After whitespace splitting, a transition record looks like::

        ['31:', '3s1', '3p2', '-25', 'J=1.0', '-',
         '3d1', '4d1', '-1667', 'J=1.0', ':', '2.1350E-20']

    i.e. state, first configuration pair with its level number and J, a
    dash, second configuration pair with its level number and J, a colon,
    and the intensity. Level numbers may carry an ``a`` marker (``159a``),
    which is removed before conversion to ``int``.

    Parameters
    ----------
    fname : str
        Path of the file to read.

    Returns
    -------
    dict
        Equal-length lists keyed by 'Wavelength', 'Intensity', 'State',
        'UpperLevel', 'UpperLevelNr', 'UpperLevelJ', 'LowerLevel',
        'LowerLevelNr' and 'LowerLevelJ'. The 'Upper*' entries come from the
        first configuration pair of a record, the 'Lower*' entries from the
        second. There is one list entry per transition record.

    Raises
    ------
    IndexError, ValueError
        If a non-blank segment has too few tokens or a field that cannot be
        converted to a number.
    """
    wvls, ints, csts = [], [], []
    ulevel, ulevelnr, ulevelJ = [], [], []
    llevel, llevelnr, llevelJ = [], [], []

    with open(fname, 'r') as f:
        for row in f:
            # Wavelength shared by every record on this row; None until the
            # first non-blank segment has been read.
            wv = None
            for segment in row.split('|'):
                tokens = segment.split()
                # Skip blank segments: the newline after the final '|', an
                # empty string when the file does not end with a newline, or
                # stray trailing whitespace.
                if not tokens:
                    continue
                if wv is None:
                    wv = float(tokens[0])
                    # Drop the wavelength and the unused second column so the
                    # record has the same layout as in later segments.
                    tokens = tokens[2:]

                wvls.append(wv)
                ints.append(float(tokens[-1]))
                csts.append(int(tokens[0][:-1]))          # strip trailing ':'
                ulevel.append(tokens[1] + '-' + tokens[2])
                ulevelnr.append(int(tokens[3].replace('a', '')))
                ulevelJ.append(float(tokens[4][2:]))      # strip leading 'J='
                llevel.append(tokens[6] + '-' + tokens[7])
                llevelnr.append(int(tokens[8].replace('a', '')))
                llevelJ.append(float(tokens[9][2:]))      # strip leading 'J='

    return {'Wavelength': wvls, 'Intensity': ints, 'State': csts,
            'UpperLevel': ulevel, 'UpperLevelNr': ulevelnr, 'UpperLevelJ': ulevelJ,
            'LowerLevel': llevel, 'LowerLevelNr': llevelnr, 'LowerLevelJ': llevelJ
           }

In [5]:
# Sanity check of the parser: load one file and preview the resulting table.
dic = organizeFile('../data/theoretical/Mo_29_41.dat')
df = pd.DataFrame(data=dic)
df.head()

Unnamed: 0,Intensity,LowerLevel,LowerLevelJ,LowerLevelNr,State,UpperLevel,UpperLevelJ,UpperLevelNr,Wavelength
0,7.9252e-21,3p2-3d1,2.0,-103,29,3p2-6g1,2.0,1135,1.000001
1,1.2223999999999999e-20,3p2-3d1,2.0,-76,29,3p2-6g1,3.0,973,1.000006
2,1.8846e-16,3p1-3d1,3.0,-152,31,3p1-6d1,2.0,342,1.000006
3,1.2406e-22,3p2-3d1,4.0,-67,29,3p2-6g1,5.0,933,1.000009
4,3.6022e-15,3d1-4d1,3.5,-943,32,2p5-3p2,3.5,-30,1.000009


In [6]:
# Atomic number for each element symbol that prefixes a data file name.
elist = {'Mo':42, 'Ba':56, 'Dy':66, 'Er':68}

# Energies that were previously typed in by hand for each file. Recording
# them here lets the notebook be re-run without interactive prompts.
known_energies = {
    'Mo_29_41.dat': 4650,
    'Dy_29_35.dat': 1200,
    'Dy_32_39.dat': 2000,
    'Er_34_41.dat': 2800,
    'Ba_21_29.dat': 1500,
}

frames = []
for f in os.listdir(THEO_PATH):
    path = os.path.join(THEO_PATH, f)
    # Skip sub-directories such as 'Y'; its files are handled separately.
    if not os.path.isfile(path):
        continue
    # Only files without a recorded energy still ask the user for one.
    if f in known_energies:
        energy = known_energies[f]
    else:
        energy = int(input(f+': '))
    element = f.split('_')[0]
    cdf = pd.DataFrame(organizeFile(path))
    cdf['Energy'] = energy
    cdf['Element'] = elist[element]
    frames.append(cdf)

# Concatenate once at the end rather than re-copying the growing frame on
# every iteration.
df = pd.concat(frames, ignore_index=True)

Mo_29_41.dat: 4650
Dy_29_35.dat: 1200
Dy_32_39.dat: 2000
Er_34_41.dat: 2800
Ba_21_29.dat: 1500


In [7]:
# Preview the combined table with the columns in reading order; df itself is
# left unchanged by this cell.
df.reindex(columns=[
    'Wavelength',
    'Intensity',
    'State',
    'UpperLevel',
    'UpperLevelNr',
    'UpperLevelJ',
    'LowerLevel',
    'LowerLevelNr',
    'LowerLevelJ',
    'Element',
    'Energy',
]).head()

Unnamed: 0,Wavelength,Intensity,State,UpperLevel,UpperLevelNr,UpperLevelJ,LowerLevel,LowerLevelNr,LowerLevelJ,Element,Energy
0,1.000001,7.9252e-21,29,3p2-6g1,1135,2.0,3p2-3d1,-103,2.0,42,4650
1,1.000006,1.2223999999999999e-20,29,3p2-6g1,973,3.0,3p2-3d1,-76,2.0,42,4650
2,1.000006,1.8846e-16,31,3p1-6d1,342,2.0,3p1-3d1,-152,3.0,42,4650
3,1.000009,1.2406e-22,29,3p2-6g1,933,5.0,3p2-3d1,-67,4.0,42,4650
4,1.000009,3.6022e-15,32,2p5-3p2,-30,3.5,3d1-4d1,-943,3.5,42,4650


In [8]:
# Make the column order permanent for the steps that follow.
df = df.reindex(columns=[
    'Wavelength',
    'Intensity',
    'State',
    'UpperLevel',
    'UpperLevelNr',
    'UpperLevelJ',
    'LowerLevel',
    'LowerLevelNr',
    'LowerLevelJ',
    'Element',
    'Energy',
])

## We have data for yttrium as well, but it is in a different format

In [9]:
%%bash
# Show the first few entries of the Y sub-directory.
ls ../data/theoretical/Y | head

SP1000.00_1.00e+11_0_0_4000_30.dat
SP1000.00_1.00e+11_0_0_4000_31.dat
SP1000.00_1.00e+11_0_0_4000_32.dat
SP1000.00_1.00e+11_0_0_4000_33.dat
SP1000.00_1.00e+11_0_0_4000_34.dat
SP1000.00_1.00e+11_0_0_4000_35.dat
SP1000.00_1.00e+11_0_0_4000_36.dat
SP1000.00_1.00e+11_0_0_4000_37.dat
SP1000.00_1.00e+11_0_0_4100_30.dat
SP1000.00_1.00e+11_0_0_4100_31.dat


Charge and energy are in the file's name.

In [10]:
# Sub-directory with the yttrium files; the trailing slash matters because
# file names are appended to this string directly.
Y_fold = '../data/theoretical/Y/'

In [11]:
# Atomic number of yttrium, stored in the 'Element' column.
Y_ATOMIC_NUMBER = 39

y_frames = []
for file in os.listdir(Y_fold):
    print(file)
    # The energy is the second-to-last underscore-separated field of the
    # file name, e.g. '..._0_0_4000_30.dat' -> 4000.
    energy = int(file.split('_')[-2])
    cdf = pd.DataFrame(organizeFile(Y_fold+file))
    cdf['Energy'] = energy
    cdf['Element'] = Y_ATOMIC_NUMBER
    y_frames.append(cdf)

# Re-running this cell must not append the yttrium rows a second time, so
# rows already tagged with yttrium's atomic number are dropped first.
if 'Element' in df.columns:
    df = df[df['Element'] != Y_ATOMIC_NUMBER]

# Concatenate once instead of re-copying the whole table for every file.
df = pd.concat([df] + y_frames, ignore_index=True)

SP1000.00_1.00e+11_0_0_9000_34.dat
SP1000.00_1.00e+11_0_0_4100_37.dat
SP1000.00_1.00e+11_0_0_4000_37.dat
SP1000.00_1.00e+11_0_0_4000_36.dat
SP1000.00_1.00e+11_0_0_4100_36.dat
SP1000.00_1.00e+11_0_0_9000_35.dat
SP1000.00_1.00e+11_0_0_9000_37.dat
SP1000.00_1.00e+11_0_0_4000_34.dat
SP1000.00_1.00e+11_0_0_4100_34.dat
SP1000.00_1.00e+11_0_0_4100_35.dat
SP1000.00_1.00e+11_0_0_4000_35.dat
SP1000.00_1.00e+11_0_0_9000_36.dat
SP1000.00_1.00e+11_0_0_9000_32.dat
SP1000.00_1.00e+11_0_0_4000_31.dat
SP1000.00_1.00e+11_0_0_4100_31.dat
SP1000.00_1.00e+11_0_0_4100_30.dat
SP1000.00_1.00e+11_0_0_4000_30.dat
SP1000.00_1.00e+11_0_0_9000_33.dat
SP1000.00_1.00e+11_0_0_9000_31.dat
SP1000.00_1.00e+11_0_0_4100_32.dat
SP1000.00_1.00e+11_0_0_4000_32.dat
SP1000.00_1.00e+11_0_0_4000_33.dat
SP1000.00_1.00e+11_0_0_4100_33.dat
SP1000.00_1.00e+11_0_0_9000_30.dat
SP1000.00_1.00e+11_0_0_5000_37.dat
SP1000.00_1.00e+11_0_0_4700_31.dat
SP1000.00_1.00e+11_0_0_4600_31.dat
SP1000.00_1.00e+11_0_0_4600_30.dat
SP1000.00_1.00e+11_0

In [12]:
# Check which element codes ended up in the combined table.
df['Element'].unique()

array([42, 66, 68, 56, 39])

In [13]:
# Write the combined table to theoSpec.csv in the working directory,
# leaving out the row index.
df.to_csv('theoSpec.csv', index=False)