### Imports

In [12]:
import os
import warnings
import sys
import numpy as np
import pandas as pd
#print(sys.path)

In [13]:
from carsus.io.cmfgen import (hdf_dump,CMFGENEnergyLevelsParser,CMFGENOscillatorStrengthsParser,CMFGENCollisionalDataParser,CMFGENPhotoionizationCrossSectionParser)

### Getting the CMFGEN dataset directory

In [22]:
# Respect a CMFGEN_DIR that is already set in the environment; only fall back
# to the repository-relative default when the variable is unset.
os.environ.setdefault('CMFGEN_DIR', '../CMFGEN/')
cmfgen_dir = os.environ['CMFGEN_DIR']

# Passed as the fourth positional argument to the hdf_dump calls in this notebook.
chunk_size = 10

# Listing the directory doubles as an early check that the dataset is present:
# it raises FileNotFoundError if cmfgen_dir does not exist.
os.listdir(cmfgen_dir)

FileNotFoundError: [Errno 2] No such file or directory: '../CMFGEN/'

### Setting file patterns to be matched

In [15]:
# Case variants of the "osc" marker handed to hdf_dump as its pattern list.
osc_patterns = [
    'osc',
    'OSC',
    'Osc',
]

In [16]:
# Run hdf_dump on the dataset directory with the energy-levels parser.
hdf_dump(
    cmfgen_dir,
    osc_patterns,
    CMFGENEnergyLevelsParser(),
    chunk_size,
)

In [17]:
# Patterns passed to hdf_dump as its fifth argument
# (presumably files to skip, going by the name -- confirm against hdf_dump).
ignore_patterns = [
    'ERROR_CHK',
    'hmi_osc',
]

# Same dump as for the energy levels, but with the oscillator-strengths parser.
hdf_dump(
    cmfgen_dir,
    osc_patterns,
    CMFGENOscillatorStrengthsParser(),
    chunk_size,
    ignore_patterns,
)

### Defining utility functions that help find specific rows

In [18]:
def search_header(file, string):
    """Return the leading integer of the first line of `file` containing `string`.

    The file is scanned line by line; on the first line that contains
    `string`, the first whitespace-separated token of that line is converted
    to ``int`` and returned.

    Parameters
    ----------
    file : str
        Path of the text file to scan.
    string : str
        Substring identifying the header line.

    Returns
    -------
    int
        The first whitespace-separated token of the matching line, as an integer.

    Raises
    ------
    ValueError
        If no line contains `string`, if the matching line has no tokens,
        or if its first token is not an integer.
    """
    with open(file) as handle:
        for line in handle:
            if string in line:
                tokens = line.split()
                if not tokens:
                    raise ValueError(
                        "line matching {!r} in {!r} is blank".format(string, file)
                    )
                # int() itself raises ValueError on a non-numeric first token.
                return int(tokens[0])

    # Reaching this point means the loop finished without a match. Fail
    # loudly instead of parsing whatever the last line happened to be.
    raise ValueError("{!r} not found in {!r}".format(string, file))

In [19]:
def find_row(file, string):
    """Return the zero-based index of the first line of `file` containing `string`.

    Parameters
    ----------
    file : str
        Path of the text file to scan.
    string : str
        Substring to look for.

    Returns
    -------
    int
        Zero-based line index of the first match (0 for the first line).

    Raises
    ------
    ValueError
        If no line of the file contains `string`.
    """
    with open(file) as handle:
        for row, line in enumerate(handle):
            if string in line:
                return row

    # Without this, a miss would be indistinguishable from a match on the
    # last line (or would yield -1 for an empty file).
    raise ValueError("{!r} not found in {!r}".format(string, file))

### Setting file path and fine-tuning arguments

In [20]:
# Build the data file path under the CMFGEN_DIR root rather than repeating the
# root as a literal; the default matches the notebook's '../CMFGEN/' location.
file = os.path.join(
    os.getenv('CMFGEN_DIR', '../CMFGEN/'),
    'atomic', 'SIL', 'II', '16sep15', 'si2_osc_kurucz',
)

# Keyword arguments forwarded to the pandas readers in the cells that follow:
# no header row in the data, and columns separated by runs of whitespace.
args = {
    'header': None,
    'delim_whitespace': True,
}


### Getting Energy Levels Header

In [21]:

# Row count comes from the "Number of energy levels" header line; reading
# starts at the first line that contains "0.000".
args.update(nrows=search_header(file, "Number of energy levels"))
args.update(skiprows=find_row(file, "0.000"))

energy_levels = pd.read_csv(file, **args)
energy_levels.columns = [
    'Energy Level', 'g', 'E(cm^-1)', '10^15 Hz', 'eV',
    'Lam(A)', 'ID', 'ARAD', 'C4', 'C6',
]
energy_levels


FileNotFoundError: [Errno 2] No such file or directory: '../CMFGEN/atomic/SIL/II/16sep15/si2_osc_kurucz'

### Printing oscillator strengths header in a naive way

In [None]:

# Row count comes from the "Number of transitions" header line; reading starts
# one line after the first line that contains "Transition".
args.update(nrows=search_header(file, "Number of transitions"))
args.update(skiprows=find_row(file, "Transition") + 1)

oss = pd.read_csv(file, **args)
oss

### Printing oscillator strengths header using fixed column widths. This method is not easily extensible to other files.

In [None]:
# (start, end) character positions of the five columns read from each line.
widths = [
    (0, 44),
    (49, 59),
    (61, 71),
    (74, 83),
    (87, 94),
]
oss = pd.read_fwf(file, colspecs=widths, **args)
oss.columns = ['Transition', 'f', 'A', 'Lam(A)', 'i-j']

# The two remaining columns are not read by the colspecs above; add them as NaN.
oss['Lam(obs)'] = np.nan
oss['% Acc'] = np.nan

# Show everything except the first row.
oss.iloc[1:]

In [None]:
# Re-read with the settings currently held in `args`, then display the rows at
# positions 1 and 4195.
oss = pd.read_csv(file, **args)
oss.iloc[[1, 4195]]

### Printing oscillator strengths header using regex. This method should work for most files with minimal changes.

In [None]:

# Switch from whitespace splitting to an explicit regex separator.
args['delim_whitespace'] = False

# Raw string so that `\|` reaches the regex engine untouched instead of being
# an invalid string escape. The pattern splits on either of:
#   * a hyphen preceded by any character other than 'E', plus any spaces after it
#     (presumably so exponents such as 1.0E-05 stay intact -- confirm against the data);
#   * two or more spaces not preceded by a hyphen, optionally followed by one of - , |
args['sep'] = r'(?<=[^E])-(?:[ ]{1,})?|(?<!-)[ ]{2,}[-,\|]?'

# Regex separators are only supported by the python parser; asking for it
# explicitly avoids the silent fallback and its ParserWarning.
oscillator_strengths = pd.read_csv(file, engine='python', **args)
oscillator_strengths.columns = [
    'State A', 'State B', 'f', 'A', 'Lam(A)',
    'i', 'j', 'Lam(obs)', '%Acc', '?',
]
oscillator_strengths.columns
