In [1]:
# Download atomic data

!wget 'http://kookaburra.phyast.pitt.edu/hillier/cmfgen_files/atomic_data_15nov16.tar.gz'

--2021-03-18 16:22:52--  http://kookaburra.phyast.pitt.edu/hillier/cmfgen_files/atomic_data_15nov16.tar.gz
Resolving kookaburra.phyast.pitt.edu (kookaburra.phyast.pitt.edu)... 136.142.35.16
Connecting to kookaburra.phyast.pitt.edu (kookaburra.phyast.pitt.edu)|136.142.35.16|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 388849552 (371M) [application/x-gzip]
Saving to: ‘atomic_data_15nov16.tar.gz’


2021-03-18 16:24:53 (3.07 MB/s) - ‘atomic_data_15nov16.tar.gz’ saved [388849552/388849552]



In [2]:
# Extract atomic data

!tar -xvf atomic_data_15nov16.tar.gz

atomic/
atomic/.DS_Store
atomic/AL/
atomic/ARG/
atomic/BAR/
atomic/CA/
atomic/CARB/
atomic/CHANGES
atomic/CHL/
atomic/CHRO/
atomic/COB/
atomic/FE/
atomic/FLU/
atomic/HE/
atomic/HYD/
atomic/MAN/
atomic/MG/
atomic/misc/
atomic/NA/
atomic/NEON/
atomic/NICK/
atomic/NIT/
atomic/OXY/
atomic/PHOS/
atomic/POT/
atomic/rad_decay_data/
atomic/SCAN/
atomic/SIL/
atomic/SUL/
atomic/TIT/
atomic/VAN/
atomic/VAN/I/
atomic/VAN/I/27may10/
atomic/VAN/I/27may10/col_guess.dat
atomic/VAN/I/27may10/cross
atomic/VAN/I/27may10/vi_osc
atomic/VAN/I/27may10/vi_phot
atomic/TIT/II/
atomic/TIT/III/
atomic/TIT/IV/
atomic/TIT/IV/16oct15/
atomic/TIT/IV/18oct00/
atomic/TIT/IV/README
atomic/TIT/IV/18oct00/col_guess.dat
atomic/TIT/IV/18oct00/f_to_s_18.dat
atomic/TIT/IV/18oct00/f_to_s_ls_65.dat
atomic/TIT/IV/18oct00/jnk.tar
atomic/TIT/IV/18oct00/NEW_NAMES
atomic/TIT/IV/18oct00/phot_data.dat
atomic/TIT/IV/18oct00/tkiv_osc.dat
atomic/TIT/IV/18oct00/TkIVOSC_REV
atomic/TIT/IV/16oct15/col_guess.dat
atomic/TIT/IV/16oct15/f_to_s_1

In [3]:
import glob
import os

def get_filepath(directory, search_filename):
  '''
  Recursively search `directory` and return the path of the first entry whose
  final path component is exactly `search_filename`.

  Parameters
  ----------
  directory : str
      Root folder to search. It is expanded with the glob pattern `directory + "/**"`.
  search_filename : str
      Exact name to look for (compared with ==, not treated as a pattern).

  Returns
  -------
  str
      Path of the first match in glob's traversal order, or '' if nothing matches.
      Directories are matched as well as files.
  '''
  pathname = directory + "/**"

  # iglob yields entries lazily, so the traversal stops at the first hit instead
  # of building the complete recursive listing up front.
  for filepath in glob.iglob(pathname, recursive=True):
    # basename understands the platform's path separators, unlike split('/')
    if os.path.basename(filepath) == search_filename:
      return filepath

  return ''

In [4]:
# Search and get the path of the required file 'si2_osc_kurucz'

filepath = get_filepath('./atomic','si2_osc_kurucz')

# get_filepath signals "not found" with an empty string; fail here with a clear
# message rather than letting a later open()/read_csv call fail on an empty path.
if not filepath:
  raise FileNotFoundError("'si2_osc_kurucz' was not found under './atomic'")

print(filepath)

./atomic/SIL/II/16sep15/si2_osc_kurucz


In [5]:
import re
import numpy as np
import pandas as pd


# Lift every pandas display limit so tables are rendered in full
for display_option in ('display.max_rows', 'display.max_columns', 'display.width', 'display.max_colwidth'):
  pd.set_option(display_option, None)

lines_to_skip = 0   # Number of lines read up to and including the 'Number of transitions' line
energy_levels = 0   # Number of entries in table1
transitions = 0     # Number of entries in table2

# Scan the file from the top to pick up both table sizes. On the matching lines
# the count is the first whitespace-separated token.
# The `with` block closes the file once the scan is done.
with open(filepath, 'r') as file:
  for line in file:
    lines_to_skip += 1

    # Get number of energy levels
    if 'Number of energy levels' in line:
      # split() with no argument also copes with leading or repeated spaces
      energy_levels = int(line.split()[0])

    # Get number of transitions; nothing further is needed from the header
    elif 'Number of transitions' in line:
      transitions = int(line.split()[0])
      break

In [6]:
# Read table 1

column_names = ['Energy level','g','E(cm^-1)','10^15 Hz','eV','Lam(A)','ID','ARAD','C4','C6']

# Raw string: in a normal string literal "\s" is an invalid escape sequence
df1 = pd.read_csv(filepath, names=column_names, nrows=energy_levels, skiprows=lines_to_skip, sep=r"\s+")

# Skip over table 1 before reading table 2.
# NOTE: this advances lines_to_skip in place, so re-running this cell on its own
# shifts the offset again; re-run the cell that computes lines_to_skip first.
lines_to_skip += energy_levels

display(df1)

Unnamed: 0,Energy level,g,E(cm^-1),10^15 Hz,eV,Lam(A),ID,ARAD,C4,C6
0,3s2_3p_2Po[1/2],2.0,0.0,3.95241,16.346,758.5,1,0.0,-5.2900000000000007e-17,7.470000000000001e-33
1,3s2_3p_2Po[3/2],4.0,287.24,3.9438,16.31,760.2,2,0.0,-5.3000000000000005e-17,7.46e-33
2,3s_3p2_4Pe[1/2],2.0,42824.29,2.66857,11.036,1123.0,3,12220.0,-5.34e-17,7.490000000000001e-33
3,3s_3p2_4Pe[3/2],4.0,42932.62,2.66532,11.023,1125.0,4,2110.0,-5.34e-17,7.490000000000001e-33
4,3s_3p2_4Pe[5/2],6.0,43107.91,2.66007,11.001,1127.0,5,3246.0,-5.3500000000000007e-17,7.490000000000001e-33
5,3s_3p2_2De[3/2],4.0,55309.35,2.29428,9.488,1307.0,6,838600.0,-6.48e-17,7.660000000000001e-33
6,3s_3p2_2De[5/2],6.0,55325.18,2.2938,9.486,1307.0,7,798600.0,-6.48e-17,7.660000000000001e-33
7,3s2_4s_2Se[1/2],2.0,65500.47,1.98875,8.225,1507.0,8,1087000000.0,-6.79e-16,2.03e-32
8,3s_3p2_2Se[1/2],2.0,76665.35,1.65404,6.841,1812.0,9,1111000000.0,-9.14e-17,1.1300000000000001e-32
9,3s2_3d_2De[3/2],4.0,79338.5,1.5739,6.509,1905.0,10,3597000000.0,-1.21e-15,1.6e-32


In [7]:
def realign_and_merge_table_entries(row):
  '''
  Bring one row of table 2 into a common column layout.

  Parameters
  ----------
  row : pandas.Series
      One row indexed by the integer labels 0..9.

  Returns
  -------
  pandas.Series
      A new Series with the full transition in column 1 and the merged
      i-j entry in column 5. The Series passed in is left unmodified.
  '''
  last_value = row[9]

  # A blank last column ('' or NaN) means the transition was read into one column
  if isinstance(last_value, str):
    transition_in_one_column = last_value == ''
  else:
    transition_in_one_column = bool(pd.isna(last_value))

  if transition_in_one_column:
    # Shift one place to the right to align with the rows whose transition was
    # split in two; shift() already returns a new Series.
    row = row.shift(1, axis=0)
  else:
    # Transition values were read into 2 separate columns: merge them into the
    # form A-B in column 1. Work on a copy so the Series handed over by
    # DataFrame.apply is never mutated.
    row = row.copy()
    row[1] = row[0] + row[1]

  # Merge i-j entries to one column
  row[5] = row[5] + row[6]

  return row

In [8]:
def format_table(df):
  '''
  Tidy table 2 in place: discard the helper columns, label the remaining
  ones and append two empty placeholder columns. Nothing is returned.
  '''
  # Columns holding '|', NaN or data already merged elsewhere are not needed
  unused_columns = [0, 6, 7, 8, 9]
  df.drop(columns=unused_columns, inplace=True)

  # Give the surviving positional columns their names
  column_labels = {
    1: 'Transition',
    2: 'f',
    3: 'A',
    4: 'Lam(A)',
    5: 'i-j',
  }
  df.rename(columns=column_labels, inplace=True)

  # Append 'Lam(obs)' and '% Acc', both filled with NaN
  for placeholder in ('Lam(obs)', '% Acc'):
    df[placeholder] = np.nan

In [9]:
# Number of lines to skip after table1
# NOTE(review): hard-coded value, presumably specific to this file's layout - verify before reusing on other files
table2_header_lines = 12

# Read table2 into 10 positional columns.
# sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
# and matches how table 1 is read.
df2 = pd.read_csv(filepath, names=range(10), nrows=transitions, skiprows=lines_to_skip + table2_header_lines, sep=r"\s+")

# Fill nan entries with empty strings so every cell can be concatenated as text
df2 = df2.fillna('')

# Align every row to the same column layout, then drop/rename columns in place
df2 = df2.apply(realign_and_merge_table_entries, axis=1)

format_table(df2)

display(df2)

Unnamed: 0,Transition,f,A,Lam(A),i-j,Lam(obs),% Acc
0,3s2_3p_2Po[1/2]-3s_3p2_4Pe[1/2],5.741e-06,7023.0,2335.123,1-3,,
1,3s2_3p_2Po[1/2]-3s_3p2_4Pe[3/2],3.564e-08,21.91,2329.231,1-4,,
2,3s2_3p_2Po[1/2]-3s_3p2_2De[3/2],0.0007638,779300.0,1808.013,1-6,,
3,3s2_3p_2Po[1/2]-3s2_4s_2Se[1/2],0.1279,366100000.0,1526.707,1-8,,
4,3s2_3p_2Po[1/2]-3s_3p2_2Se[1/2],0.1062,416200000.0,1304.37,1-9,,
5,3s2_3p_2Po[1/2]-3s2_3d_2De[3/2],1.449,3041000000.0,1260.422,1-10,,
6,3s2_3p_2Po[1/2]-3s_3p2_2Pe[1/2],0.5942,2784000000.0,1193.29,1-14,,
7,3s2_3p_2Po[1/2]-3s_3p2_2Pe[3/2],0.2844,669400000.0,1190.416,1-15,,
8,3s2_3p_2Po[1/2]-3s2_5s_2Se[1/2],0.01603,102600000.0,1020.699,1-16,,
9,3s2_3p_2Po[1/2]-3s2_4d_2De[3/2],0.1469,499900000.0,989.873,1-17,,
