In [1]:
import pandas as pd
import myutils.data_util

In [2]:
# Relative path to the raw fixed-width text table that is parsed with pd.read_fwf below.
file_path = 'data/raw_AME2020.txt'

In [28]:
# (start, end) character spans passed to pd.read_fwf as `colspecs`, one span per
# extracted column; pandas treats each span as a half-open interval [start, end).
# NOTE(review): the df.head() preview shows 0.0 in the uncertainty column for every
# row, including rows with a non-zero binding energy - confirm that (124, 130)
# really covers the intended uncertainty field of the raw file.
cols_needed = [
    (1, 6),  # N - Z
    (6, 10),  # N
    (11, 16),  # Z
    (16, 20),  # A
    (56, 69),  # Experimental BINDING ENERGY / A (keV)
    (124, 130)  # Uncertainty (keV)
]

In [29]:
# Parse the fixed-width table, reading only the spans listed in `cols_needed`.
# The first 36 lines are skipped and no line is used as a header, so the columns
# start out with integer labels.
# NOTE(review): 36 is presumably the length of the file's preamble - confirm
# against the raw file.
df = pd.read_fwf(
    filepath_or_buffer=file_path,
    skiprows=36,
    header=None,
    colspecs=cols_needed,
)

In [30]:
# Label the six parsed columns, in the same order as the spans in `cols_needed`.
column_names = [
    'N-Z',
    'N',
    'Z',
    'A',
    'BINDING_energy_per_nucleon(keV)',
    'Uncertainty(keV)',
]
df.columns = column_names
# Last expression of the cell: show the first rows as a quick sanity check.
df.head()

Unnamed: 0,N-Z,N,Z,A,BINDING_energy_per_nucleon(keV),Uncertainty(keV)
0,1,1,0,1,0.0,0.0
1,-1,0,1,1,0.0,0.0
2,0,1,1,2,1112.2831,0.0
3,1,2,1,3,2827.2654,0.0
4,-1,1,2,3,2572.68044,0.0


In [31]:
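# Disabled: drop non-experimental rows, i.e. rows whose binding-energy field contains '#'.
# While the line below stays commented out, those rows are kept and the '#' marker
# is simply stripped when the column is cleaned.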
# df = df[~df['BINDING_energy_per_nucleon(keV)'].str.contains('#', na=False)]

In [32]:
def clean_and_convert(column_to_clean, frame=None):
    """Return one column as floats, with marker characters removed.

    Spaces, asterisks ('*') and hash signs ('#') are deleted from every string
    value before the column is converted to ``float``.

    A column that is already numeric is returned as ``float`` unchanged, so
    re-running the cleaning cell no longer fails with
    "Can only use .str accessor with string values".

    Args:
        column_to_clean: Label of the column to clean.
        frame: DataFrame to read the column from. Defaults to the notebook-level
            ``df`` so existing single-argument calls keep working.

    Returns:
        A new float Series; the source frame is not modified.

    Raises:
        ValueError: If a value is still not parseable as a float after the
            marker characters have been removed.
    """
    source = df if frame is None else frame
    column = source[column_to_clean]

    # Already numeric (e.g. the cell was run before): nothing to strip.
    if pd.api.types.is_numeric_dtype(column):
        return column.astype(float)

    return column.str.replace(r'[ *#]', '', regex=True).astype(float)

In [33]:
# Replace both text columns with their cleaned float versions
# (uncertainty first, then binding energy per nucleon).
for column_name in ('Uncertainty(keV)', 'BINDING_energy_per_nucleon(keV)'):
    df[column_name] = clean_and_convert(column_name)
print(f"{len(df)} samples in the dataset now.")

3558 samples in the dataset now.


In [34]:
# data filtering
# Keep only nuclides with Z >= 8 and N >= 8.
# .copy() makes the filtered result an independent DataFrame: later cells add
# columns to `df`, and doing that on a boolean-mask selection is the pattern that
# triggers pandas' SettingWithCopyWarning.
df = df[(df['Z'] >= 8) & (df['N'] >= 8)].copy()
# df = df[df['Uncertainty(keV)'] <= 150]
print(f"{len(df)} samples in the dataset now.")

3456 samples in the dataset now.


In [35]:
# Derive the extra input columns from N, Z and A.
neutron_number = df['N']
proton_number = df['Z']
mass_number = df['A']

df['A^(2/3)'] = mass_number ** (2 / 3)
df['A^(-1/3)'] = mass_number ** (-1 / 3)
# Parity indicator: +1 when N and Z are both even, -1 when both are odd, 0 otherwise.
df['P'] = ((-1) ** neutron_number + (-1) ** proton_number) / 2
# Asymmetry 1 - 2Z/A (equals (N - Z)/A whenever A = N + Z).
df['I'] = 1 - 2 * proton_number / mass_number
# Total binding energy: per-nucleon value (keV) times A, converted to MeV.
df['B_exp(MeV)'] = df['BINDING_energy_per_nucleon(keV)'] * mass_number / 1000

In [36]:
def cal_binding_ldm(row):
    """Evaluate the liquid-drop-model (LDM) binding energy of one nuclide.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the entries 'Z', 'N', 'A' and 'I'.

    Returns:
        The sum of five contributions: a term proportional to A, a term
        proportional to A^(2/3), a Z^2 / A^(1/3) term, a c1-weighted I^2 * A
        term and a parity-dependent pairing term.
    """
    # Fixed model coefficients.
    # NOTE(review): the values are hard-coded and no source is cited here -
    # confirm them against the reference parametrisation.
    a_v, k_v = 15.6606, -2.0026
    a_s, k_s = -18.4157, -3.0620
    a_c = -0.7149
    c1 = -42.3948
    a_pair = 6.7656

    Z, N, A, Iso = row['Z'], row['N'], row['A'], row['I']
    abs_iso = abs(Iso)

    # Pairing factor, selected by the parities of Z and N.
    proton_parity = Z % 2
    neutron_parity = N % 2
    if proton_parity == 0 and neutron_parity == 0:  # Z-even, N-even
        pairing_factor = 2 - abs_iso
    elif proton_parity == 1 and neutron_parity == 1:  # Z-odd, N-odd
        pairing_factor = abs_iso
    elif proton_parity == 1 and neutron_parity == 0:  # Z-odd, N-even
        pairing_factor = 1 - abs_iso if N > Z else 1
    elif proton_parity == 0 and neutron_parity == 1:  # Z-even, N-odd
        pairing_factor = 1 if N > Z else 1 - abs_iso
    else:
        # No parity class matched (non-integer Z or N); the multiplication
        # below then raises TypeError.
        pairing_factor = None

    # Each term keeps the original operation order so the floating-point
    # result is bit-for-bit the same.
    linear_term = a_v * (1 + k_v * Iso ** 2) * A
    two_thirds_term = a_s * (1 + k_s * Iso ** 2) * A ** (2 / 3)
    charge_term = a_c * Z ** 2 / A ** (1 / 3) * (1 - Z ** (-2 / 3))
    c1_term = c1 * ((2 - abs_iso) / (2 + abs_iso * A)) * Iso ** 2 * A
    pairing_term = a_pair * A ** (-1/3) * pairing_factor

    return linear_term + two_thirds_term + charge_term + c1_term + pairing_term

In [37]:
# Evaluate the LDM binding energy row by row, then store it together with the
# residual (experimental minus LDM).
ldm_binding = df.apply(cal_binding_ldm, axis=1)
df['B_LDM(MeV)'] = ldm_binding
df['LDM_residual(MeV)'] = df['B_exp(MeV)'] - ldm_binding

In [38]:
# Candidate magic numbers; the same list is used for protons and neutrons.
magic_numbers = [8, 20, 28, 50, 82, 126]


def nearest_magic_number(nucleons):
    """Return the entry of `magic_numbers` closest to `nucleons`.

    When two entries are equally close, the one listed first (the smaller
    one) is returned.
    """
    def distance(magic):
        return abs(magic - nucleons)

    return min(magic_numbers, key=distance)

def cal_d(row):
    """Compute v_p * v_n / (v_p + v_n) for one nuclide.

    v_p and v_n are the absolute distances of Z and N from their nearest
    magic numbers.

    Args:
        row: Mapping-like record providing 'Z' and 'N'.

    Returns:
        0 when both distances are zero (Z and N are both magic), otherwise
        the ratio of their product to their sum.
    """
    proton_count, neutron_count = row['Z'], row['N']

    valence_protons = abs(proton_count - nearest_magic_number(proton_count))
    valence_neutrons = abs(neutron_count - nearest_magic_number(neutron_count))

    valence_total = valence_protons + valence_neutrons
    # Guard against 0 / 0 for doubly-magic nuclides.
    if valence_total == 0:
        return 0

    return (valence_protons * valence_neutrons) / valence_total

# Apply cal_d to every row and store the result in column 'D'.
df['D'] = df.apply(cal_d, axis=1)

In [39]:
# Calculate the nucleon separation energy
# One- and two-neutron, then one- and two-proton values, each computed by a project
# helper from the 'B_exp(MeV)' column (the helpers' internals are not visible here).
# NOTE(review): the import cell loads `myutils.data_util`, but these calls go through
# `myutils.data_utility` - confirm that name is available on a fresh kernel.
df['Sn_exp(MeV)'] = myutils.data_utility.neutron_1_separation_energy(dataframe=df, head_of_BindingEnergy='B_exp(MeV)')
df['Sp_exp(MeV)'] = myutils.data_utility.proton_1_separation_energy(dataframe=df, head_of_BindingEnergy='B_exp(MeV)')
df['S2n_exp(MeV)'] = myutils.data_utility.neutron_2_separation_energy(dataframe=df, head_of_BindingEnergy='B_exp(MeV)')
df['S2p_exp(MeV)'] = myutils.data_utility.proton_2_separation_energy(dataframe=df, head_of_BindingEnergy='B_exp(MeV)')

In [40]:
# Report the final row count, the number of columns and the dtype of every column.
print(f'There are totally {len(df)} rows in the dataset, and each sample has {df.shape[1]} dimensions:\n{df.dtypes}')

There are totally 3456 rows in the dataset, and each sample has 18 dimensions:
N-Z                                  int64
N                                    int64
Z                                    int64
A                                    int64
BINDING_energy_per_nucleon(keV)    float64
Uncertainty(keV)                   float64
A^(2/3)                            float64
A^(-1/3)                           float64
P                                  float64
I                                  float64
B_exp(MeV)                         float64
B_LDM(MeV)                         float64
LDM_residual(MeV)                  float64
D                                  float64
Sn_exp(MeV)                        float64
Sp_exp(MeV)                        float64
S2n_exp(MeV)                       float64
S2p_exp(MeV)                       float64
dtype: object


In [41]:
# Write the finished table to CSV; index=False leaves the DataFrame index out of the file.
save_path = 'data/all_dataset.csv'
df.to_csv(save_path, index=False)