In [6]:
import pandas as pd
import re

def remove_leading_zeros(cas_number):
    """
    Removes leading zeros from a CAS number.

    Only a value that consists entirely of a zero-padded
    ``digits-digits-digits`` pattern is rewritten. Any other string
    (e.g. 'SRC002-52-9') is returned unchanged. Non-string values, such as
    ``None`` or the ``NaN`` pandas uses for empty cells, are passed through
    untouched instead of raising ``TypeError`` inside ``re.sub``.

    Parameters:
    - cas_number (str): The original CAS number (e.g., '000071-41-0').

    Returns:
    - str: The CAS number without leading zeros (e.g., '71-41-0'), or the
      input itself when it is not a string or does not match the pattern.
    """
    # Missing / non-text cells cannot be pattern-matched; hand them back as-is
    # so a column-wide .apply() does not abort on a single blank entry.
    if not isinstance(cas_number, str):
        return cas_number
    # Strip the run of zeros that precedes the first digit group; the capture
    # group keeps at least one digit so an all-zero first segment stays valid.
    return re.sub(r'^0+(\d+-\d+-\d+)$', r'\1', cas_number)


In [14]:
# Read the three test-set sheets from their Excel workbooks.
bp_data = pd.read_excel('Boiling_Pt_TestSet.xls', sheet_name='BP_PhysProp')
mp_data = pd.read_excel('Melting_Pt_TestSet.xls', sheet_name='MP_PhysProp')
vp_data = pd.read_excel('VaporPressure_TestSet.xls', sheet_name='Vapor Pressure Test Set')

# Run every CAS value through remove_leading_zeros; the boiling-point table
# additionally loses its 'Unnamed: 4' column.
bp_data = (
    bp_data
    .assign(CAS=bp_data['CAS'].apply(remove_leading_zeros))
    .drop(columns=['Unnamed: 4'])
)
mp_data = mp_data.assign(CAS=mp_data['CAS'].apply(remove_leading_zeros))
vp_data = vp_data.assign(CAS=vp_data['CAS'].apply(remove_leading_zeros))

# Write each cleaned table to its own CSV file, leaving the row index out.
for _frame, _csv_path in (
    (bp_data, 'Boiling_Point_PhysChem.csv'),
    (mp_data, 'Melting_Point_PhysChem.csv'),
    (vp_data, 'Vapor_Pressure_PhysChem.csv'),
):
    _frame.to_csv(_csv_path, index=False)

In [19]:
# Show the vapor-pressure table as this cell's output (bare last expression).
vp_data

Unnamed: 0,CAS,Name,MW,VP (mm Hg),log VP,Estimate VP (mm Hg),log VP est,Error,VP temp (degC),VP type,Reference
0,50-00-0,FORMALDEHYDE,30.026,3.886000e+03,3.589503,3.490000e+03,3.542825,-0.046677,25,EXT,"BOUBLIK,T ET AL. (1984)"
1,50-21-5,LACTIC ACID,90.079,8.130000e-02,-1.089909,2.860000e-02,-1.543634,-0.453725,25,EXP,"DAUBERT,TE & DANNER,RP (1989)"
2,50-29-3,"P,P'-DDT",354.490,1.600000e-07,-6.795880,7.470000e-06,-5.126679,1.669201,20,EXP,"BIDLEMAN,TF & FOREMAN,WT (1987)"
3,50-31-7,"2,3,6-TRICHLOROBENZOIC ACID",225.460,5.500000e-04,-3.259637,5.630000e-05,-4.249492,-0.989854,25,EXP,"FREAR,DS (1976)"
4,50-32-8,BENZO(A)PYRENE,252.320,5.490000e-09,-8.260428,9.820000e-10,-9.007889,-0.747461,25,EXT,"MURRAY,JJ ET AL. (1974)"
...,...,...,...,...,...,...,...,...,...,...,...
3032,SRC002-52-9,3-PENTYL NITRATE,133.150,5.400000e+00,0.732394,6.360000e+00,0.803457,0.071063,25,EXP,"HAUFF,K ET AL. (1998)"
3033,SRC003-67-1,Bis(tribuyl tin) ether,596.080,7.500000e-06,-5.124939,7.800000e-06,-5.107905,0.017033,20,EXP,"BLUNDEN,SJ ET AL. (1984)"
3034,SRC003-73-3,Limonene oxide (cis + trans),152.240,6.230000e-01,-0.205512,1.590000e+00,0.201397,0.406909,25,EXP,"FICHAN,I ET AL. (1999)"
3035,SRC003-80-0,Pentafluoroethyl methyl ether,150.050,1.630000e+03,3.212188,3.430000e+03,3.535294,0.323107,27,EXP,"OHTA,H ET AL. (2001)"
