In [1]:
import numpy as np 
import pandas as pd 

# Unit-conversion factors.
meV2kcal = 23.0609 * 10**(-3) # meV -> kcal/mol: 1 eV = 23.0609 kcal/mol, scaled by 1e-3 for meV
bohr2A   = 0.529177249        # Bohr -> Angstrom: 1 Bohr = 0.529177249 A (not used in the cells visible here)

For the **MD17** dataset, the MAEs are always listed in the following order:
1. Aspirin
2. Benzene
3. Ethanol
4. Malonaldehyde
5. Naphthalene
6. Salicylic_Acid
7. Toluene
8. Uracil

For the **rMD17** dataset, the MAEs are always listed in the following order:
1. Aspirin
2. Azobenzene
3. Benzene
4. Ethanol
5. Malonaldehyde
6. Naphthalene
7. Paracetamol
8. Salicylic_Acid
9. Toluene
10. Uracil

For the **QM9** dataset, the MAEs are always listed in the following order:
1.  $\mu$  
2.  $\alpha$  
3.  $\epsilon_{\text{HOMO}}$  
4.  $\epsilon_{\text{LUMO}}$  
5.  $\Delta \epsilon$  
6.  $\langle R^2 \rangle$  
7.  $\text{ZPVE}$  
8.  $U_0$  
9.  $U$  
10. $H$  
11. $G$  
12. $c_v$

If the analysis of a molecule for a particular ML architecture is missing, it is marked by '**x**'.

Points to remember:
1. For MD17, the training size is <b>1000</b> samples (950/50).
2. Data are taken from the <b>original</b> paper.
3. Mind the <b>rounding</b> of the MAEs.

In [2]:
# SchNet MAEs (original note on this entry: "Train on both").
# 'E' and 'F' feed the MD17 energy and force tables; the QM9 'mae' list
# follows the 12-property order given in the notebook's introduction.
schnet = dict(
    MD17=dict(
        unit="kcal",
        E=[0.37, 0.08, 0.08, 0.13, 0.16, 0.20, 0.12, 0.14],
        F=[1.35, 0.31, 0.39, 0.66, 0.58, 0.85, 0.57, 0.56],
    ),
    QM9=dict(
        unit="kcal/mol_D_A3",
        mae=[0.033, 0.035, 0.945, 0.784, 1.453, 0.020,
             0.039, 0.323, 0.438, 0.323, 0.323, 0.033],
    ),
)



In [3]:
# mgcn = {
#     'MD17': {
#         'unit': ,
#         'E': [],
#         'F': []
#     }
# }

# MGCN has no MD17 data.

In [4]:
# DimeNet MAEs: MD17 energies/forces (unit tag 'kcal') and the 12 QM9 properties.
dimenet = dict(
    MD17=dict(
        unit="kcal",
        E=[0.204, 0.078, 0.064, 0.104, 0.122, 0.134, 0.102, 0.115],
        F=[0.499, 0.187, 0.230, 0.383, 0.215, 0.374, 0.216, 0.301],
    ),
    QM9=dict(
        unit="kcal/mol_D_A3",
        mae=[0.030, 0.007, 0.567, 0.450, 0.752, 0.093,
             0.028, 0.146, 0.145, 0.151, 0.174, 0.023],
    ),
)

In [5]:
# PhysNet MAEs. The second MD17 slot (Benzene in the table order) holds the
# 'x' placeholder instead of a number.
physnet = dict(
    MD17=dict(
        unit="kcal",
        E=[0.230, "x", 0.059, 0.094, 0.142, 0.126, 0.100, 0.108],
        F=[0.605, "x", 0.160, 0.319, 0.310, 0.337, 0.191, 0.218],
    ),
    QM9=dict(
        unit="kcal/mol_D_A3",
        mae=[0.053, 0.009, 0.759, 0.570, 0.980, 0.214,
             0.032, 0.188, 0.192, 0.194, 0.217, 0.028],
    ),
)

In [6]:
# GemNet-Q MAEs on MD17, stored in meV. Every energy slot is the 'x'
# placeholder; only forces carry numbers.
# A previous 10-entry force list was kept commented out in this entry:
#   'F': [0.217, 'x', 'x', 0.088, 0.159, 0.051, 'x', 0.125, 0.060, 0.104]
gemnet_q = dict(
    MD17=dict(
        unit="meV",
        E=["x"] * 8,
        F=[9.4, 6.3, 3.8, 6.9, 2.2, 5.4, 2.6, 4.5],
    ),
)

# GemNet-T MAEs on MD17, stored in meV. Every energy slot is the 'x'
# placeholder; only forces carry numbers.
# A previous 10-entry force list was kept commented out in this entry:
#   'F': [0.219, 'x', 'x', 0.085, 0.155, 0.055, 'x', 0.127, 0.060, 0.097]
gemnet_t = dict(
    MD17=dict(
        unit="meV",
        E=["x"] * 8,
        F=[9.5, 6.3, 3.7, 6.7, 2.4, 5.5, 2.6, 4.2],
    ),
)

In [7]:
# PaiNN MAEs on MD17 for the variant trained on forces only.
painn_force_only = dict(
    MD17=dict(
        unit="kcal",
        E=[0.167, "x", 0.064, 0.100, 0.116, 0.116, 0.095, 0.106],
        F=[0.338, "x", 0.224, 0.344, 0.077, 0.195, 0.094, 0.139],
    ),
)

# PaiNN MAEs for the variant trained on both energies and forces
# (MD17 plus the 12 QM9 properties).
painn_force_energy = dict(
    MD17=dict(
        unit="kcal",
        E=[0.159, "x", 0.063, 0.091, 0.117, 0.114, 0.097, 0.104],
        F=[0.371, "x", 0.230, 0.319, 0.083, 0.209, 0.102, 0.140],
    ),
    QM9=dict(
        unit="kcal/mol_D_A3",
        mae=[0.012, 0.007, 0.636, 0.470, 1.054, 0.018,
             0.030, 0.135, 0.134, 0.138, 0.169, 0.024],
    ),
)

In [8]:
# ENINet MAEs on MD17 (unit tag 'kcal'); all eight molecules have values.
eninet = dict(
    MD17=dict(
        unit="kcal",
        E=[0.148, 0.074, 0.043, 0.071, 0.077, 0.092, 0.078, 0.093],
        F=[0.198, 0.169, 0.100, 0.192, 0.046, 0.093, 0.051, 0.082],
    ),
)

In [9]:
# SpookyNet MAEs on MD17 (unit tag 'kcal'); the second slot is the 'x' placeholder.
spookynet = dict(
    MD17=dict(
        unit="kcal",
        E=[0.151, "x", 0.052, 0.079, 0.116, 0.114, 0.094, 0.105],
        F=[0.258, "x", 0.094, 0.167, 0.089, 0.180, 0.087, 0.119],
    ),
)

In [10]:
# NequIP (l=0) MAEs on rMD17, stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.581, 0.468, 0.074, 0.046, 0.101, 0.339, 0.404, 0.263, 0.224, 0.231]
#   'F': [0.973, 0.793, 0.238, 0.274, 0.535, 0.475, 0.775, 0.687, 0.613, 0.600]
neqip_l0 = dict(
    rMD17=dict(
        unit="meV",
        E=[25.2, 20.3, 3.2, 2.0, 4.4, 14.7, 17.5, 11.4, 9.7, 10.0],
        F=[42.2, 34.4, 10.3, 11.9, 23.2, 20.6, 33.6, 29.8, 26.6, 26.0],
    ),
)

# NequIP (l=1) MAEs on rMD17, stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.088, 0.025, 0.002, 0.023, 0.037, 0.009, 0.048, 0.023, 0.012, 0.014]
#   'F': [0.291, 0.104, 0.009, 0.150, 0.238, 0.048, 0.214, 0.131, 0.060, 0.095]
neqip_l1 = dict(
    rMD17=dict(
        unit="meV",
        E=[3.8, 1.1, 0.09, 1.0, 1.6, 0.4, 2.1, 1.0, 0.5, 0.6],
        F=[12.6, 4.5, 0.4, 6.5, 10.3, 2.1, 9.3, 5.7, 2.6, 4.1],
    ),
)

# NequIP (l=2) MAEs on rMD17, stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.055, 0.018, 0.001, 0.012, 0.021, 0.007, 0.032, 0.018, 0.007, 0.009]
#   'F': [0.196, 0.076, 0.009, 0.081, 0.136, 0.032, 0.136, 0.097, 0.042, 0.067]
neqip_l2 = dict(
    rMD17=dict(
        unit="meV",
        E=[2.4, 0.8, 0.06, 0.5, 0.9, 0.3, 1.4, 0.8, 0.3, 0.4],
        F=[8.5, 3.3, 0.4, 3.5, 5.9, 1.4, 5.9, 4.2, 1.8, 2.9],
    ),
)

# NequIP (l=3) MAEs, stored in meV for both MD17 and rMD17.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside them:
#   'E': [0.053, 0.016, 0.001, 0.009, 0.018, 0.005, 0.032, 0.016, 0.007, 0.009]
#   'F': [0.189, 0.067, 0.007, 0.065, 0.118, 0.030, 0.136, 0.092, 0.037, 0.071]
neqip_l3 = dict(
    MD17=dict(
        unit="meV",
        E=[5.7, "x", 2.2, 3.3, 4.9, 4.6, 4.0, 4.5],
        F=[8.0, "x", 3.1, 5.6, 1.7, 3.9, 2.0, 3.3],
    ),
    rMD17=dict(
        unit="meV",
        E=[2.3, 0.7, 0.04, 0.4, 0.8, 0.2, 1.4, 0.7, 0.3, 0.4],
        F=[8.2, 2.9, 0.3, 2.8, 5.1, 1.3, 5.9, 4.0, 1.6, 3.1],
    ),
)

In [11]:
# MACE MAEs on rMD17; this entry is already tagged 'kcal', so the export
# cells copy it without conversion.
mace = dict(
    rMD17=dict(
        unit="kcal",
        E=[0.051, 0.028, 0.009, 0.009, 0.018, 0.012, 0.030, 0.021, 0.012, 0.012],
        F=[0.152, 0.069, 0.007, 0.048, 0.095, 0.037, 0.111, 0.071, 0.035, 0.048],
    ),
)

In [12]:
# NewtonNet MAEs on MD17 (unit tag 'kcal').
# A commented-out 10-entry 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.168, 0.142, 'x', 0.078, 0.096, 0.118, 0.135, 0.115, 0.094, 0.107]
#   'F': [0.348, 0.138, 'x', 0.264, 0.323, 0.084, 0.263, 0.197, 0.088, 0.149]
newtonnet = dict(
    MD17=dict(
        unit="kcal",
        E=[0.168, "x", 0.078, 0.096, 0.118, 0.115, 0.094, 0.107],
        F=[0.348, "x", 0.264, 0.323, 0.084, 0.197, 0.088, 0.149],
    ),
)

In [13]:
# Allegro MAEs on rMD17, stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.053, 0.028, 0.007, 0.009, 0.014, 0.012, 0.035, 0.021, 0.009, 0.014]
#   'F': [0.168, 0.060, 0.005, 0.048, 0.083, 0.021, 0.113, 0.067, 0.042, 0.042]
allegro = dict(
    rMD17=dict(
        unit="meV",
        E=[2.3, 1.2, 0.3, 0.4, 0.6, 0.5, 1.5, 0.9, 0.4, 0.6],
        F=[7.3, 2.6, 0.2, 2.1, 3.6, 0.9, 4.9, 2.9, 1.8, 1.8],
    ),
)

In [14]:
# SO3krates MAEs on MD17 (unit tag 'kcal'); the second slot is the 'x' placeholder.
so3krates = dict(
    MD17=dict(
        unit="kcal",
        E=[0.139, "x", 0.052, 0.077, 0.115, 0.106, 0.095, 0.103],
        F=[0.236, "x", 0.096, 0.147, 0.074, 0.145, 0.073, 0.111],
    ),
)

In [15]:
# fchl_19 = {
#     'MD17': {
#         'unit': 'kcal',
#         'E': [0.182,'x','x',0.054,0.081,0.117,'x',0.114,0.098,0.104],
#         'F': [0.478,'x','x',0.136,0.245,0.151,'x',0.221,0.203,0.105]
#     }
# } 

# FCHL19 MAEs on rMD17, stored in meV (original note: "original paper ?").
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.143, 0.065, 0.008, 0.021, 0.035, 0.028, 0.067, 0.042, 0.039, 0.014]
#   'F': [0.482, 0.249, 0.060, 0.143, 0.238, 0.150, 0.284, 0.219, 0.203, 0.097]
fchl_19 = dict(
    rMD17=dict(
        unit="meV",
        E=[6.2, 2.8, 0.35, 0.9, 1.5, 1.2, 2.9, 1.8, 1.7, 0.6],
        F=[20.9, 10.8, 2.6, 6.2, 10.3, 6.5, 12.3, 9.5, 8.8, 4.2],
    ),
)

In [16]:
# GAP(SOAP) MAEs on rMD17, stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.408, 0.196, 0.017, 0.081, 0.111, 0.088, 0.196, 0.129, 0.092, 0.069]
#   'F': [1.035, 0.565, 0.138, 0.417, 0.609, 0.381, 0.666, 0.570, 0.410, 0.406]
gap_soap = dict(
    rMD17=dict(
        unit="meV",
        E=[17.7, 8.5, 0.75, 3.5, 4.8, 3.8, 8.5, 5.6, 4.0, 3.0],
        F=[44.9, 24.5, 6.0, 18.1, 26.4, 16.5, 28.9, 24.7, 17.8, 17.6],
    ),
)

In [17]:
# sGDML MAEs: MD17 tagged 'kcal' (original note: "original paper ?") and
# rMD17 stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside them:
#   'E': [0.166, 0.099, 0.001, 0.055, 0.071, 0.018, 0.115, 0.048, 0.023, 0.032]
#   'F': [0.733, 0.443, 0.018, 0.369, 0.434, 0.125, 0.537, 0.295, 0.145, 0.240]
sgdml = dict(
    MD17=dict(
        unit="kcal",
        E=[0.19, 0.092, 0.07, 0.10, 0.12, 0.12, 0.10, 0.11],
        F=[0.68, 0.409, 0.33, 0.41, 0.11, 0.28, 0.14, 0.24],
    ),
    rMD17=dict(
        unit="meV",
        E=[7.2, 4.3, 0.06, 2.4, 3.1, 0.8, 5.0, 2.1, 1.0, 1.4],
        F=[31.8, 19.2, 0.8, 16.0, 18.8, 5.4, 23.3, 12.8, 6.3, 10.4],
    ),
)

In [18]:
# ACE MAEs on rMD17, stored in meV.
# A commented-out 10-entry entry keyed 'MD17' and tagged 'kcal' was kept alongside it:
#   'E': [0.141, 0.083, 0.001, 0.028, 0.039, 0.021, 0.092, 0.042, 0.025, 0.025]
#   'F': [0.413, 0.251, 0.012, 0.168, 0.256, 0.118, 0.293, 0.214, 0.150, 0.152]
ace = dict(
    rMD17=dict(
        unit="meV",
        E=[6.1, 3.6, 0.04, 1.2, 1.7, 0.9, 4.0, 1.8, 1.1, 1.1],
        F=[17.9, 10.9, 0.5, 7.3, 11.1, 5.1, 12.7, 9.3, 6.5, 6.6],
    ),
)

In [19]:
# MGNN MAEs on rMD17, stored in meV.
# A commented-out 'rMD17' entry tagged 'kcal' was kept alongside it:
#   'E': [0.071, 0.053, 0.001, 0.007, 0.018, 0.012, 0.055, 0.030, 0.009, 0.009]
#   'F': [0.210, 0.143, 0.005, 0.062, 0.118, 0.060, 0.157, 0.148, 0.055, 0.062]
mgnn = dict(
    rMD17=dict(
        unit="meV",
        E=[3.1, 2.3, 0.03, 0.3, 0.8, 0.5, 2.4, 1.3, 0.4, 0.4],
        F=[9.1, 6.2, 0.2, 2.7, 5.1, 2.6, 6.8, 6.4, 2.4, 2.7],
    ),
)

In [20]:
# Registry of every model as a [display label, results dict] pair.
# The order here is the column order of the exported tables; each export
# cell picks out the entries that carry the dataset key it needs.
framework_MD17 = [
    [label, results]
    for label, results in (
        ('SchNet', schnet),
        ('DimeNet', dimenet),
        ('PhysNet', physnet),
        ('GemNet(Q)', gemnet_q),
        ('GemNet(T)', gemnet_t),
        ('PaiNN(F)', painn_force_only),
        ('PaiNN(F+E)', painn_force_energy),
        ('ENINet', eninet),
        ('SpookyNet', spookynet),
        ('NequIP(l=0)', neqip_l0),
        ('NequIP(l=1)', neqip_l1),
        ('NequIP(l=2)', neqip_l2),
        ('NequIP(l=3)', neqip_l3),
        ('MACE', mace),
        ('NewtonNet', newtonnet),
        ('Allegro', allegro),
        ('SO3krates', so3krates),
        ('FCHL19', fchl_19),
        ('GAP(SOAP)', gap_soap),
        ('sGDML', sgdml),
        ('ACE', ace),
        ('MGNN', mgnn),
    )
]

#### MD17 dataset

In [21]:
# Export the MD17 energy and force MAE tables to CSV, one column per framework.
#
# Both tables are built by the same loop; only the key read from each
# framework entry ('E' or 'F') and the output file name differ. Entries tagged
# 'kcal' are copied unchanged. Entries tagged 'meV' are multiplied by meV2kcal
# and formatted to three decimals; non-numeric placeholders such as 'x' are
# passed through untouched. Frameworks without an 'MD17' entry are skipped.
#
# Raises:
#     AssertionError: a framework's 'unit' is neither 'kcal' nor 'meV'.
#     ValueError: a framework's list length differs from the molecule list.
md17_molecules = ['Aspirin','Benzene','Ethanol','Malonaldehyde','Naphthalene','Salicylic_Acid','Toluene','Uracil']

# Energy is written first and Force last, so `data` and `df` end up holding
# the force table once the cell has run.
for quantity, csv_name in (('E', "MD17_Energy.csv"), ('F', "MD17_Force.csv")):
    data = {}

    for label, results in framework_MD17:
        if 'MD17' not in results:
            continue

        entry = results['MD17']
        if entry['unit'] == 'kcal':
            column = entry[quantity]
        elif entry['unit'] == 'meV':
            column = [
                f'{val*meV2kcal:.3f}' if isinstance(val, (int, float)) else val
                for val in entry[quantity]
            ]
        else:
            raise AssertionError(f"Mismatched energy unit: {entry['unit']}")

        # Name the offending framework here; pandas' own length error does
        # not say which column is wrong.
        if len(column) != len(md17_molecules):
            raise ValueError(
                f"{label}: MD17 '{quantity}' has {len(column)} values, "
                f"expected {len(md17_molecules)}"
            )
        data[label] = column

    df = pd.DataFrame(data)
    df.index = md17_molecules
    df.index.name = "MD17_molecules"
    df.to_csv(csv_name, index=True)

#### rMD17 dataset

In [22]:
# Export the rMD17 energy and force MAE tables to CSV, one column per framework.
#
# Both tables are built by the same loop; only the key read from each
# framework entry ('E' or 'F') and the output file name differ. Entries tagged
# 'kcal' are copied unchanged. Entries tagged 'meV' are multiplied by meV2kcal
# and formatted to three decimals; non-numeric placeholders such as 'x' are
# passed through untouched. Frameworks without an 'rMD17' entry are skipped.
#
# Raises:
#     AssertionError: a framework's 'unit' is neither 'kcal' nor 'meV'.
#     ValueError: a framework's list length differs from the molecule list.
rmd17_molecules = ['Aspirin','Azobenzene','Benzene','Ethanol','Malonaldehyde','Naphthalene','Paracetamol','Salicylic_Acid','Toluene','Uracil']

# Energy is written first and Force last, so `data` and `df` end up holding
# the force table once the cell has run.
for quantity, csv_name in (('E', "rMD17_Energy.csv"), ('F', "rMD17_Force.csv")):
    data = {}

    for label, results in framework_MD17:
        if 'rMD17' not in results:
            continue

        entry = results['rMD17']
        if entry['unit'] == 'kcal':
            column = entry[quantity]
        elif entry['unit'] == 'meV':
            column = [
                f'{val*meV2kcal:.3f}' if isinstance(val, (int, float)) else val
                for val in entry[quantity]
            ]
        else:
            raise AssertionError(f"Mismatched energy unit: {entry['unit']}")

        # Name the offending framework here; pandas' own length error does
        # not say which column is wrong.
        if len(column) != len(rmd17_molecules):
            raise ValueError(
                f"{label}: rMD17 '{quantity}' has {len(column)} values, "
                f"expected {len(rmd17_molecules)}"
            )
        data[label] = column

    df = pd.DataFrame(data)
    df.index = rmd17_molecules
    df.index.name = "rMD17_molecules"
    df.to_csv(csv_name, index=True)

#### QM9 dataset

In [23]:
# data = {} 

# for ml in framework_MD17:  
#     if 'QM9' in ml[1]:
#         data[ml[0]] = ml[1]['QM9']['mae']

# df = pd.DataFrame(data)
# df.index = ['μ','α ','HOMO','LUMO','∆','<R2>','ZPVE','U0','U','H','G','cv']
# df.index.name = "QM9_properties"
# df.to_csv("QM9.csv", index=True)