## Prepare descriptors

In [None]:
# # install libraries
# ! pip install numba -q
# ! pip install mordred[full] -q
# ! pip install padelpy -q
# ! pip install rdkit-pypi -q
# ! pip install uamc-spectrophore -q

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import mutual_info_regression, f_regression, SelectPercentile, SelectKBest, SelectFromModel
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, IsolationForest
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.covariance import EllipticEnvelope
import xgboost as xgb
import tqdm
import seaborn as sns
from sklearn.metrics import r2_score, mean_squared_error
from mordred import Calculator, descriptors
from rdkit import Chem
from rdkit.Chem import MolFromSmiles, SDWriter, Draw
from rdkit.Chem import AllChem, AddHs
from rdkit.Chem import rdFingerprintGenerator
from padelpy import from_smiles, from_sdf
from spectrophore import spectrophore
from statsmodels.stats.outliers_influence import variance_inflation_factor
import matplotlib.pyplot as plt
from statistics import mean
from functools import partial

In [None]:
# Load the second sheet (sheet_name=1) of the workbook.
data = pd.read_excel('./data_from_esi2.xlsx', sheet_name=1)

# Rows that agree on every column except 'yield' count as duplicates;
# only the first occurrence is kept and the index is renumbered from 0.
non_yield_columns = data.columns.difference(['yield'])
data = data.drop_duplicates(subset=non_yield_columns).reset_index(drop=True)
print(f'{data.shape = }')

data.shape = (109, 9)


In [None]:
# Replace the 'feed_material' labels with integer codes when the column exists.
# The fitted encoder is kept in `le` so the codes can be mapped back to labels.
if 'feed_material' in data.columns:
  le = LabelEncoder()
  data['feed_material'] = le.fit_transform(data['feed_material'])

# Display the frame. A notebook only renders a trailing *top-level* expression,
# so this must sit outside the `if` block to have any effect.
data

In [None]:
# Split every 'A.B' SMILES into its two components and interleave them into one
# flat array: [A_0, B_0, A_1, B_1, ...]. Later cells rely on this even/odd
# layout (even index -> 'c_' columns, odd index -> 'a_' columns).
# NOTE(review): presumably the first component is always the cation and the
# second the anion -- confirm against the data sheet.
val_smiles = data['smiles'].values
smiles_pairs = [str(smile).split('.') for smile in val_smiles]

# A row with a different number of components would produce a ragged array and
# silently shift the even/odd pairing of every following row, so fail early.
bad_rows = [(row, smile) for row, (smile, parts) in enumerate(zip(val_smiles, smiles_pairs))
            if len(parts) != 2]
if bad_rows:
  raise ValueError(
    f"Expected exactly two '.'-separated components per SMILES, "
    f"offending (row, smiles): {bad_rows}"
  )

smiles = np.array(smiles_pairs).flatten()
print(f'{len(smiles) = }')

len(smiles) = 218


In [None]:
# Morgan fingerprints (radius 2, nBITS bits) for every entry of `smiles`.
# Even indices are collected in the '*_c' containers and odd indices in the
# '*_a' containers (cation / anion halves of each pair, per the naming used
# for the output columns and image files).
nBITS = 512
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=nBITS)
ao_c = rdFingerprintGenerator.AdditionalOutput()
ao_c.AllocateBitInfoMap()
aos_c = []
tpls_c = []
ao_a = rdFingerprintGenerator.AdditionalOutput()
ao_a.AllocateBitInfoMap()
aos_a = []
tpls_a = []

fps = []

mols = [MolFromSmiles(smi) for smi in smiles]
print(f"{len(mols) = }")

for i, mol in enumerate(mols):
  # Pick the collector set that matches the position inside the pair.
  if i % 2 == 0:
    ao, aos, tpls = ao_c, aos_c, tpls_c
  else:
    ao, aos, tpls = ao_a, aos_a, tpls_a
  fp = mfpgen.GetFingerprintAsNumPy(mol, additionalOutput=ao)
  mol_fp_info_map = ao.GetBitInfoMap()
  aos.append(mol_fp_info_map)
  # One (mol, bit id, bit info) tuple per set bit; this is the tuple layout
  # passed to Draw.DrawMorganBits below.
  tpls.extend((mol, bit, mol_fp_info_map) for bit in mol_fp_info_map.keys())
  fps.append(fp)

fps_np = np.array(fps)

# Join the fingerprints of each (even, odd) pair into one row of 2 * nBITS
# values; a trailing unpaired entry is ignored.
cumulated_fps = [np.concatenate((fps_np[i - 1], fps_np[i])) for i in range(1, len(fps_np), 2)]
print(f'{len(cumulated_fps[0]) = }')
print(f'{cumulated_fps[0] = }')
print(f'{len(cumulated_fps) = }')
# Column names are 1-based: bit id b is stored in column '<prefix>_fp_{b + 1}'.
fps_columns = np.array([['c_fp_' + f'{i+1}' for i in range(nBITS)], ['a_fp_' + f'{i+1}' for i in range(nBITS)]]).flatten()
cumulated_desc_fps = pd.DataFrame(cumulated_fps, columns = fps_columns)


def _first_tuple_per_bit(bit_tuples):
  """Sort `bit_tuples` in place by bit id and return the first tuple of each bit."""
  bit_tuples.sort(key=lambda tpl: tpl[1])
  reduced = [bit_tuples[0]]
  for element in bit_tuples:
    if element[1] != reduced[-1][1]:
      reduced.append(element)
  return reduced


# Legends use bit id + 1 so that every drawing carries the same name as the
# corresponding DataFrame column (the bit ids themselves are 0-based).
tpls_c_reduced = _first_tuple_per_bit(tpls_c)
print(f"{len(tpls_c_reduced) = }")
p_cations_fps = Draw.DrawMorganBits(tpls_c_reduced, molsPerRow=8, legends=['c_fp_' + str(x[1] + 1) for x in tpls_c_reduced])
p_cations_fps.save('./cations_fps.png')

tpls_a_reduced = _first_tuple_per_bit(tpls_a)
print(f"{len(tpls_a_reduced) = }")
p_anions_fps = Draw.DrawMorganBits(tpls_a_reduced, molsPerRow=8, legends=['a_fp_' + str(x[1] + 1) for x in tpls_a_reduced])
p_anions_fps.save('./anions_fps.png')

cumulated_desc_fps

len(mols) = 218
len(cumulated_fps[0]) = 1024
cumulated_fps[0] = array([0, 0, 0, ..., 0, 0, 0], dtype=uint8)
len(cumulated_fps) = 109
len(tpls_c_reduced) = 70
len(tpls_a_reduced) = 123


Unnamed: 0,c_fp_1,c_fp_2,c_fp_3,c_fp_4,c_fp_5,c_fp_6,c_fp_7,c_fp_8,c_fp_9,c_fp_10,...,a_fp_503,a_fp_504,a_fp_505,a_fp_506,a_fp_507,a_fp_508,a_fp_509,a_fp_510,a_fp_511,a_fp_512
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
105,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
106,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
107,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Rebuild `mols` with explicit hydrogens and a 3D conformer, and write every
# molecule to './mols.sdf' in the same order as `smiles`.
sdf_writer = SDWriter('./mols.sdf')

mols = []
try:
  for smi in smiles:
    mol = MolFromSmiles(smi)
    molh = AddHs(mol)
    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    embeding_mol_result = AllChem.EmbedMolecule(molh)
    if embeding_mol_result < 0:
      # Without a conformer the MMFF optimisation would raise; keep the
      # molecule (without 3D coordinates) so the pairing with `smiles` holds.
      print(f"3D embedding failed for {smi}, skipping MMFF optimisation")
    else:
      AllChem.MMFFOptimizeMolecule(molh)
    mols.append(molh)
    sdf_writer.write(molh)
finally:
  # Always close the writer so the SDF file is flushed and the handle released.
  sdf_writer.close()
print(f"{len(mols) = }")

len(mols) = 218


In [None]:
# Mordred descriptors (3D descriptors disabled) for every molecule in `mols`.
calc = Calculator(descriptors, ignore_3D=True)
desc_mordred = calc.pandas(mols)
print(desc_mordred)

# Join the descriptor rows of each (even, odd) molecule pair into one long row;
# a trailing unpaired row is ignored.
mordred_values = desc_mordred.values
cumulated_mordred = [
  np.concatenate((mordred_values[odd - 1], mordred_values[odd]))
  for odd in range(1, len(mordred_values), 2)
]
print(f'{len(cumulated_mordred[0]) = }')
print(f'{cumulated_mordred[0] = }')
print(f'{len(cumulated_mordred) = }')

# Same descriptor names twice: 'c_mordred_' prefix for the first half of the
# row, 'a_mordred_' prefix for the second half.
mordred_names = desc_mordred.columns.values
c_mordred_names = ['c_mordred_' + name for name in mordred_names]
a_mordred_names = ['a_mordred_' + name for name in mordred_names]
mordred_columns = np.array(c_mordred_names + a_mordred_names)
cumulated_desc_mordred = pd.DataFrame(cumulated_mordred, columns = mordred_columns)
cumulated_desc_mordred

100%|██████████| 218/218 [00:45<00:00,  4.83it/s]


          ABC     ABCGG  nAcid  nBase    SpAbs_A   SpMax_A  SpDiam_A  \
0    4.719397  5.004088      0      1   6.720566  2.101003  4.202006   
1    6.651690  6.855805      1      2  11.239561  2.088622  4.177243   
2    4.719397  5.004088      0      1   6.720566  2.101003  4.202006   
3    3.047207  3.305183      1      1   5.226252  1.847759  3.695518   
4    4.719397  5.004088      0      1   6.720566  2.101003  4.202006   
..        ...       ...    ...    ...        ...       ...       ...   
213  6.651690  6.855805      1      2  11.239561  2.088622  4.177243   
214  4.719397  5.004088      0      1   6.720566  2.101003  4.202006   
215  6.651690  6.855805      1      2  11.239561  2.088622  4.177243   
216  4.719397  5.004088      0      1   6.720566  2.101003  4.202006   
217  6.651690  6.855805      1      2  11.239561  2.088622  4.177243   

        SpAD_A   SpMAD_A   LogEE_A  ...     SRW10     TSRW10          MW  \
0     6.720566  0.960081  2.779033  ...  8.123558  33.34394

Unnamed: 0,c_mordred_ABC,c_mordred_ABCGG,c_mordred_nAcid,c_mordred_nBase,c_mordred_SpAbs_A,c_mordred_SpMax_A,c_mordred_SpDiam_A,c_mordred_SpAD_A,c_mordred_SpMAD_A,c_mordred_LogEE_A,...,a_mordred_SRW10,a_mordred_TSRW10,a_mordred_MW,a_mordred_AMW,a_mordred_WPath,a_mordred_WPol,a_mordred_Zagreb1,a_mordred_Zagreb2,a_mordred_mZagreb1,a_mordred_mZagreb2
0,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,8.168770,37.326413,145.098251,6.308620,143,9,38.0,38.0,5.222222,2.527778
1,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,6.834109,27.254130,74.024752,8.224972,18,2,16.0,14.0,3.361111,1.333333
2,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,7.626083,30.698690,88.040402,7.336700,29,4,22.0,21.0,4.222222,1.444444
3,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,7.890957,32.688753,104.035317,8.002717,46,6,26.0,26.0,4.472222,1.777778
4,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,8.297793,35.071670,118.050967,7.378185,65,8,32.0,33.0,5.333333,1.888889
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,5.766244,5.909452,0,1,9.910095,2.224503,4.194610,9.910095,1.238762,2.984531,...,6.188264,24.179697,59.013853,8.430550,9,0,12.0,9.0,3.111111,1.000000
105,5.766244,5.909452,0,1,9.910095,2.224503,4.194610,9.910095,1.238762,2.984531,...,6.188264,24.179697,59.013853,8.430550,9,0,12.0,9.0,3.111111,1.000000
106,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,8.168770,37.326413,145.098251,6.308620,143,9,38.0,38.0,5.222222,2.527778
107,4.719397,5.004088,0,1,6.720566,2.101003,4.202006,6.720566,0.960081,2.779033,...,8.168770,37.326413,145.098251,6.308620,143,9,38.0,38.0,5.222222,2.527778


In [None]:
# PaDEL descriptors for every entry of `smiles`, one dict per molecule.
desc_padel = []
# All-NaN template with the same keys as a real result; used as the row for
# molecules whose calculation fails, so every row keeps the same columns.
dict_padel_sample = from_smiles(str(smiles[0]))
dict_padel_sample = dict.fromkeys(dict_padel_sample, np.nan)
for i, smi in enumerate(smiles):
  try:
    desc_padel.append(from_smiles(str(smi), timeout=6))
  except Exception as err:
    # Best effort: report the failure and fall back to NaNs. Catching
    # Exception (not a bare except) keeps Ctrl-C / interpreter exit working.
    print(f"Issue with {smi} ({type(err).__name__}), filling with nans")
    desc_padel.append(dict_padel_sample)

print(f'{len(desc_padel[0]) = }')
print(f'{desc_padel[0] = }')
print(f'{len(desc_padel) = }')

# Merge each (even, odd) pair of descriptor dicts into one row: keys of the
# first get a 'c_padel_' prefix, keys of the second an 'a_padel_' prefix.
# A trailing unpaired entry is ignored.
cumulated_padel = []
for first_padel, second_padel in zip(desc_padel[0::2], desc_padel[1::2]):
  cation_padel = {f'c_padel_{k}': v for k, v in first_padel.items()}
  anion_padel = {f'a_padel_{k}': v for k, v in second_padel.items()}
  cumulated_padel.append({**cation_padel, **anion_padel})

cumulated_desc_padel = pd.DataFrame(cumulated_padel)
cumulated_desc_padel

len(desc_padel[0]) = 1875
desc_padel[0] = {'nAcid': '0', 'ALogP': '-1.5741', 'ALogp2': '2.47779081', 'AMR': '29.0791', 'apol': '20.03710199999999', 'naAromAtom': '0', 'nAromBond': '0', 'nAtom': '21', 'nHeavyAtom': '7', 'nH': '14', 'nB': '0', 'nC': '5', 'nN': '1', 'nO': '1', 'nS': '0', 'nP': '0', 'nF': '0.0', 'nCl': '0.0', 'nBr': '0.0', 'nI': '0.0', 'nX': '0.0', 'ATS0m': '1187.7095509999988', 'ATS1m': '1182.8995539999996', 'ATS2m': '1485.2625359999986', 'ATS3m': '1135.1198279999987', 'ATS4m': '819.9723510000009', 'ATS5m': '199.75334400000003', 'ATS6m': '9.144576', 'ATS7m': '0.0', 'ATS8m': '0.0', 'ATS0v': '3012.461880965715', 'ATS1v': '3583.877571795581', 'ATS2v': '5201.1007494544765', 'ATS3v': '5925.515672425086', 'ATS4v': '4177.139914939598', 'ATS5v': '1641.839924497501', 'ATS6v': '279.75369987837234', 'ATS7v': '0.0', 'ATS8v': '0.0', 'ATS0e': '155.31442800000002', 'ATS1e': '154.65768000000003', 'ATS2e': '283.54863200000005', 'ATS3e': '352.10272800000024', 'ATS4e': '460.9127879999997', 

Unnamed: 0,c_padel_nAcid,c_padel_ALogP,c_padel_ALogp2,c_padel_AMR,c_padel_apol,c_padel_naAromAtom,c_padel_nAromBond,c_padel_nAtom,c_padel_nHeavyAtom,c_padel_nH,...,a_padel_P1s,a_padel_P2s,a_padel_E1s,a_padel_E2s,a_padel_E3s,a_padel_Ts,a_padel_As,a_padel_Vs,a_padel_Ks,a_padel_Ds
0,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.8214516653615912,0.11720396651726683,0.6054630621617955,0.5276633848176131,0.3440719606841807,8.420617373289296,10.909622643348749,22.856633926519695,0.7321774980423867,1.4771984076635891
1,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.6058843410002094,0.316812239190037,0.5146245760177569,0.5036538200929992,0.24239298576716753,2.6314749008793648,1.8231188184303044,4.724982135916807,0.408826511500314,1.2606713818779238
2,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.49473212212926987,0.36852429745957793,0.5025522081777102,0.5296806935895796,0.2908270946112965,3.222598052663257,3.1193380757198717,7.1763117454535434,0.29488462938327176,1.3230599963785863
3,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.558624798137222,0.31567403262424654,0.5462502619200227,0.4727946653481856,0.28864298970395724,3.5294868745019494,3.5658175807059362,8.069919474778954,0.33793719720583304,1.3076879169721656
4,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.5726297312617139,0.31545868775664476,0.49062119255476394,0.4562212060189099,0.3235456230190845,4.243538278759585,5.04264425799529,10.83099508616804,0.3589445968925708,1.2703880215927583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,,,,,,,,,,,...,0.5768351537531632,0.3243332149000508,0.9178118971292629,0.6494799776575642,0.21941037698290836,2.1183255141766004,1.2391720390836856,3.533256403055803,0.3652527306297447,1.7867022517697353
105,,,,,,,,,,,...,0.5768351537531632,0.3243332149000508,0.9178118971292629,0.6494799776575642,0.21941037698290836,2.1183255141766004,1.2391720390836856,3.533256403055803,0.3652527306297447,1.7867022517697353
106,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.8214516653615912,0.11720396651726683,0.6054630621617955,0.5276633848176131,0.3440719606841807,8.420617373289296,10.909622643348749,22.856633926519695,0.7321774980423867,1.4771984076635891
107,0,-1.5741,2.47779081,29.0791,20.03710199999999,0,0,21,7,14,...,0.8214516653615912,0.11720396651726683,0.6054630621617955,0.5276633848176131,0.3440719606841807,8.420617373289296,10.909622643348749,22.856633926519695,0.7321774980423867,1.4771984076635891


In [None]:
# Spectrophore descriptors for every molecule in `mols`.
N_SPECTRO = 48  # number of values stored per molecule (length of the NaN fallback)

spectros = []
calculator = spectrophore.SpectrophoreCalculator(accuracy = 20.0)
for mol in mols:
  try:
    spectro = calculator.calculate(mol)
  except Exception:
    # Best effort: molecules the calculator rejects get an all-NaN vector.
    # Catching Exception (not a bare except) keeps Ctrl-C / interpreter exit
    # working.
    print(f"Issue with {Chem.MolToSmiles(mol)}, filling with nans")
    spectro = np.full(N_SPECTRO, np.nan)
  spectros.append(spectro)

spectros_np = np.array(spectros)

# Join the vectors of each (even, odd) molecule pair into one row; a trailing
# unpaired vector is ignored.
cumulated_spectros = [
  np.concatenate((spectros_np[odd - 1], spectros_np[odd]))
  for odd in range(1, len(spectros_np), 2)
]
print(f'{len(cumulated_spectros[0]) = }')
print(f'{cumulated_spectros[0] = }')
print(f'{len(cumulated_spectros) = }')
# 1-based column names: 'c_spectro_*' for the first half, 'a_spectro_*' for
# the second half of each row.
spectros_columns = np.array([['c_spectro_' + f'{i+1}' for i in range(N_SPECTRO)], ['a_spectro_' + f'{i+1}' for i in range(N_SPECTRO)]]).flatten()
cumulated_desc_spectro = pd.DataFrame(cumulated_spectros, columns = spectros_columns)
cumulated_desc_spectro

Probes initialised: 48 number of probes in total
12 probes are used due to the imposed stereo flag
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
Issue with [Cl-], filling with nans
len(cumulated_spectros[0]) = 96
cumulated_spectros[0] = array([-5.99541128e-01, -2.05251545e-01, -5.54778218e-01, -1.58361471e+00,
        1.55117863e-03,  6.03050053e-01, -1.69170558e+00,  6.03454411e-02,
        9.89216208e-01,  1.13392365e+00,  1.81333256e+00,  3.34704518e-02,
       -9.95810330e-01, -1.57573724e+00, -1.30480707e+00, -4.73919749e-01,
     

Unnamed: 0,c_spectro_1,c_spectro_2,c_spectro_3,c_spectro_4,c_spectro_5,c_spectro_6,c_spectro_7,c_spectro_8,c_spectro_9,c_spectro_10,...,a_spectro_39,a_spectro_40,a_spectro_41,a_spectro_42,a_spectro_43,a_spectro_44,a_spectro_45,a_spectro_46,a_spectro_47,a_spectro_48
0,-0.599541,-0.205252,-0.554778,-1.583615,0.001551,0.603050,-1.691706,0.060345,0.989216,1.133924,...,1.203586,0.976590,-0.064134,-0.449117,-2.136481,1.117011,1.086951,-0.450056,-0.632859,0.496999
1,-1.273218,-0.918078,-0.709198,-1.182517,0.099125,0.952628,-1.185970,0.776628,0.597990,1.448495,...,0.135203,-0.063057,-0.768664,-0.183496,-1.593653,1.374652,1.116793,-0.049689,0.698445,1.513401
2,-0.952932,-0.453855,-0.089020,-1.391466,-0.120206,1.005235,-1.906986,0.400063,1.167128,1.209255,...,0.611354,0.929290,-0.707117,-0.333457,-1.744727,0.058751,0.361224,0.313287,0.862584,1.852705
3,-1.259754,-0.899079,-0.716615,-1.228749,0.061084,0.800873,-1.179409,0.593793,0.628217,1.485244,...,0.725809,0.439443,-0.571865,-0.859940,-1.454457,1.636816,1.247827,-0.165711,-0.269614,1.195670
4,-1.482204,-1.205033,-0.440377,-0.843322,0.041344,0.500523,-1.136769,0.732074,1.357882,1.397031,...,0.286672,-0.386133,0.151888,0.145576,-1.785417,0.753956,0.922692,0.576615,1.467054,0.621649
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,0.190626,2.032328,0.689542,-0.029219,-0.366793,-0.013409,-2.306166,-0.374536,0.956088,0.144445,...,0.304042,0.381095,-0.301438,-0.540997,-1.534763,1.060708,0.826617,0.844958,0.724837,1.156504
105,-0.057265,2.077537,0.345176,-0.026113,0.495598,0.098431,-2.468296,-0.221695,0.552676,0.280648,...,0.216031,0.590496,-0.119839,-0.169718,-1.450083,0.819239,0.583547,0.397429,0.991166,1.405034
106,-1.397589,-1.245943,-0.370605,-1.283648,0.095627,1.137115,-1.109919,0.872986,0.826006,1.270025,...,0.872907,0.867390,-0.436536,-0.756561,-1.849949,1.424428,1.488081,-0.606453,-0.559036,0.427200
107,-1.290536,-0.916815,-0.550932,-1.294049,-0.085242,0.755664,-1.171043,0.485565,0.953019,1.300444,...,1.006991,1.107880,-0.589860,-0.815781,-1.956498,1.001703,1.519580,-0.133961,-0.234359,0.229256


In [None]:
# Put the experimental data and all descriptor tables side by side (column-wise)
# and store the combined table as a ';'-separated CSV without the index column.
descriptor_frames = [
  data,
  cumulated_desc_mordred,
  cumulated_desc_fps,
  cumulated_desc_spectro,
  cumulated_desc_padel,
]
df = pd.concat(descriptor_frames, axis=1)
df.to_csv('data-with-descs.csv', sep=';', index=False)
df

Unnamed: 0,feed_material,perc_cellulose,perc_hemicellulose,perc_lignins,il_conc,smiles,temp,time,yield,c_mordred_ABC,...,a_padel_P1s,a_padel_P2s,a_padel_E1s,a_padel_E2s,a_padel_E3s,a_padel_Ts,a_padel_As,a_padel_Vs,a_padel_Ks,a_padel_Ds
0,3,37.7,19.80,18.25,1.0,OCC[N+](C)(C)C.N[C@@H](CCCCN)C(=O)[O-],90,24.000000,60.4,4.719397,...,0.8214516653615912,0.11720396651726683,0.6054630621617955,0.5276633848176131,0.3440719606841807,8.420617373289296,10.909622643348749,22.856633926519695,0.7321774980423867,1.4771984076635891
1,3,37.7,19.80,18.25,1.0,OCC[N+](C)(C)C.NCC(=O)[O-],90,24.000000,59.9,4.719397,...,0.6058843410002094,0.316812239190037,0.5146245760177569,0.5036538200929992,0.24239298576716753,2.6314749008793648,1.8231188184303044,4.724982135916807,0.408826511500314,1.2606713818779238
2,3,37.7,19.80,18.25,1.0,OCC[N+](C)(C)C.N[C@@H](C)C(=O)[O-],90,24.000000,58.3,4.719397,...,0.49473212212926987,0.36852429745957793,0.5025522081777102,0.5296806935895796,0.2908270946112965,3.222598052663257,3.1193380757198717,7.1763117454535434,0.29488462938327176,1.3230599963785863
3,3,37.7,19.80,18.25,1.0,OCC[N+](C)(C)C.N[C@@H](CO)C(=O)[O-],90,24.000000,54.7,4.719397,...,0.558624798137222,0.31567403262424654,0.5462502619200227,0.4727946653481856,0.28864298970395724,3.5294868745019494,3.5658175807059362,8.069919474778954,0.33793719720583304,1.3076879169721656
4,3,37.7,19.80,18.25,1.0,OCC[N+](C)(C)C.N[C@@H]([C@H](O)C)C(=O)[O-],90,24.000000,53.1,4.719397,...,0.5726297312617139,0.31545868775664476,0.49062119255476394,0.4562212060189099,0.3235456230190845,4.243538278759585,5.04264425799529,10.83099508616804,0.3589445968925708,1.2703880215927583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,6,44.7,31.58,22.50,1.0,C(C)N1C=[N+](C=C1)C.C(C)(=O)[O-],185,0.083333,60.4,5.766244,...,0.5768351537531632,0.3243332149000508,0.9178118971292629,0.6494799776575642,0.21941037698290836,2.1183255141766004,1.2391720390836856,3.533256403055803,0.3652527306297447,1.7867022517697353
105,6,44.7,31.58,22.50,1.0,C(C)N1C=[N+](C=C1)C.C(C)(=O)[O-],185,0.250000,79.3,5.766244,...,0.5768351537531632,0.3243332149000508,0.9178118971292629,0.6494799776575642,0.21941037698290836,2.1183255141766004,1.2391720390836856,3.533256403055803,0.3652527306297447,1.7867022517697353
106,7,34.5,21.90,18.60,0.1,OCC[N+](C)(C)C.N[C@@H](CCCCN)C(=O)[O-],140,1.000000,32.7,4.719397,...,0.8214516653615912,0.11720396651726683,0.6054630621617955,0.5276633848176131,0.3440719606841807,8.420617373289296,10.909622643348749,22.856633926519695,0.7321774980423867,1.4771984076635891
107,7,33.5,22.70,16.30,0.1,OCC[N+](C)(C)C.N[C@@H](CCCCN)C(=O)[O-],140,1.000000,47.2,4.719397,...,0.8214516653615912,0.11720396651726683,0.6054630621617955,0.5276633848176131,0.3440719606841807,8.420617373289296,10.909622643348749,22.856633926519695,0.7321774980423867,1.4771984076635891
