In [1]:
import pandas as pd
import numpy as np

Goal is to make a transformation matrix that converts a bond type vector to atom count vector.

$$
\begin{bmatrix}
   c_\text{(CC1, Br)} & c_\text{(CC2, Br)} & \dots & c_\text{((OS1, Br)}\\
   c_\text{(CC1, C)} & c_\text{(CC2, C)} & \dots & c_\text{(OS1, C)}\\
   \vdots & \vdots & \dots & \vdots\\
   c_\text{(CC1, S)} & c_\text{(CC2, S)} & \dots & c_\text{(OS1, S)}\\
\end{bmatrix}
\cdot
\begin{bmatrix}
   b_\text{CC1} \\
   b_\text{CC2} \\
   \vdots \\
   b_\text{OS1}
\end{bmatrix}
=
\begin{bmatrix}
   a_\text{Br} \\
   a_\text{C} \\
   \vdots \\
   a_\text{S}
\end{bmatrix}
$$

In [2]:
bond_types = pd.read_csv('./data/processed/bond_types.csv')

bond_types.columns

Index(['CC1', 'CC1.5', 'CC2', 'CCl1', 'CF1', 'CH1', 'CN1', 'CN2', 'CO1', 'CO2',
       'CS1', 'CS2', 'HN1', 'HO1', 'NN1', 'NN2', 'NO1', 'NO2', 'NS1', 'NS2',
       'OS1'],
      dtype='object')

In [3]:
atom_counts = pd.read_csv('./data/processed/atom_counts.csv')

atom_counts.columns

Index(['Br', 'C', 'Cl', 'F', 'H', 'N', 'O', 'S'], dtype='object')

In [4]:
obj = {
    'Br':1, 
    'C':4, 
    'Cl':1, 
    'F':1, 
    'H':1, 
    'N':3, 
    'O':2, 
    'S':2
}

In [5]:
TM = pd.DataFrame(
    index = atom_counts.columns, 
    columns = bond_types.columns,
    data=0
)

TM

Unnamed: 0,CC1,CC1.5,CC2,CCl1,CF1,CH1,CN1,CN2,CO1,CO2,...,CS2,HN1,HO1,NN1,NN2,NO1,NO2,NS1,NS2,OS1
Br,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Cl,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
F,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
H,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
N,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
O,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
S,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
for atom in atom_counts.columns:
    for bond in bond_types.columns:
        if atom in bond:
            if bond != "CC1.5":
                coef = int(bond[-1])
            else:
                coef = 1.5
            coef = coef / obj[atom]
            
            TM.loc[atom, bond] = coef

TM

Unnamed: 0,CC1,CC1.5,CC2,CCl1,CF1,CH1,CN1,CN2,CO1,CO2,...,CS2,HN1,HO1,NN1,NN2,NO1,NO2,NS1,NS2,OS1
Br,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
C,0.25,0.375,0.5,0.25,0.25,0.25,0.25,0.5,0.25,0.5,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cl,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
F,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
H,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
N,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,...,0.0,0.333333,0.0,0.333333,0.666667,0.333333,0.666667,0.333333,0.666667,0.0
O,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,...,0.0,0.0,0.5,0.0,0.0,0.5,1.0,0.0,0.0,0.5
S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.5


In [7]:
abs((TM @ bond_types.T - atom_counts.T).T).mean(axis=0)

Br    0.016550
C     2.439079
Cl    0.000094
F     0.000000
H     0.007596
N     0.941912
O     0.752672
S     0.589716
dtype: float64

In [15]:
import tensorflow as tf
from tensorflow import transpose as t

class matrixApproximator(tf.keras.Model):

    def __init__(self, transformation_matrix, y):
        super().__init__()
        self.M = tf.Variable(transformation_matrix, trainable=False, dtype='float32')
        self.X = tf.Variable(np.random.randint(0, 4, bond_types.shape), trainable=True, dtype='float32')
        self.Y = tf.Variable(y, trainable=False, dtype='float32')
        self.mse = tf.keras.losses.MeanSquaredError()

    def call(self, inputs, training=False):
        X_ = inputs
        
        l1 = t(self.M @ t(self.X) - t(self.Y))
        l2 = self.X - X_

        return self.mse(l1) * .9 + self.mse(l2) * .1
    
    def loss(self, y, y_pred):
        return y_pred + y


model = matrixApproximator(
    TM.to_numpy().astype('float32'),
    atom_counts.to_numpy().astype('float32')
)

model.compile(
    loss = model.loss,
    optimizer = 'adam'
)

In [20]:
model.build()

TypeError: build() missing 1 required positional argument: 'input_shape'

In [16]:
model.fit(
    bond_types.to_numpy().astype('float32'), 
    np.zeros((len(bond_types), 1)), 
    epochs=10
)

Epoch 1/10


TypeError: in user code:

    File "/Users/mansurnurmukhambetov/miniforge3/lib/python3.9/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/Users/mansurnurmukhambetov/miniforge3/lib/python3.9/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/mansurnurmukhambetov/miniforge3/lib/python3.9/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/Users/mansurnurmukhambetov/miniforge3/lib/python3.9/site-packages/keras/engine/training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "/Users/mansurnurmukhambetov/miniforge3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    TypeError: Exception encountered when calling layer "matrix_approximator_3" (type matrixApproximator).
    
    in user code:
    
        File "/var/folders/s9/0x7864ms0sj16xzqw_gzzh_w0000gn/T/ipykernel_99264/2739305140.py", line 19, in call  *
            return self.mse(l1) * .9 + self.mse(l2) * .1
    
        TypeError: __call__() missing 1 required positional argument: 'y_pred'
    
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(None, 21), dtype=float32)
      • training=True
