Copyright 2021-2023 @ Shenzhen Bay Laboratory & Peking University & Huawei Technologies Co., Ltd

This code is a part of Cybertron package.

The Cybertron is open-source software based on the AI-framework:
MindSpore (https://www.mindspore.cn/)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and
limitations under the License.

Cybertron tutorial 05: Multi-task with multiple readouts (example 1)

In [1]:
import sys
import time
import numpy as np
import mindspore as ms
from mindspore import nn
from mindspore import Tensor
from mindspore import context
from mindspore import dataset as ds
from mindspore.train import Model
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig

from cybertron import Cybertron
from cybertron import MolCT
from cybertron import AtomwiseReadout
from cybertron.train import MAE, MLoss
from cybertron.train import WithLabelLossCell, WithLabelEvalCell
from cybertron.train import TrainMonitor
from cybertron.train import TransformerLR

# Configure the MindSpore execution context up front: graph mode on a GPU device.
context.set_context(
    mode=context.GRAPH_MODE,
    device_target="GPU",
)

In [2]:
# File names are built from the first sys.path entry plus a shared prefix.
# NOTE(review): sys.path[0] is not guaranteed to be the notebook directory in
# every Jupyter front end -- confirm that the .npz files resolve in your setup.
data_name = f"{sys.path[0]}/dataset_qm9_normed_"
train_file = f"{data_name}trainset_1024.npz"
valid_file = f"{data_name}validset_128.npz"

# Both archives stay open; later cells index into them to build the datasets.
train_data = np.load(train_file)
valid_data = np.load(valid_file)

# Indices of the four target properties selected from the label arrays.
idx = [7, 8, 9, 10]  # U0,U,G,H

# Atom count and the per-target constants, all read from the training archive
# and restricted to the selected targets.
num_atom = int(train_data['num_atoms'])
ref = Tensor(train_data['type_ref'][:, idx], ms.float32)
scale = Tensor(train_data['scale'][idx], ms.float32)
shift = Tensor(train_data['shift'][idx], ms.float32)

In [3]:
# The deep molecular model (MolCT) that every readout below is built on.
mod = MolCT(
    # geometry
    length_unit='nm',
    cutoff=1,  # expressed in `length_unit`
    # network size
    dim_feature=128,
    n_interaction=3,
    n_heads=8,
    max_cycles=1,
    # non-linearity
    activation='swish',
)

In [4]:
# Four separate single-output atom-wise readouts (one per target), each
# constructed from the same model `mod`.
readout0, readout1, readout2, readout3 = (
    AtomwiseReadout(mod, dim_output=1) for _ in range(4)
)

In [5]:
# Assemble the full network: one shared model feeding the four readouts.
net = Cybertron(
    mod,
    readout=[readout0, readout1, readout2, readout3],
    num_atoms=num_atom,
    length_unit='nm',
)

In [6]:
# NOTE(review): only readouts 0, 1 and 3 are listed in `readout_id`; readout 2
# is left out -- confirm this is intentional.
# Kept as the last expression of the cell so the returned object is displayed.
net.set_scaleshift(
    [1, 1, 1],
    0,
    readout_id=[0, 1, 3],
)

Cybertron<
  (model): MolCT<
    (activation): Swish<
      (sigmoid): Sigmoid<>
      >
    (atom_embedding): Embedding<vocab_size=64, embedding_size=128, use_one_hot=True, embedding_table=Parameter (name=model.atom_embedding.embedding_table, shape=(64, 128), dtype=Float32, requires_grad=True), dtype=Float32, padding_idx=None>
    (cutoff_fn): SmoothCutoff<>
    (rbf): LogGaussianBasis<>
    (dis_filter): ResFilter<
      (linear): Dense<input_channels=64, output_channels=128, has_bias=True>
      (residual): Residual<
        (nonlinear): MLP<
          (mlp): SequentialCell<
            (0): Dense<
              input_channels=128, output_channels=128, has_bias=True, activation=Swish<>
              (activation): Swish<
                (sigmoid): Sigmoid<>
                >
              >
            (1): Dense<input_channels=128, output_channels=128, has_bias=True>
            >
          >
        >
      >
    (interactions): CellList<
      (0): NeuralInteractionUnit<
        (

In [7]:
# Print a human-readable summary of the network configuration
# (units, model type, cutoff, basis functions, feature dimension, ...).
net.print_info()

Cybertron Engine, Ride-on!
--------------------------------------------------------------------------------
    Length unit: nm
    Input unit scale: 1
--------------------------------------------------------------------------------
    Deep molecular model:  MolCT
--------------------------------------------------------------------------------
       Length unit: nm
       Atom embedding size: 64
       Cutoff distance: 1.0 nm
       Radical basis function (RBF): LogGaussianBasis
          Minimum distance: 0.04 nm
          Maximum distance: 1.0 nm
          Reference distance: 1.0 nm
          Log Gaussian begin: -3.218876
          Log Gaussian end: 0.006724119
          Interval for log Gaussian: 0.0512
          Sigma for log gaussian: 0.3
          Number of basis functions: 64
          Rescale the range of RBF to (-1,1).
       Calculate distance: Yes
       Calculate bond: No
       Feature dimension: 128
-----------------------------------------------------------------------

In [8]:
# List every parameter of the network (index, name, shape) and accumulate the
# total number of elements across all of them.
tot_params = 0
for i, param in enumerate(net.get_parameters()):
    print(i, param.name, param.shape)
    tot_params += param.size

print('Total parameters: ', tot_params)

0 model.atom_embedding.embedding_table (64, 128)
1 model.dis_filter.linear.weight (128, 64)
2 model.dis_filter.linear.bias (128,)
3 model.dis_filter.residual.nonlinear.mlp.0.weight (128, 128)
4 model.dis_filter.residual.nonlinear.mlp.0.bias (128,)
5 model.dis_filter.residual.nonlinear.mlp.1.weight (128, 128)
6 model.dis_filter.residual.nonlinear.mlp.1.bias (128,)
7 model.interactions.0.positional_embedding.norm.gamma (128,)
8 model.interactions.0.positional_embedding.norm.beta (128,)
9 model.interactions.0.positional_embedding.x2q.weight (128, 128)
10 model.interactions.0.positional_embedding.x2k.weight (128, 128)
11 model.interactions.0.positional_embedding.x2v.weight (128, 128)
12 model.interactions.0.multi_head_attention.output.weight (128, 128)
13 model.interactions.1.positional_embedding.norm.gamma (128,)
14 model.interactions.1.positional_embedding.norm.beta (128,)
15 model.interactions.1.positional_embedding.x2q.weight (128, 128)
16 model.interactions.1.positional_embedding.x2k.

In [9]:
# Training schedule, collected in one place so it is easy to tune.
N_EPOCH = 8       # number of epochs passed to `model.train`
REPEAT_TIME = 1   # argument of `ds_train.repeat`
BATCH_SIZE = 32   # batch size of the training dataset

In [10]:
# Training pipeline: shuffled samples -> fixed-size batches (incomplete last
# batch dropped) -> repeated REPEAT_TIME times.
ds_train = (
    ds.NumpySlicesDataset(
        {
            'R': train_data['R'],
            'Z': train_data['Z'],
            'E': train_data['E'][:, idx],
        },
        shuffle=True,
    )
    .batch(BATCH_SIZE, drop_remainder=True)
    .repeat(REPEAT_TIME)
)

In [11]:
# Validation pipeline: samples kept in their stored order, batched by 128,
# repeated once.
ds_valid = (
    ds.NumpySlicesDataset(
        {
            'R': valid_data['R'],
            'Z': valid_data['Z'],
            'E': valid_data['E'][:, idx],
        },
        shuffle=False,
    )
    .batch(128)
    .repeat(1)
)

In [12]:
# Wrap `net` for training and for evaluation. Both wrappers use the 'RZE'
# input type and an MAE loss; the evaluation cell additionally receives the
# scale, shift and per-atom-type reference values loaded from the dataset.
loss_network = WithLabelLossCell(
    'RZE',
    net,
    nn.MAELoss(),
)
eval_network = WithLabelEvalCell(
    'RZE',
    net,
    nn.MAELoss(),
    scale=scale,
    shift=shift,
    type_ref=ref,
)

WithLabelLossCell with input type: RZE
WithLabelEvalCell with input type: RZE
   with scaleshift for training and evaluate dataset:
   Output.            Scale           Shift        Mode
   0:        1.824854e+01   -4.094204e+02    Atomwise
   1:        1.818079e+01   -4.118893e+02    Atomwise
   2:        1.816252e+01   -4.142276e+02    Atomwise
   3:        1.807273e+01   -3.811549e+02    Atomwise
   with reference value for atom types:
   Type     Label0    Label1    Label2    Label3
   0:        0.00e+00  0.00e+00  0.00e+00  0.00e+00
   1:       -1.31e+03 -1.31e+03 -1.31e+03 -1.34e+03
   2:        0.00e+00  0.00e+00  0.00e+00  0.00e+00
   3:        0.00e+00  0.00e+00  0.00e+00  0.00e+00
   4:        0.00e+00  0.00e+00  0.00e+00  0.00e+00
   5:        0.00e+00  0.00e+00  0.00e+00  0.00e+00
   6:       -9.94e+04 -9.94e+04 -9.94e+04 -9.94e+04
   7:       -1.43e+05 -1.43e+05 -1.43e+05 -1.43e+05
   8:       -1.97e+05 -1.97e+05 -1.97e+05 -1.97e+05
   9:       -2.62e+05 -2.62e+05 -2.62e+

In [13]:
# Adam over the trainable parameters of `net`, driven by a TransformerLR
# learning-rate schedule.
lr = TransformerLR(
    learning_rate=1.,
    warmup_steps=4000,
    dimension=128,
)
optim = nn.Adam(
    params=net.trainable_params(),
    learning_rate=lr,
)

In [14]:
# Metric names; `eval_loss` is reused later to select the best checkpoint.
eval_mae, atom_mae, eval_loss = 'EvalMAE', 'AtomMAE', 'Evalloss'

# NOTE(review): EvalMAE is built with indexes [1, 2] while AtomMAE uses
# [1, 2, 3] -- confirm that the difference is intentional.
model = Model(
    loss_network,
    optimizer=optim,
    eval_network=eval_network,
    metrics={
        eval_mae: MAE([1, 2], reduce_all_dims=False),
        atom_mae: MAE([1, 2, 3], reduce_all_dims=False, averaged_by_atoms=True),
        eval_loss: MLoss(0),
    },
)

In [15]:
# Output directory, and a file-name stem made of that directory name and the
# network's model name joined by an underscore.
outdir = 'Tutorial_C05'
outname = '_'.join((outdir, net.model_name))

# Monitoring callback: evaluates on `ds_valid` and tracks the `eval_loss`
# metric for the best checkpoint.
record_cb = TrainMonitor(
    model,
    outname,
    per_step=16,
    avg_steps=16,
    directory=outdir,
    eval_dataset=ds_valid,
    best_ckpt_metrics=eval_loss,
)

In [16]:
# Periodic checkpointing: save every 32 steps, keep at most 64 files, and
# append the network's hyper-parameters to each checkpoint.
config_ck = CheckpointConfig(
    save_checkpoint_steps=32,
    keep_checkpoint_max=64,
    append_info=[net.hyper_param],
)
ckpoint_cb = ModelCheckpoint(
    prefix=outname,
    directory=outdir,
    config=config_ck,
)

In [17]:
# Train for N_EPOCH epochs with both callbacks attached, then report how long
# the run took as HH:MM:SS.
print("Start training ...")

# perf_counter() is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during a long run (time.time() offers no such
# guarantee). Only the difference between the two readings is meaningful.
beg_time = time.perf_counter()
model.train(N_EPOCH, ds_train, callbacks=[record_cb, ckpoint_cb], dataset_sink_mode=False)
end_time = time.perf_counter()
used_time = end_time - beg_time

# Split the duration (in seconds) into hours, minutes and seconds.
m, s = divmod(used_time, 60)
h, m = divmod(m, 60)
print("Training Finished!")
print("Training Time: %02d:%02d:%02d" % (h, m, s))



Start training ...
Epoch: 1, Step: 16, Learning_rate: 5.2407836e-06, Last_Loss: 21.776154, Avg_loss: 21.48903799057007, EvalMAE: [477.3169  208.4878  203.64258 357.4829 ], AtomMAE: [25.44254  12.228753 12.051631 19.204245], Evalloss: 17.154184341430664
Epoch: 1, Step: 32, Learning_rate: 1.0830951e-05, Last_Loss: 15.078869, Avg_loss: 17.91274631023407, EvalMAE: [315.22852 207.03613 200.96289 257.17676], AtomMAE: [16.944658 12.277215 11.844267 14.266481], Evalloss: 13.489100456237793
Epoch: 2, Step: 48, Learning_rate: 1.6421121e-05, Last_Loss: 14.07023, Avg_loss: 15.308237612247467, EvalMAE: [218.44775 205.50586 196.97705 223.73242], AtomMAE: [12.739961 12.304991 11.650617 13.239799], Evalloss: 11.624649047851562
Epoch: 2, Step: 64, Learning_rate: 2.2011289e-05, Last_Loss: 12.007715, Avg_loss: 13.159273028373718, EvalMAE: [225.1123  200.38965 194.05762 223.02881], AtomMAE: [13.46082  11.980479 11.533082 13.308242], Evalloss: 11.595827102661133
Epoch: 3, Step: 80, Learning_rate: 2.760146e