From 85bc5866f78478a3892dd8dad6b7f1cfd1bf99b7 Mon Sep 17 00:00:00 2001
From: Chi Chen
Date: Mon, 27 Jul 2020 13:06:46 -0700
Subject: [PATCH] fix pydocstyle

---
 megnet/__init__.py                            |   3 +
 megnet/activations.py                         |   3 +
 megnet/callbacks.py                           |  52 +++---
 megnet/config.py                              |   4 +
 megnet/data/crystal.py                        |  24 +++
 megnet/data/graph.py                          | 156 ++++++++++++++----
 megnet/data/local_env.py                      |  18 +-
 megnet/data/molecule.py                       |  14 ++
 .../layers/featurizer/_gaussian_expansion.py  |   9 +-
 megnet/layers/graph/base.py                   |  53 +++---
 megnet/layers/graph/cgcnn.py                  |  86 ++++++++--
 megnet/layers/graph/megnet.py                 | 102 ++++++++++--
 megnet/layers/graph/schnet.py                 |  83 +++++++++-
 megnet/layers/readout/linear.py               |  36 +++-
 megnet/layers/readout/set2set.py              |  67 +++++---
 megnet/losses.py                              |   3 +
 megnet/models/megnet.py                       |   8 +
 megnet/utils/descriptor.py                    |  12 +-
 megnet/utils/preprocessing.py                 |  14 +-
 19 files changed, 599 insertions(+), 148 deletions(-)

diff --git a/megnet/__init__.py b/megnet/__init__.py
index 3d30a5ffb..0621966d2 100644
--- a/megnet/__init__.py
+++ b/megnet/__init__.py
@@ -1 +1,4 @@
+"""
+MatErials Graph Network (MEGNet) package
+"""
 __version__ = "1.1.8"

diff --git a/megnet/activations.py b/megnet/activations.py
index f9e9af661..b4e3aba22 100644
--- a/megnet/activations.py
+++ b/megnet/activations.py
@@ -1,3 +1,6 @@
+"""
+Activation functions used in neural networks
+"""
 from typing import Callable, Any

 import tensorflow.keras.backend as kb
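As background for the activations module touched above, a minimal sketch of its shifted softplus (this assumes `softplus2` is the name exported by `megnet.activations`, as in megnet 1.1.x; the example is illustrative and not part of the patch):

import tensorflow as tf
from megnet.activations import softplus2

# softplus2 is the shifted softplus f(x) = ln(1 + exp(x)) - ln(2),
# which passes through zero at x = 0 and grows linearly for large x.
x = tf.constant([-2.0, 0.0, 2.0])
print(softplus2(x).numpy())  # approximately [-0.5662, 0.0, 1.4338]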
diff --git a/megnet/callbacks.py b/megnet/callbacks.py
index e0f1b0f3e..870f9e8da 100644
--- a/megnet/callbacks.py
+++ b/megnet/callbacks.py
@@ -1,3 +1,6 @@
+"""
+Callback functions used in the training process
+"""
 import logging
 import os
 import re
@@ -22,20 +25,6 @@ class ModelCheckpointMAE(Callback):
     """
     Save the best MAE model with target scaler
-
-    Args:
-        filepath (string): path to save the model file with format. For example
-            `weights.{epoch:02d}-{val_mae:.6f}.hdf5` will save the corresponding epoch and
-            val_mae in the filename
-        monitor (string): quantity to monitor, default to "val_mae"
-        verbose (int): 0 for no training log, 1 for only epoch-level log and 2 for batch-level log
-        save_best_only (bool): whether to save only the best model
-        save_weights_only (bool): whether to save the weights only excluding model structure
-        val_gen (generator): validation generator
-        steps_per_val (int): steps per epoch for validation generator
-        target_scaler (object): exposing inverse_transform method to scale the output
-        period (int): number of epoch interval for this callback
-        mode: (string) choose from "min", "max" or "auto"
     """

     def __init__(self,
@@ -49,6 +38,21 @@ def __init__(self,
                  target_scaler: Scaler = None,
                  period: int = 1,
                  mode: str = 'auto'):
+        """
+        Args:
+            filepath (string): path to save the model file with format. For example
+                `weights.{epoch:02d}-{val_mae:.6f}.hdf5` will save the corresponding epoch and
+                val_mae in the filename
+            monitor (string): quantity to monitor, default to "val_mae"
+            verbose (int): 0 for no training log, 1 for only epoch-level log and 2 for batch-level log
+            save_best_only (bool): whether to save only the best model
+            save_weights_only (bool): whether to save only the weights, excluding the model structure
+            val_gen (generator): validation generator
+            steps_per_val (int): steps per epoch for the validation generator
+            target_scaler (object): scaler exposing an inverse_transform method to rescale the output
+            period (int): interval, in epochs, between invocations of this callback
+            mode (string): choose from "min", "max" or "auto"
+        """
         super().__init__()
         if val_gen is None:
             raise ValueError('No validation data is provided!')
@@ -167,15 +171,6 @@ class ReduceLRUponNan(Callback):
     It has an extra function that patience for early stopping.
     This will move to indepedent callback in the future.

-    Args:
-        filepath (str): filepath for saved model checkpoint, should be consistent with
-            checkpoint callback
-        factor (float): a value < 1 for scaling the learning rate
-        verbose (bool): whether to show the loading event
-        patience (int): number of steps that the val mae does not change.
-            It is a criteria for early stopping
-        monitor (str): target metric to monitor
-        mode (str): min, max or auto
     """

     def __init__(self,
@@ -185,6 +180,17 @@ def __init__(self,
                  patience: int = 500,
                  monitor: str = 'val_mae',
                  mode: str = 'auto'):
+        """
+        Args:
+            filepath (str): filepath for the saved model checkpoint; should be consistent with
+                the checkpoint callback
+            factor (float): a value < 1 for scaling the learning rate
+            verbose (bool): whether to show the loading event
+            patience (int): number of steps over which val_mae does not improve;
+                this is the criterion for early stopping
+            monitor (str): target metric to monitor
+            mode (str): min, max or auto
+        """
         self.filepath = filepath
         self.verbose = verbose
         self.factor = factor
diff --git a/megnet/config.py b/megnet/config.py
index 92018a695..3acc30764 100644
--- a/megnet/config.py
+++ b/megnet/config.py
@@ -10,6 +10,10 @@


 class DataType:
+    """
+    Data types for tensorflow.
This enables users to choose + from 32-bit float and int, and 16-bit float and int + """ np_float = np.float32 np_int = np.int32 tf_float = tf.float32 diff --git a/megnet/data/crystal.py b/megnet/data/crystal.py index 2e31580d8..607ff3619 100644 --- a/megnet/data/crystal.py +++ b/megnet/data/crystal.py @@ -28,6 +28,14 @@ def __init__(self, bond_converter: Converter = None, cutoff: float = 5.0 ): + """ + Convert the structure into crystal graph + Args: + nn_strategy (str): NearNeighbor strategy + atom_converter (Converter): atom features converter + bond_converter (Converter): bond features converter + cutoff (float): cutoff radius + """ self.cutoff = cutoff super().__init__(nn_strategy=nn_strategy, atom_converter=atom_converter, bond_converter=bond_converter, cutoff=self.cutoff) @@ -46,10 +54,26 @@ def __init__(self, nn_strategy: Union[str, NearNeighbors] = 'VoronoiNN', atom_converter: Converter = None, bond_converter: Converter = None): + """ + + Args: + nn_strategy (str): NearNeighbor strategy + atom_converter (Converter): atom features converter + bond_converter (Converter): bond features converter + """ super().__init__(nn_strategy=nn_strategy, atom_converter=atom_converter, bond_converter=bond_converter) def convert(self, structure: Structure, state_attributes: List = None) -> Dict: + """ + Convert structure into graph + Args: + structure (Structure): pymatgen Structure + state_attributes (list): state attributes + + Returns: graph dictionary + + """ graph = super().convert(structure, state_attributes=state_attributes) return self._get_bond_type(graph) diff --git a/megnet/data/graph.py b/megnet/data/graph.py index f75d61696..65ba7bb1c 100644 --- a/megnet/data/graph.py +++ b/megnet/data/graph.py @@ -22,6 +22,13 @@ class Converter(MSONable): Base class for atom or bond converter """ def convert(self, d: Any) -> Any: + """ + Convert the object d + Args: + d (Any): Any object d + + Returns: returned object + """ raise NotImplementedError @@ -42,6 +49,14 @@ def __init__(self, atom_converter: Converter = None, bond_converter: Converter = None, **kwargs): + """ + + Args: + nn_strategy (str or NearNeighbors): NearNeighbor strategy + atom_converter (Converter): atom converter + bond_converter (Converter): bond converter + **kwargs: + """ if isinstance(nn_strategy, str): strategy = local_env.get(nn_strategy) @@ -111,6 +126,14 @@ def get_atom_features(structure) -> List[int]: dtype='int32').tolist() def __call__(self, structure: Structure) -> Dict: + """ + Directly apply the converter to structure, alias to convert + Args: + structure (Structure): input structure + + Returns (dict): graph dictionary + + """ return self.convert(structure) def get_input(self, structure: Structure) -> List[np.ndarray]: @@ -168,6 +191,10 @@ def _get_dummy_converter() -> 'DummyConverter': return DummyConverter() def as_dict(self) -> Dict: + """ + Serialize to dict + Returns: (dict) dictionary of information + """ all_dict = super().as_dict() if 'nn_strategy' in all_dict: nn_strategy = all_dict.pop('nn_strategy') @@ -176,6 +203,14 @@ def as_dict(self) -> Dict: @classmethod def from_dict(cls, d: Dict) -> 'StructureGraph': + """ + Initialization from dictionary + Args: + d (dict): dictionary + + Returns: StructureGraph object + + """ if 'nn_strategy' in d: nn_strategy = d.pop('nn_strategy') nn_strategy_obj = local_env.deserialize(nn_strategy) @@ -220,6 +255,14 @@ def convert(self, structure: Structure, state_attributes: List = None) -> Dict: @classmethod def from_structure_graph(cls, structure_graph: StructureGraph) -> 
'StructureGraphFixedRadius': + """ + Initialize from pymatgen StructureGraph + Args: + structure_graph (StructureGraph): pymatgen StructureGraph object + + Returns: StructureGraphFixedRadius object + + """ return cls(nn_strategy=structure_graph.nn_strategy, atom_converter=structure_graph.atom_converter, bond_converter=structure_graph.bond_converter) @@ -231,17 +274,27 @@ class DummyConverter(Converter): """ def convert(self, d: Any) -> Any: + """ + Dummy convert, does nothing to input + Args: + d (Any): input object + + Returns: d + + """ return d class EmbeddingMap(Converter): """ Convert an integer to a row vector in a feature matrix - Args: - feature_matrix: (np.ndarray) A matrix of shape (N, M) """ def __init__(self, feature_matrix: np.ndarray): + """ + Args: + feature_matrix: (np.ndarray) A matrix of shape (N, M) + """ self.feature_matrix = np.array(feature_matrix) def convert(self, int_array: np.ndarray) -> np.ndarray: @@ -258,12 +311,15 @@ def convert(self, int_array: np.ndarray) -> np.ndarray: class GaussianDistance(Converter): """ Expand distance with Gaussian basis sit at centers and with width 0.5. - Args: - centers: (np.array) - width: (float) """ def __init__(self, centers: np.ndarray = np.linspace(0, 5, 100), width=0.5): + """ + + Args: + centers: (np.array) centers for the Gaussian basis + width: (float) width of Gaussian basis + """ self.centers = centers self.width = width @@ -378,16 +434,43 @@ def _combine_graph_data(self, return inputs def on_epoch_end(self): + """ + code to be executed on epoch end + """ if self.is_shuffle: self.mol_index = np.random.permutation(self.mol_index) def process_atom_feature(self, x: np.ndarray) -> np.ndarray: + """ + Args: + x (np.ndarray): atom features + + Returns: + processed atom features + + """ return x def process_bond_feature(self, x: np.ndarray) -> np.ndarray: + """ + Args: + x (np.ndarray): bond features + + Returns: + processed bond features + + """ return x def process_state_feature(self, x: np.ndarray) -> np.ndarray: + """ + Args: + x (np.ndarray): state features + + Returns: + processed state features + + """ return x def __getitem__(self, index: int) -> tuple: @@ -430,18 +513,6 @@ class GraphBatchGenerator(BaseGraphBatchGenerator): """ A generator class that assembles several structures (indicated by batch_size) and form (x, y) pairs for model training. - Args: - atom_features: (list of np.array) list of atom feature matrix, - bond_features: (list of np.array) list of bond features matrix - state_features: (list of np.array) list of [1, G] state features, - where G is the global state feature dimension - index1_list: (list of integer) list of (M, ) one side atomic index of the bond, - M is different for different structures - index2_list: (list of integer) list of (M, ) the other side atomic - index of the bond, M is different for different structures, - but it has to be the same as the corresponding index1. 
-        targets: (numpy array), N*1, where N is the number of structures
-        batch_size: (int) number of samples in a batch
     """

     def __init__(self,
@@ -453,6 +524,20 @@ def __init__(self,
                  targets: np.ndarray = None,
                  batch_size: int = 128,
                  is_shuffle: bool = True):
+        """
+        Args:
+            atom_features: (list of np.array) list of atom feature matrix,
+            bond_features: (list of np.array) list of bond features matrix
+            state_features: (list of np.array) list of [1, G] state features,
+                where G is the global state feature dimension
+            index1_list: (list of integer) list of (M, ) one side atomic index of the bond,
+                M is different for different structures
+            index2_list: (list of integer) list of (M, ) the other side atomic
+                index of the bond, M is different for different structures,
+                but it has to be the same as the corresponding index1.
+            targets: (numpy array), N*1, where N is the number of structures
+            batch_size: (int) number of samples in a batch
+        """
         super().__init__(len(atom_features), targets, batch_size, is_shuffle)
         self.atom_features = atom_features
         self.bond_features = bond_features
@@ -486,18 +571,6 @@ def _generate_inputs(self, batch_index: list) -> tuple:
 class GraphBatchDistanceConvert(GraphBatchGenerator):
     """
     Generate batch of structures with bond distance being expanded using a Expansor
-    Args:
-        atom_features: (list of np.array) list of atom feature matrix,
-        bond_features: (list of np.array) list of bond features matrix
-        state_features: (list of np.array) list of [1, G] state features, where G is the global state feature dimension
-        index1_list: (list of integer) list of (M, ) one side atomic index of the bond, M is different for different
-            structures
-        index2_list: (list of integer) list of (M, ) the other side atomic index of the bond, M is different for
-            different structures, but it has to be the same as the correponding index1.
-        targets: (numpy array), N*1, where N is the number of structures
-        batch_size: (int) number of samples in a batch
-        is_shuffle: (bool) whether to shuffle the structure, default to True
-        distance_converter: (bool) converter for processing the distances
     """

     def __init__(self,
@@ -510,6 +583,23 @@ def __init__(self,
                  batch_size: int = 128,
                  is_shuffle: bool = True,
                  distance_converter: Converter = None):
+        """
+        Args:
+            atom_features: (list of np.array) list of atom feature matrix,
+            bond_features: (list of np.array) list of bond features matrix
+            state_features: (list of np.array) list of [1, G] state features,
+                where G is the global state feature dimension
+            index1_list: (list of integer) list of (M, ) one side atomic index
+                of the bond, M is different for different structures
+            index2_list: (list of integer) list of (M, ) the other side atomic
+                index of the bond, M is different for different structures,
+                but it has to be the same as the corresponding index1.
+ targets: (numpy array), N*1, where N is the number of structures + batch_size: (int) number of samples in a batch + is_shuffle: (bool) whether to shuffle the structure, default to True + distance_converter: (bool) converter for processing the distances + """ super().__init__(atom_features=atom_features, bond_features=bond_features, state_features=state_features, @@ -523,6 +613,14 @@ def __init__(self, self.distance_converter = distance_converter def process_bond_feature(self, x) -> np.ndarray: + """ + Convert bond distances into Gaussian expanded vectors + Args: + x (np.ndarray): input distance array + + Returns: expanded matrix + + """ return self.distance_converter.convert(x) diff --git a/megnet/data/local_env.py b/megnet/data/local_env.py index b43f65262..b51cc7bb6 100644 --- a/megnet/data/local_env.py +++ b/megnet/data/local_env.py @@ -26,13 +26,14 @@ class MinimumDistanceNNAll(NearNeighbors): """ Determine bonded sites by fixed cutoff - - Args:. - cutoff (float): cutoff radius in Angstrom to look for trial - near-neighbor sites (default: 4.0). """ def __init__(self, cutoff: float = 4.0): + """ + Args:. + cutoff (float): cutoff radius in Angstrom to look for trial + near-neighbor sites (default: 4.0). + """ self.cutoff = cutoff def get_nn_info(self, structure: Structure, @@ -71,6 +72,15 @@ class AllAtomPairs(NearNeighbors): """ def get_nn_info(self, molecule: Molecule, n: int) -> List[Dict]: + """ + Get near neighbor information + Args: + molecule (Molecule): pymatgen Molecule + n (int): number of molecule + + Returns: List of neighbor dictionary + + """ site = molecule[n] siw = [] for i, s in enumerate(molecule): diff --git a/megnet/data/molecule.py b/megnet/data/molecule.py index d4becb36b..85be55e67 100644 --- a/megnet/data/molecule.py +++ b/megnet/data/molecule.py @@ -55,6 +55,12 @@ def __init__(self, atom_converter: Converter = None, bond_converter: Converter = None, ): + """ + Args: + nn_strategy (str): NearNeighbor strategy + atom_converter (Converter): atomic features converter object + bond_converter (Converter): bond features converter object + """ if bond_converter is None: bond_converter = GaussianDistance(np.linspace(0, 4, 20), 0.5) super().__init__(nn_strategy=nn_strategy, atom_converter=atom_converter, @@ -453,6 +459,14 @@ def dijkstra_distance(bonds: List[List[int]]) -> np.ndarray: def mol_from_smiles(smiles: str): + """ + load molecule object from smiles string + Args: + smiles (string): smiles string + + Returns: + openbabel molecule + """ mol = pybel.readstring(format='smi', string=smiles) mol.make3D() return mol diff --git a/megnet/layers/featurizer/_gaussian_expansion.py b/megnet/layers/featurizer/_gaussian_expansion.py index 018667145..91ddc9066 100644 --- a/megnet/layers/featurizer/_gaussian_expansion.py +++ b/megnet/layers/featurizer/_gaussian_expansion.py @@ -12,11 +12,14 @@ class GaussianExpansion(Layer): A vector of distance [d1, d2, d3, ..., dn] is expanded to a matrix of shape [n, m], where m is the number of Gaussian basis centers - Args: - centers (np.ndarray): Gaussian basis centers - width (float): width of the Gaussian basis """ def __init__(self, centers, width, **kwargs): + """ + Args: + centers (np.ndarray): Gaussian basis centers + width (float): width of the Gaussian basis + **kwargs: + """ self.centers = np.array(centers).ravel() self.width = width super().__init__(**kwargs) diff --git a/megnet/layers/graph/base.py b/megnet/layers/graph/base.py index b14fb36e4..6e3d69613 100644 --- a/megnet/layers/graph/base.py +++ b/megnet/layers/graph/base.py 
@@ -25,21 +25,6 @@ class GraphNetworkLayer(Layer): neural networks for each update function, and sum or mean for each aggregation function - Args: - activation (str): Default: None. The activation function used for each - sub-neural network. Examples include 'relu', 'softmax', 'tanh', - 'sigmoid' and etc. - use_bias (bool): Default: True. Whether to use the bias term in the - neural network. - kernel_initializer (str): Default: 'glorot_uniform'. Initialization - function for the layer kernel weights, - bias_initializer (str): Default: 'zeros' - activity_regularizer (str): Default: None. The regularization function - for the output - kernel_constraint (str): Default: None. Keras constraint for kernel - values - bias_constraint (str): Default: None .Keras constraint for bias values - Method: call(inputs, mask=None): the logic of the layer, returns the final graph compute_output_shape(input_shape): compute static output shapes, returns list of tuple shapes @@ -65,6 +50,23 @@ def __init__(self, kernel_constraint: OptStrOrCallable = None, bias_constraint: OptStrOrCallable = None, **kwargs): + """ + Args: + activation (str): Default: None. The activation function used for each + sub-neural network. Examples include 'relu', 'softmax', 'tanh', + 'sigmoid' and etc. + use_bias (bool): Default: True. Whether to use the bias term in the + neural network. + kernel_initializer (str): Default: 'glorot_uniform'. Initialization + function for the layer kernel weights, + bias_initializer (str): Default: 'zeros' + activity_regularizer (str): Default: None. The regularization function + for the output + kernel_constraint (str): Default: None. Keras constraint for kernel + values + bias_constraint (str): Default: None .Keras constraint for bias values + **kwargs: + """ if 'input_shape' not in kwargs and 'input_dim' in kwargs: kwargs['input_shape'] = (kwargs.pop('input_dim'),) self.activation = activations.get(activation) # noqa @@ -79,6 +81,15 @@ def __init__(self, super().__init__(**kwargs) def call(self, inputs: Sequence, mask=None) -> Sequence: + """ + Core logic of graph network + Args: + inputs (Sequence): input tensors + mask (tensor): mask tensor + + Returns: output tensor + + """ e_p = self.phi_e(inputs) b_ei_p = self.rho_e_v(e_p, inputs) v_p = self.phi_v(b_ei_p, inputs) @@ -88,7 +99,7 @@ def call(self, inputs: Sequence, mask=None) -> Sequence: return [v_p, e_p, u_p] def phi_e(self, inputs: Sequence) -> tf.Tensor: - """ + r""" This is for updating the edge attributes ek' = phi_e(ek, vrk, vsk, u) @@ -101,7 +112,7 @@ def phi_e(self, inputs: Sequence) -> tf.Tensor: raise NotImplementedError def rho_e_v(self, e_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: - """ + r""" This is for step 2, aggregate edge attributes per node Ei' = {(ek', rk, sk)} with rk =i, k=1:Ne @@ -114,7 +125,7 @@ def rho_e_v(self, e_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: raise NotImplementedError def phi_v(self, b_ei_p: tf.Tensor, inputs: Sequence): - """ + r""" Step 3. 
Compute updated node attributes v_i' = phi_v(\bar e_i, vi, u) @@ -127,7 +138,7 @@ def phi_v(self, b_ei_p: tf.Tensor, inputs: Sequence): raise NotImplementedError def rho_e_u(self, e_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: - """ + r""" let V' = {v'} i = 1:Nv let E' = {(e_k', rk, sk)} k = 1:Ne \bar e' = rho_e_u(E') @@ -141,7 +152,7 @@ def rho_e_u(self, e_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: raise NotImplementedError def rho_v_u(self, v_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: - """ + r""" \bar v' = rho_v_u(V') Args: @@ -153,7 +164,7 @@ def rho_v_u(self, v_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: raise NotImplementedError def phi_u(self, b_e_p: tf.Tensor, b_v_p: tf.Tensor, inputs: Sequence) -> tf.Tensor: - """ + r""" u' = phi_u(\bar e', \bar v', u) Args: b_e_p (tf.Tensor): edge/bond to global aggregated tensor diff --git a/megnet/layers/graph/cgcnn.py b/megnet/layers/graph/cgcnn.py index 1c444f419..d49455cd0 100644 --- a/megnet/layers/graph/cgcnn.py +++ b/megnet/layers/graph/cgcnn.py @@ -1,3 +1,6 @@ +""" +CGCNN layer +""" import tensorflow as tf import tensorflow.keras.backend as kb @@ -10,16 +13,6 @@ class CrystalGraphLayer(GraphNetworkLayer): Xie et al. PHYSICAL REVIEW LETTERS 120, 145301 (2018) - Args: - activation (str): Default: None. The activation function used for each sub-neural network. Examples include - 'relu', 'softmax', 'tanh', 'sigmoid' and etc. - use_bias (bool): Default: True. Whether to use the bias term in the neural network. - kernel_initializer (str): Default: 'glorot_uniform'. Initialization function for the layer kernel weights, - bias_initializer (str): Default: 'zeros' - activity_regularizer (str): Default: None. The regularization function for the output - kernel_constraint (str): Default: None. Keras constraint for kernel values - bias_constraint (str): Default: None .Keras constraint for bias values - Methods: call(inputs, mask=None): the logic of the layer, returns the final graph compute_output_shape(input_shape): compute static output shapes, returns list of tuple shapes @@ -45,6 +38,19 @@ def __init__(self, kernel_constraint=None, bias_constraint=None, **kwargs): + """ + + Args: + activation (str): Default: None. The activation function used for each sub-neural network. Examples include + 'relu', 'softmax', 'tanh', 'sigmoid' and etc. + use_bias (bool): Default: True. Whether to use the bias term in the neural network. + kernel_initializer (str): Default: 'glorot_uniform'. Initialization function for the layer kernel weights, + bias_initializer (str): Default: 'zeros' + activity_regularizer (str): Default: None. The regularization function for the output + kernel_constraint (str): Default: None. 
Keras constraint for kernel values + bias_constraint (str): Default: None .Keras constraint for bias values + kwargs (dictionary): additional keyword args + """ super().__init__(activation=activation, use_bias=use_bias, @@ -58,6 +64,12 @@ def __init__(self, **kwargs) def build(self, input_shapes): + """ + Build the weights for the layer + Args: + input_shapes (sequence of tuple): the shapes of all input tensors + + """ vdim = input_shapes[0][2] edim = input_shapes[1][2] @@ -82,9 +94,24 @@ def build(self, input_shapes): self.built = True def compute_output_shape(self, input_shape): + """ + Compute output shapes from input shapes + Args: + input_shape (sequence of tuple): input shapes + + Returns: sequence of tuples output shapes + + """ return input_shape def phi_e(self, inputs): + """ + Edge update function + Args: + inputs (tuple of tensor) + Returns: + output tensor + """ nodes, edges, u, index1, index2, gnode, gbond = inputs return edges @@ -111,16 +138,49 @@ def rho_e_v(self, e_p, inputs): return tf.transpose(a=tf.math.segment_sum(tf.transpose(a=summed, perm=[1, 0, 2]), index1), perm=[1, 0, 2]) def phi_v(self, b_ei_p, inputs): + """ + Node update function + Args: + b_ei_p (tensor): edge aggregated tensor + inputs (tuple of tensors): other graph inputs + + Returns: updated node tensor + + """ nodes, edges, u, index1, index2, gnode, gbond = inputs return nodes + b_ei_p def rho_e_u(self, e_p, inputs): + """ + aggregate edge to state + Args: + e_p (tensor): edge tensor + inputs (tuple of tensors): other graph input tensors + + Returns: edge aggregated tensor for states + + """ return 0 def rho_v_u(self, v_p, inputs): + """ + Args: + v_p (tf.Tensor): updated atom/node attributes + inputs (Sequence): list or tuple for the graph inputs + Returns: + atom/node to global/state aggregated tensor + """ return 0 def phi_u(self, b_e_p, b_v_p, inputs): + """ + Args: + b_e_p (tf.Tensor): edge/bond to global aggregated tensor + b_v_p (tf.Tensor): node/atom to global aggregated tensor + inputs (Sequence): list or tuple for the graph inputs + Returns: + updated globa/state attributes + """ return inputs[2] def _mlp(self, input_, weights, bias): @@ -128,5 +188,11 @@ def _mlp(self, input_, weights, bias): return output def get_config(self): + """ + Part of keras layer interface, where the signature is converted into a dict + Returns: + configurational dictionary + + """ base_config = super().get_config() return dict(list(base_config.items())) diff --git a/megnet/layers/graph/megnet.py b/megnet/layers/graph/megnet.py index 7406ebeeb..a56a25a52 100644 --- a/megnet/layers/graph/megnet.py +++ b/megnet/layers/graph/megnet.py @@ -1,3 +1,6 @@ +""" +Megnet graph layer implementation +""" import tensorflow as tf import tensorflow.keras.backend as kb @@ -17,21 +20,6 @@ class MEGNetLayer(GraphNetworkLayer): Chen, Chi; Ye, Weike Ye; Zuo, Yunxing; Zheng, Chen; Ong, Shyue Ping. Graph Networks as a Universal Machine Learning Framework for Molecules and Crystals, 2018, arXiv preprint. [arXiv:1812.05055](https://arxiv.org/abs/1812.05055) - - Args: - units_v (list of integers): the hidden layer sizes for node update neural network - units_e (list of integers): the hidden layer sizes for edge update neural network - units_u (list of integers): the hidden layer sizes for state update neural network - pool_method (str): 'mean' or 'sum', determines how information is gathered to nodes from neighboring edges - activation (str): Default: None. The activation function used for each sub-neural network. 
Examples include - 'relu', 'softmax', 'tanh', 'sigmoid' and etc. - use_bias (bool): Default: True. Whether to use the bias term in the neural network. - kernel_initializer (str): Default: 'glorot_uniform'. Initialization function for the layer kernel weights, - bias_initializer (str): Default: 'zeros' - activity_regularizer (str): Default: None. The regularization function for the output - kernel_constraint (str): Default: None. Keras constraint for kernel values - bias_constraint (str): Default: None .Keras constraint for bias values - Methods: call(inputs, mask=None): the logic of the layer, returns the final graph compute_output_shape(input_shape): compute static output shapes, returns list of tuple shapes @@ -61,6 +49,21 @@ def __init__(self, kernel_constraint=None, bias_constraint=None, **kwargs): + """ + Args: + units_v (list of integers): the hidden layer sizes for node update neural network + units_e (list of integers): the hidden layer sizes for edge update neural network + units_u (list of integers): the hidden layer sizes for state update neural network + pool_method (str): 'mean' or 'sum', determines how information is gathered to nodes from neighboring edges + activation (str): Default: None. The activation function used for each sub-neural network. Examples include + 'relu', 'softmax', 'tanh', 'sigmoid' and etc. + use_bias (bool): Default: True. Whether to use the bias term in the neural network. + kernel_initializer (str): Default: 'glorot_uniform'. Initialization function for the layer kernel weights, + bias_initializer (str): Default: 'zeros' + activity_regularizer (str): Default: None. The regularization function for the output + kernel_constraint (str): Default: None. Keras constraint for kernel values + bias_constraint (str): Default: None .Keras constraint for bias values + """ super().__init__(activation=activation, use_bias=use_bias, @@ -86,6 +89,12 @@ def __init__(self, raise ValueError('Pool method: ' + pool_method + ' not understood!') def build(self, input_shapes): + """ + Build the weights for the layer + Args: + input_shapes (sequence of tuple): the shapes of all input tensors + + """ vdim = input_shapes[0][2] edim = input_shapes[1][2] udim = input_shapes[2][2] @@ -151,6 +160,14 @@ def build(self, input_shapes): self.built = True def compute_output_shape(self, input_shape): + """ + Compute output shapes from input shapes + Args: + input_shape (sequence of tuple): input shapes + + Returns: sequence of tuples output shapes + + """ node_feature_shape = input_shape[0] edge_feature_shape = input_shape[1] state_feature_shape = input_shape[2] @@ -161,6 +178,13 @@ def compute_output_shape(self, input_shape): return output_shape def phi_e(self, inputs): + """ + Edge update function + Args: + inputs (tuple of tensor) + Returns: + output tensor + """ nodes, edges, u, index1, index2, gnode, gbond = inputs index1 = tf.reshape(index1, (-1,)) index2 = tf.reshape(index2, (-1,)) @@ -172,27 +196,69 @@ def phi_e(self, inputs): return self._mlp(concated, self.phi_e_weights, self.phi_e_biases) def rho_e_v(self, e_p, inputs): + """ + Reduce edge attributes to node attribute, eqn 5 in the paper + Args: + e_p: updated bond + inputs: the whole input list + + Returns: summed tensor + + """ node, edges, u, index1, index2, gnode, gbond = inputs index1 = tf.reshape(index1, (-1,)) return tf.expand_dims(self.seg_method(tf.squeeze(e_p), index1), axis=0) def phi_v(self, b_ei_p, inputs): + """ + Node update function + Args: + b_ei_p (tensor): edge aggregated tensor + inputs (tuple of tensors): 
other graph inputs + + Returns: updated node tensor + + """ nodes, edges, u, index1, index2, gnode, gbond = inputs u_expand = repeat_with_index(u, gnode, axis=1) concated = tf.concat([b_ei_p, nodes, u_expand], axis=-1) return self._mlp(concated, self.phi_v_weights, self.phi_v_biases) def rho_e_u(self, e_p, inputs): + """ + aggregate edge to state + Args: + e_p (tensor): edge tensor + inputs (tuple of tensors): other graph input tensors + + Returns: edge aggregated tensor for states + + """ nodes, edges, u, index1, index2, gnode, gbond = inputs gbond = tf.reshape(gbond, (-1,)) return tf.expand_dims(self.seg_method(tf.squeeze(e_p), gbond), axis=0) def rho_v_u(self, v_p, inputs): + """ + Args: + v_p (tf.Tensor): updated atom/node attributes + inputs (Sequence): list or tuple for the graph inputs + Returns: + atom/node to global/state aggregated tensor + """ nodes, edges, u, index1, index2, gnode, gbond = inputs gnode = tf.reshape(gnode, (-1,)) return tf.expand_dims(self.seg_method(tf.squeeze(v_p, axis=0), gnode), axis=0) def phi_u(self, b_e_p, b_v_p, inputs): + """ + Args: + b_e_p (tf.Tensor): edge/bond to global aggregated tensor + b_v_p (tf.Tensor): node/atom to global aggregated tensor + inputs (Sequence): list or tuple for the graph inputs + Returns: + updated globa/state attributes + """ concated = tf.concat([b_e_p, b_v_p, inputs[2]], axis=-1) return self._mlp(concated, self.phi_u_weights, self.phi_u_biases) @@ -206,6 +272,12 @@ def _mlp(self, input_, weights, biases): return output def get_config(self): + """ + Part of keras layer interface, where the signature is converted into a dict + Returns: + configurational dictionary + + """ config = { 'units_e': self.units_e, 'units_v': self.units_v, diff --git a/megnet/layers/graph/schnet.py b/megnet/layers/graph/schnet.py index 13d85d76b..8f990bcf4 100644 --- a/megnet/layers/graph/schnet.py +++ b/megnet/layers/graph/schnet.py @@ -1,3 +1,6 @@ +""" +Schnet implementation +""" import tensorflow as tf import tensorflow.keras.backend as kb @@ -11,15 +14,6 @@ class InteractionLayer(GraphNetworkLayer): Schütt et al. SchNet: A continuous-filter convolutional neural network for modeling quantum interactions - Args: - activation (str): Default: None. The activation function used for each sub-neural network. Examples include - 'relu', 'softmax', 'tanh', 'sigmoid' and etc. - use_bias (bool): Default: True. Whether to use the bias term in the neural network. - kernel_initializer (str): Default: 'glorot_uniform'. Initialization function for the layer kernel weights, - bias_initializer (str): Default: 'zeros' - activity_regularizer (str): Default: None. The regularization function for the output - kernel_constraint (str): Default: None. Keras constraint for kernel values - bias_constraint (str): Default: None .Keras constraint for bias values Methods: call(inputs, mask=None): the logic of the layer, returns the final graph @@ -46,6 +40,17 @@ def __init__(self, kernel_constraint=None, bias_constraint=None, **kwargs): + """ + Args: + activation (str): Default: None. The activation function used for each sub-neural network. Examples include + 'relu', 'softmax', 'tanh', 'sigmoid' and etc. + use_bias (bool): Default: True. Whether to use the bias term in the neural network. + kernel_initializer (str): Default: 'glorot_uniform'. Initialization function for the layer kernel weights, + bias_initializer (str): Default: 'zeros' + activity_regularizer (str): Default: None. The regularization function for the output + kernel_constraint (str): Default: None. 
Keras constraint for kernel values + bias_constraint (str): Default: None .Keras constraint for bias values + """ super().__init__(activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, @@ -58,6 +63,12 @@ def __init__(self, **kwargs) def build(self, input_shapes): + """ + Build the weights for the layer + Args: + input_shapes (sequence of tuple): the shapes of all input tensors + + """ vdim = input_shapes[0][2] edim = input_shapes[1][2] @@ -102,9 +113,24 @@ def build(self, input_shapes): self.built = True def compute_output_shape(self, input_shape): + """ + Compute output shapes from input shapes + Args: + input_shape (sequence of tuple): input shapes + + Returns: sequence of tuples output shapes + + """ return input_shape def phi_e(self, inputs): + """ + Edge update function + Args: + inputs (tuple of tensor) + Returns: + output tensor + """ nodes, edges, u, index1, index2, gnode, gbond = inputs return edges @@ -139,16 +165,49 @@ def rho_e_v(self, e_p, inputs): return atomwise3 def phi_v(self, b_ei_p, inputs): + """ + Node update function + Args: + b_ei_p (tensor): edge aggregated tensor + inputs (tuple of tensors): other graph inputs + + Returns: updated node tensor + + """ nodes, edges, u, index1, index2, gnode, gbond = inputs return nodes + b_ei_p def rho_e_u(self, e_p, inputs): + """ + aggregate edge to state + Args: + e_p (tensor): edge tensor + inputs (tuple of tensors): other graph input tensors + + Returns: edge aggregated tensor for states + + """ return 0 def rho_v_u(self, v_p, inputs): + """ + Args: + v_p (tf.Tensor): updated atom/node attributes + inputs (Sequence): list or tuple for the graph inputs + Returns: + atom/node to global/state aggregated tensor + """ return 0 def phi_u(self, b_e_p, b_v_p, inputs): + """ + Args: + b_e_p (tf.Tensor): edge/bond to global aggregated tensor + b_v_p (tf.Tensor): node/atom to global aggregated tensor + inputs (Sequence): list or tuple for the graph inputs + Returns: + updated globa/state attributes + """ return inputs[2] def _mlp(self, input_, weights, bias): @@ -156,5 +215,11 @@ def _mlp(self, input_, weights, bias): return output def get_config(self): + """ + Part of keras layer interface, where the signature is converted into a dict + Returns: + configurational dictionary + + """ base_config = super().get_config() return base_config diff --git a/megnet/layers/readout/linear.py b/megnet/layers/readout/linear.py index 6c623c835..e7376a8ce 100644 --- a/megnet/layers/readout/linear.py +++ b/megnet/layers/readout/linear.py @@ -15,11 +15,13 @@ class LinearWithIndex(Layer): """ Sum or average the node/edge attributes to get a structure-level vector - - Args: - mode: (str) 'mean', 'sum', 'max', 'mean' or 'prod' """ def __init__(self, mode='mean', **kwargs): + """ + Args: + mode: (str) 'mean', 'sum', 'max', 'mean' or 'prod' + **kwargs: + """ super(LinearWithIndex, self).__init__(**kwargs) self.mode = mode self.reduce_method = MAPPING.get(mode, None) @@ -27,9 +29,24 @@ def __init__(self, mode='mean', **kwargs): raise ValueError('mode not supported') def build(self, input_shape): + """ + Build tensors + Args: + input_shape (sequence of tuple): input shapes + + """ self.built = True def call(self, inputs, mask=None): + """ + Main logic + Args: + inputs (tuple of tensor): input tensors + mask (tensor): mask tensor + + Returns: output tensor + + """ prop, index = inputs index = tf.reshape(index, (-1,)) prop = tf.transpose(a=prop, perm=[1, 0, 2]) @@ -38,10 +55,23 @@ def call(self, inputs, mask=None): return out def 
compute_output_shape(self, input_shape): + """ + Compute output shapes from input shapes + Args: + input_shape (sequence of tuple): input shapes + + Returns: sequence of tuples output shapes + + """ prop_shape = input_shape[0] return prop_shape[0], None, prop_shape[-1] def get_config(self): + """ + Part of keras layer interface, where the signature is converted into a dict + Returns: + configurational dictionary + """ config = {'mode': self.mode} base_config = super(LinearWithIndex, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/megnet/layers/readout/set2set.py b/megnet/layers/readout/set2set.py index c95d14fbf..81535dbdd 100644 --- a/megnet/layers/readout/set2set.py +++ b/megnet/layers/readout/set2set.py @@ -17,25 +17,6 @@ class Set2Set(Layer): "Order matters: Sequence to sequence for sets." arXiv preprint arXiv:1511.06391 (2015). - Args: - T: (int) recurrent step - n_hidden: (int) number of hidden units - activation: (str or object) activation function - activation_lstm: (str or object) activation function for lstm - recurrent_activation: (str or object) activation function for recurrent step - kernel_initializer: (str or object) initializer for kernel weights - recurrent_initializer: (str or object) initializer for recurrent weights - bias_initializer: (str or object) initializer for biases - use_bias: (bool) whether to use biases - unit_forget_bias: (bool) whether to use basis in forget gate - kernel_regularizer: (str or object) regularizer for kernel weights - recurrent_regularizer: (str or object) regularizer for recurrent weights - bias_regularizer: (str or object) regularizer for biases - kernel_constraint: (str or object) constraint for kernel weights - recurrent_constraint: (str or object) constraint for recurrent weights - bias_constraint:(str or object) constraint for biases - kwargs: other inputs for keras Layer class - """ def __init__(self, @@ -56,7 +37,26 @@ def __init__(self, recurrent_constraint=None, bias_constraint=None, **kwargs): - + """ + Args: + T: (int) recurrent step + n_hidden: (int) number of hidden units + activation: (str or object) activation function + activation_lstm: (str or object) activation function for lstm + recurrent_activation: (str or object) activation function for recurrent step + kernel_initializer: (str or object) initializer for kernel weights + recurrent_initializer: (str or object) initializer for recurrent weights + bias_initializer: (str or object) initializer for biases + use_bias: (bool) whether to use biases + unit_forget_bias: (bool) whether to use basis in forget gate + kernel_regularizer: (str or object) regularizer for kernel weights + recurrent_regularizer: (str or object) regularizer for recurrent weights + bias_regularizer: (str or object) regularizer for biases + kernel_constraint: (str or object) constraint for kernel weights + recurrent_constraint: (str or object) constraint for recurrent weights + bias_constraint:(str or object) constraint for biases + kwargs: other inputs for keras Layer class + """ super().__init__(**kwargs) self.activation = activations.get(activation) self.use_bias = use_bias @@ -78,7 +78,12 @@ def __init__(self, self.n_hidden = n_hidden def build(self, input_shape): + """ + Build tensors + Args: + input_shape (sequence of tuple): input shapes + """ feature_shape, index_shape = input_shape self.m_weight = self.add_weight( shape=(feature_shape[-1], self.n_hidden), @@ -123,10 +128,27 @@ def bias_initializer(_, *args, **kwargs): self.built = True def 
compute_output_shape(self, input_shape): + """ + Compute output shapes from input shapes + Args: + input_shape (sequence of tuple): input shapes + + Returns: sequence of tuples output shapes + + """ feature_shape, index_shape = input_shape return feature_shape[0], None, 2 * self.n_hidden def call(self, inputs, mask=None): + """ + Main logic + Args: + inputs (tuple of tensor): input tensors + mask (tensor): mask tensor + + Returns: output tensor + + """ features, feature_graph_index = inputs feature_graph_index = tf.reshape(feature_graph_index, (-1,)) _, _, count = tf.unique_with_counts(feature_graph_index) @@ -181,6 +203,11 @@ def _lstm(self, h, c): return h, c def get_config(self): + """ + Part of keras layer interface, where the signature is converted into a dict + Returns: + configurational dictionary + """ config = {"T": self.T, "n_hidden": self.n_hidden, "activation": activations.serialize(self.activation), diff --git a/megnet/losses.py b/megnet/losses.py index 4a8c6b015..d9d8c929a 100644 --- a/megnet/losses.py +++ b/megnet/losses.py @@ -1,3 +1,6 @@ +""" +Loss functions +""" import tensorflow.keras.backend as kb diff --git a/megnet/models/megnet.py b/megnet/models/megnet.py index a533f4f10..27f70991c 100644 --- a/megnet/models/megnet.py +++ b/megnet/models/megnet.py @@ -148,6 +148,14 @@ def from_url(cls, url: str) -> GraphModel: @classmethod def from_mvl_models(cls, name: str) -> GraphModel: + """ + load model using mvl model names + Args: + name (str): model name string. Please check + megnet.utils.models.AVAILABEL_MODELS for available models + Returns: GraphModel instance + + """ from megnet.utils.models import load_model return load_model(name) diff --git a/megnet/utils/descriptor.py b/megnet/utils/descriptor.py index cd019d4ee..b7523df87 100644 --- a/megnet/utils/descriptor.py +++ b/megnet/utils/descriptor.py @@ -22,16 +22,18 @@ class MEGNetDescriptor: MEGNet descriptors. This class takes a trained model and then compute the intermediate outputs as structure features - Args: - model_name (str or MEGNetModel): trained model. If it is - str, then only models in mvl_models are used. - use_cache (bool): whether to use cache for structure - graph calculations """ def __init__(self, model_name: Union[str, GraphModel, MEGNetModel] = DEFAULT_MODEL, use_cache: bool = True): + """ + Args: + model_name (str or MEGNetModel): trained model. If it is + str, then only models in mvl_models are used. + use_cache (bool): whether to use cache for structure + graph calculations + """ if isinstance(model_name, str): model = MEGNetModel.from_file(model_name) elif isinstance(model_name, GraphModel): diff --git a/megnet/utils/preprocessing.py b/megnet/utils/preprocessing.py index 8e02bf66a..7d8a0ef41 100644 --- a/megnet/utils/preprocessing.py +++ b/megnet/utils/preprocessing.py @@ -52,18 +52,20 @@ class StandardScaler(Scaler): For extensive quantity, the mean is the mean of target/atom, and std is the std for target/atom - Args: - mean (float): mean value of target - std (float): standard deviation of target - is_intensive (bool): whether the target is already an intensive - property - Methods: transform(self, target, n=1): standard scaling the target and """ def __init__(self, mean: float = 0.0, std: float = 1.0, is_intensive: bool = True): + """ + + Args: + mean (float): mean value of target + std (float): standard deviation of target + is_intensive (bool): whether the target is already an intensive + property + """ self.mean = mean if np.abs(std) < np.finfo(float).eps: std = 1.0
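To see the documented pieces working together, a minimal end-to-end sketch (assumptions: megnet 1.1.x and pymatgen are installed, and 'Eform_MP_2019' names one of the pre-trained MVL models listed under megnet.utils.models; the example is not part of the patch):

from pymatgen.core import Lattice, Structure
from megnet.models import MEGNetModel

# Load a pre-trained model by name through the from_mvl_models classmethod
# documented in megnet/models/megnet.py above.
model = MEGNetModel.from_mvl_models('Eform_MP_2019')

# Rock-salt NaCl as a quick test structure.
nacl = Structure.from_spacegroup(
    'Fm-3m', Lattice.cubic(5.69), ['Na', 'Cl'],
    [[0, 0, 0], [0.5, 0.5, 0.5]])

# predict_structure builds the graph with the model's graph converter and
# returns the target with any target_scaler inverse transform applied.
print(model.predict_structure(nacl))  # formation energy per atom, in eV/atom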