refs #17 : Fix formatting

ragulpr · Jul 18, 2017 · ef761a9 · ef761a9
1 parent 1bfbcb9
commit ef761a9
Show file tree

Hide file tree

Showing 4 changed files with 82 additions and 31 deletions.
diff --git a/python/wtte/transforms.py b/python/wtte/transforms.py
@@ -10,24 +10,31 @@
 from .tte_util import get_tte
 
 
-def df_to_array(df, column_names, nanpad_right=True, return_lists=False, id_col='id', t_col='t'):
-    """ converts flat pandas df `{id,t,col1,col2,..}` to array indexed `[id,t,col]`.
+def df_to_array(df, column_names, nanpad_right=True, return_lists=False,
+                id_col='id', t_col='t'):
+    """Converts flat pandas df with cols `id,t,col1,col2,..` to array indexed `[id,t,col]`. 
+
+    :param df: dataframe with columns:
+
+      * `id`: Any type. A unique key for the sequence.
 
-    :param df: dataframe with columns
-      * `id`: Any type. A key for the sequence.
       * `t`: integer. If `t` is a non-contiguous int vec per id then steps in
         between t's are padded with zeros.
+
       * `columns` in `column_names` (String list)
     :type df: Pandas dataframe
     :param Boolean nanpad_right: If `True`, sequences are `np.nan`-padded to `max_seq_len`
     :param return_lists: Put every tensor in its own subarray
     :param id_col: string column name for `id`
     :param t_col: string column name for `t`
     :return padded: With seqlen the max value of `t` per id
+
       * if nanpad_right & !return_lists:
         a numpy float array of dimension `[n_seqs,max_seqlen,n_features]`
+
       * if nanpad_right & return_lists:
         n_seqs numpy float sub-arrays of dimension `[max_seqlen,n_features]`
+
       * if !nanpad_right & return_lists:
         n_seqs numpy float sub-arrays of dimension `[seqlen,n_features]`
     """
@@ -73,14 +80,17 @@ def df_to_array(df, column_names, nanpad_right=True, return_lists=False, id_col=
 
 
 def df_to_padded(df, column_names, id_col='id', t_col='t'):
-    """pads pandas df to a numpy array of shape [n_seqs,max_seqlen,n_features].
-        see df_to_array for details
+    """pads pandas df to a numpy array of shape `[n_seqs,max_seqlen,n_features]`.
+        see `df_to_array` for details
     """
     return df_to_array(df, column_names, nanpad_right=True,
                        return_lists=False, id_col=id_col, t_col=t_col)
 
 
 def df_to_subarrays(df, column_names, id_col='id', t_col='t'):
+    """converts pandas df to subarrays of shape `[n_seqs][seqlen[s],n_features]` (no nan-padding).
+        see `df_to_array` for details
+    """
     return df_to_array(df, column_names, nanpad_right=False,
                        return_lists=True, id_col=id_col, t_col=t_col)
 
@@ -98,8 +108,11 @@ def padded_to_df(padded, column_names, dtypes, ids=None, id_col='id', t_col='t')
     :param id_col: Column where `id` is located. Default value is `id`.
     :param t_col: Column where `t` is located. Default value is `t`.
     :return df: Dataframe with Columns
+
       *  `id` (Integer) or the value of `ids`
+
       *  `t` (Integer).
+
       A row in df is the t'th event for a `id` and has columns from `column_names`
     """
 
@@ -169,6 +182,8 @@ def get_basic_df(padded, is_nonempty, ids):
 def padded_events_to_tte(events, discrete_time, t_elapsed=None):
     """ computes (right censored) time to event from padded binary events.
 
+    For details see `tte_util.get_tte`
+
     :param Array events: Events array.
     :param Boolean discrete_time: `True` when applying discrete time scheme.
     :param Array t_elapsed: Elapsed time. Default value is `None`.
@@ -313,9 +328,10 @@ def df_join_in_endtime(df, constant_per_id_cols='id',
         :param constant_per_id_cols: identifying id and
                                    columns remaining constant per id&timestep
         :type constant_per_id_cols: String or String list
-        :param String abs_time_col: identifying the wall-clock column.
-        :param df[abs_time_cols]) abs_endtime: If none it's inferred.
-        :return pandas.dataframe df: pandas dataframe with a value
+        :param String abs_time_col: identifying the wall-clock column `df[abs_time_col]`.
+        :param abs_endtime: The time to join in. If None it's inferred.
+        :type abs_endtime: None or same type as `df[abs_time_col].values`.
+        :return pandas.dataframe df: pandas dataframe where each `id` has rows at the endtime.
     """
     assert 't' not in df.columns.values, 'define elapsed time upstream'
 
@@ -349,6 +365,10 @@ def shift_discrete_padded_features(padded, fill=0):
     In the continuous case "2015-12-15 23.59" means exactly at
     "2015-12-15 23.59: 00000000".
 
+    TODO does not render in sphinx.
+    ::
+
+
     Discrete case
     t|dt                    |Event
     0|2015-12-15 00.00-23.59|1
@@ -378,6 +398,7 @@ def shift_discrete_padded_features(padded, fill=0):
     TTE      |1|3|2|1|?|?|....
     Observed*|T|T|T|T|T|T|....
 
+
     Observed* = Do we have feature data at this time?
         In the discrete case:
         -> we need to roll data intent as features to the right.

diff --git a/python/wtte/tte_util.py b/python/wtte/tte_util.py
@@ -7,7 +7,6 @@
 
 # TODO
 # - Proper tests of everything
-# - naming in general.
 # - be clearer about meaning of t_elapsed, t_ix and either (t)
 # - Time Since Event is a ticking bomb. Needs better naming/definitions
 #   to ensure that it's either inverse TTE or a feature or if they coincide.
@@ -28,7 +27,7 @@ def roll_fun(x, size, fun=np.mean, reverse=False):
 
 
 def carry_forward_if(x, is_true):
-    """Locomote forward object x[i] if is_true[i].
+    """Locomote forward `x[i]` if `is_true[i]`.
         remain x untouched before first pos of truth.
 
         :param Array x: object whose elements are to carry forward
@@ -44,7 +43,7 @@ def carry_forward_if(x, is_true):
 
 
 def carry_backward_if(x, is_true):
-    """Locomote backward object x[i] if is_true[i].
+    """Locomote backward `x[i]` if `is_true[i]`.
         remain x untouched after last pos of truth.
 
         :param Array x: object whose elements are to carry backward
@@ -151,7 +150,7 @@ def get_tte_continuous(is_event, t_elapsed):
 
 
 def get_tte(is_event, discrete_time, t_elapsed=None):
-    """ wrapper to calculate Time To Event for input vector.
+    """ wrapper to calculate *Time To Event* for input vector.
 
         :param Boolean discrete_time: if `True`, use `get_tte_discrete`. If `False`, use `get_tte_continuous`.
     """
@@ -162,7 +161,7 @@ def get_tte(is_event, discrete_time, t_elapsed=None):
 
 
 def get_tse(is_event, t_elapsed=None):
-    """ Wrapper to calculate Time Since Event for input vector.
+    """ Wrapper to calculate *Time Since Event* for input vector.
 
         Inverse of tte. Safe to use as a feature.
         Always "continuous" method of calculating it.
@@ -172,7 +171,9 @@ def get_tse(is_event, t_elapsed=None):
         tse = 0 at first step
 
         :param Array is_event: Boolean array
-        :param IntArray t_elapsed: integer array with same length as `is_event` . If none, it will use `t_elapsed.max() - t_elapsed[::-1]`.
+        :param IntArray t_elapsed: None or integer array with same length as `is_event`.
+
+            * If none, it will use `t_elapsed.max() - t_elapsed[::-1]`.
 
         .. TODO::
         reverse-indexing is pretty slow and ugly and not a helpful template for implementing in other languages.
@@ -185,7 +186,7 @@ def get_tse(is_event, t_elapsed=None):
 
 
 def get_is_not_censored(is_event, discrete_time=True):
-    """ Calculates non-censoring indicator u
+    """ Calculates non-censoring indicator `u`.
 
         :param Boolean discrete_time: if `True`, last observation is conditionally censored.
     """

diff --git a/python/wtte/weibull.py b/python/wtte/weibull.py
@@ -20,7 +20,7 @@ def hazard(t, a, b):
 
 
 def cdf(t, a, b):
-    """ Cumulative distribution function
+    """ Cumulative distribution function.
 
     :param t: Value
     :param a: Alpha
@@ -32,21 +32,21 @@ def cdf(t, a, b):
 
 
 def pdf(t, a, b):
-    """ Probability distribution function
-    
-    :param t: Value 
+    """ Probability distribution function.
+
+    :param t: Value
     :param a: Alpha
     :param b: Beta
-    :return:`(b / a) * np.power(t / a, b - 1) * np.exp(-np.power(t / a, b))` 
+    :return: `(b / a) * np.power(t / a, b - 1) * np.exp(-np.power(t / a, b))`
     """
     t = np.double(t)
     return (b / a) * np.power(t / a, b - 1) * np.exp(-np.power(t / a, b))
 
 
 def cmf(t, a, b):
-    """ Cumulative Mass Function
+    """ Cumulative Mass Function.
 
-    :param t: Value 
+    :param t: Value
     :param a: Alpha
     :param b: Beta
     :return: `cdf(t + 1, a, b)`
@@ -56,9 +56,9 @@ def cmf(t, a, b):
 
 
 def pmf(t, a, b):
-    """ Probability mass function
-    
-    :param t: 
+    """ Probability mass function.
+
+    :param t: Value
     :param a: Alpha
     :param b: Beta
     :return: `cdf(t + 1.0, a, b) - cdf(t, a, b)`
@@ -109,8 +109,13 @@ def mean(a, b):
 
 
 def continuous_loglik(t, a, b, u=1, equality=False):
+    """Continuous censored loglikelihood function.
+
+    :param bool equality: In ML we usually only care about the likelihood
+        up to *proportionality*, removing terms not dependent on the parameters.
+        If this is set to `True` we keep those terms.
+    """
     if equality:
-        # With equality instead of proportionality.
         loglik = u * np.log(pdf(t, a, b)) + (1 - u) * \
             np.log(1.0 - cdf(t, a, b))
     else:
@@ -122,6 +127,12 @@ def continuous_loglik(t, a, b, u=1, equality=False):
 
 
 def discrete_loglik(t, a, b, u=1, equality=False):
+    """Discrete censored loglikelihood function.
+
+    :param bool equality: In ML we usually only care about the likelihood
+        up to *proportionality*, removing terms not dependent on the parameters.
+        If this is set to `True` we keep those terms.
+    """
     if equality:
         # With equality instead of proportionality.
         loglik = u * np.log(pmf(t, a, b)) + (1 - u) * \
@@ -138,6 +149,13 @@ def discrete_loglik(t, a, b, u=1, equality=False):
 
 
 class conditional_excess():
+    """ Experimental class for conditional excess distribution.
+
+        The idea is to query `s` into the future after time `t`
+        has passed without event. See thesis for details.
+
+        note: Not tested and may be incorrect!
+    """
 
     def pdf(t, s, a, b):
         t = np.double(t)

diff --git a/python/wtte/wtte.py b/python/wtte/wtte.py
@@ -48,6 +48,15 @@ def output_lambda(x, init_alpha=1.0, max_beta_value=5.0, max_alpha_value=None):
             (regularization) Use max_beta_value to implicitly regularize the model
             (initialization) Fixed to begin moving slowly around 1.0
 
+        - Usage
+            .. code-block:: python
+
+                model.add(TimeDistributed(Dense(2)))
+                model.add(Lambda(wtte.output_lambda, arguments={"init_alpha":init_alpha, 
+                                                        "max_beta_value":2.0
+                                                       }))
+
+
         :param x: tensor with last dimension having length 2 with x[...,0] = alpha, x[...,1] = beta
         :param init_alpha: initial value of `alpha`. Default value is 1.0.
         :param max_beta_value: maximum beta value. Default value is 5.0.
@@ -90,8 +99,10 @@ def output_lambda(x, init_alpha=1.0, max_beta_value=5.0, max_alpha_value=None):
 
 
 class output_activation(object):
-    """ Elementwise computation of alpha and regularized beta using keras.layers.Activation.
-        Wrapper
+    """ Elementwise computation of alpha and regularized beta.
+
+        Object-Oriented Wrapper to `output_lambda` using keras.layers.Activation.
+
 
         - Usage
             .. code-block:: python
@@ -110,9 +121,9 @@ def __init__(self, init_alpha=1.0, max_beta_value=5.0):
 
     def activation(self, ab):
         """ (Internal function) Activation wrapper
-        
+
         :param ab: original tensor with alpha and beta.
-        :return ab: return of `output_lambda` with `init_alpha` and `max_beta_value`. 
+        :return ab: return of `output_lambda` with `init_alpha` and `max_beta_value`.
         """
         ab = output_lambda(ab, init_alpha=self.init_alpha,
                            max_beta_value=self.max_beta_value)