Skip to content

Commit

Permalink
refs #17 : Fix formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
ragulpr committed Jul 18, 2017
1 parent 1bfbcb9 commit ef761a9
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 31 deletions.
39 changes: 30 additions & 9 deletions python/wtte/transforms.py
Expand Up @@ -10,24 +10,31 @@
from .tte_util import get_tte


def df_to_array(df, column_names, nanpad_right=True, return_lists=False, id_col='id', t_col='t'):
""" converts flat pandas df `{id,t,col1,col2,..}` to array indexed `[id,t,col]`.
def df_to_array(df, column_names, nanpad_right=True, return_lists=False,
id_col='id', t_col='t'):
"""Converts flat pandas df with cols `id,t,col1,col2,..` to array indexed `[id,t,col]`.
:param df: dataframe with columns:
* `id`: Any type. A unique key for the sequence.
:param df: dataframe with columns
* `id`: Any type. A key for the sequence.
* `t`: integer. If `t` is a non-contiguous int vec per id then steps in
between t's are padded with zeros.
* `columns` in `column_names` (String list)
:type df: Pandas dataframe
:param Boolean nanpad_right: If `True`, sequences are `np.nan`-padded to `max_seq_len`
:param return_lists: Put every tensor in its own subarray
:param id_col: string column name for `id`
:param t_col: string column name for `t`
:return padded: With seqlen the max value of `t` per id
* if nanpad_right & !return_lists:
a numpy float array of dimension `[n_seqs,max_seqlen,n_features]`
* if nanpad_right & return_lists:
n_seqs numpy float sub-arrays of dimension `[max_seqlen,n_features]`
* if !nanpad_right & return_lists:
n_seqs numpy float sub-arrays of dimension `[seqlen,n_features]`
"""
Expand Down Expand Up @@ -73,14 +80,17 @@ def df_to_array(df, column_names, nanpad_right=True, return_lists=False, id_col=


def df_to_padded(df, column_names, id_col='id', t_col='t'):
"""pads pandas df to a numpy array of shape [n_seqs,max_seqlen,n_features].
see df_to_array for details
"""pads pandas df to a numpy array of shape `[n_seqs,max_seqlen,n_features]`.
see `df_to_array` for details
"""
return df_to_array(df, column_names, nanpad_right=True,
return_lists=False, id_col=id_col, t_col=t_col)


def df_to_subarrays(df, column_names, id_col='id', t_col='t'):
"""pads pandas df to subarrays of shape `[n_seqs][seqlen[s],n_features]`.
see `df_to_array` for details
"""
return df_to_array(df, column_names, nanpad_right=False,
return_lists=True, id_col=id_col, t_col=t_col)

Expand All @@ -98,8 +108,11 @@ def padded_to_df(padded, column_names, dtypes, ids=None, id_col='id', t_col='t')
:param id_col: Column where `id` is located. Default value is `id`.
:param t_col: Column where `t` is located. Default value is `t`.
:return df: Dataframe with Columns
* `id` (Integer) or the value of `ids`
* `t` (Integer).
A row in df is the t'th event for an `id` and has columns from `column_names`
"""

Expand Down Expand Up @@ -169,6 +182,8 @@ def get_basic_df(padded, is_nonempty, ids):
def padded_events_to_tte(events, discrete_time, t_elapsed=None):
""" computes (right censored) time to event from padded binary events.
For details see `tte_util.get_tte`
:param Array events: Events array.
:param Boolean discrete_time: `True` when applying discrete time scheme.
:param Array t_elapsed: Elapsed time. Default value is `None`.
Expand Down Expand Up @@ -313,9 +328,10 @@ def df_join_in_endtime(df, constant_per_id_cols='id',
:param constant_per_id_cols: identifying id and
columns remaining constant per id&timestep
:type constant_per_id_cols: String or String list
:param String abs_time_col: identifying the wall-clock column.
:param df[abs_time_cols]) abs_endtime: If none it's inferred.
:return pandas.dataframe df: pandas dataframe with a value
:param String abs_time_col: identifying the wall-clock column `df[abs_time_col]`.
:param abs_endtime: The time to join in. If None it's inferred.
:type abs_endtime: None or same as `df[abs_time_col].values`.
:return pandas.dataframe df: pandas dataframe where each `id` has rows at the endtime.
"""
assert 't' not in df.columns.values, 'define elapsed time upstream'

Expand Down Expand Up @@ -349,6 +365,10 @@ def shift_discrete_padded_features(padded, fill=0):
In the continuous case "2015-12-15 23.59" means exactly at
"2015-12-15 23.59: 00000000".
TODO does not render in sphinx.
::
Discrete case
t|dt |Event
0|2015-12-15 00.00-23.59|1
Expand Down Expand Up @@ -378,6 +398,7 @@ def shift_discrete_padded_features(padded, fill=0):
TTE |1|3|2|1|?|?|....
Observed*|T|T|T|T|T|T|....
Observed* = Do we have feature data at this time?
In the discrete case:
-> we need to roll data intended as features to the right.
Expand Down
15 changes: 8 additions & 7 deletions python/wtte/tte_util.py
Expand Up @@ -7,7 +7,6 @@

# TODO
# - Proper tests of everything
# - naming in general.
# - be clearer about meaning of t_elapsed, t_ix and either (t)
# - Time Since Event is a ticking bomb. Needs better naming/definitions
# to ensure that it's either inverse TTE or a feature or if they coincide.
Expand All @@ -28,7 +27,7 @@ def roll_fun(x, size, fun=np.mean, reverse=False):


def carry_forward_if(x, is_true):
"""Locomote forward object x[i] if is_true[i].
"""Locomote forward `x[i]` if `is_true[i]`.
Leaves x untouched before the first position where `is_true` holds.
:param Array x: object whose elements are to carry forward
Expand All @@ -44,7 +43,7 @@ def carry_forward_if(x, is_true):


def carry_backward_if(x, is_true):
"""Locomote backward object x[i] if is_true[i].
"""Locomote backward `x[i]` if `is_true[i]`.
Leaves x untouched after the last position where `is_true` holds.
:param Array x: object whose elements are to carry backward
Expand Down Expand Up @@ -151,7 +150,7 @@ def get_tte_continuous(is_event, t_elapsed):


def get_tte(is_event, discrete_time, t_elapsed=None):
""" wrapper to calculate Time To Event for input vector.
""" wrapper to calculate *Time To Event* for input vector.
:param Boolean discrete_time: if `True`, use `get_tte_discrete`. If `False`, use `get_tte_continuous`.
"""
Expand All @@ -162,7 +161,7 @@ def get_tte(is_event, discrete_time, t_elapsed=None):


def get_tse(is_event, t_elapsed=None):
""" Wrapper to calculate Time Since Event for input vector.
""" Wrapper to calculate *Time Since Event* for input vector.
Inverse of tte. Safe to use as a feature.
Always "continuous" method of calculating it.
Expand All @@ -172,7 +171,9 @@ def get_tse(is_event, t_elapsed=None):
tse = 0 at first step
:param Array is_event: Boolean array
:param IntArray t_elapsed: integer array with same length as `is_event` . If none, it will use `t_elapsed.max() - t_elapsed[::-1]`.
:param IntArray t_elapsed: None or integer array with same length as `is_event`.
* If none, it will use `t_elapsed.max() - t_elapsed[::-1]`.
.. TODO::
reverse-indexing is pretty slow and ugly and not a helpful template for implementing in other languages.
Expand All @@ -185,7 +186,7 @@ def get_tse(is_event, t_elapsed=None):


def get_is_not_censored(is_event, discrete_time=True):
""" Calculates non-censoring indicator u
""" Calculates non-censoring indicator `u`.
:param Boolean discrete_time: if `True`, last observation is conditionally censored.
"""
Expand Down
40 changes: 29 additions & 11 deletions python/wtte/weibull.py
Expand Up @@ -20,7 +20,7 @@ def hazard(t, a, b):


def cdf(t, a, b):
""" Cumulative distribution function
""" Cumulative distribution function.
:param t: Value
:param a: Alpha
Expand All @@ -32,21 +32,21 @@ def cdf(t, a, b):


def pdf(t, a, b):
""" Probability distribution function
:param t: Value
""" Probability density function.
:param t: Value
:param a: Alpha
:param b: Beta
:return:`(b / a) * np.power(t / a, b - 1) * np.exp(-np.power(t / a, b))`
:return: `(b / a) * np.power(t / a, b - 1) * np.exp(-np.power(t / a, b))`
"""
t = np.double(t)
return (b / a) * np.power(t / a, b - 1) * np.exp(-np.power(t / a, b))


def cmf(t, a, b):
""" Cumulative Mass Function
""" Cumulative Mass Function.
:param t: Value
:param t: Value
:param a: Alpha
:param b: Beta
:return: `cdf(t + 1, a, b)`
Expand All @@ -56,9 +56,9 @@ def cmf(t, a, b):


def pmf(t, a, b):
""" Probability mass function
:param t:
""" Probability mass function.
:param t: Value
:param a: Alpha
:param b: Beta
:return: `cdf(t + 1.0, a, b) - cdf(t, a, b)`
Expand Down Expand Up @@ -109,8 +109,13 @@ def mean(a, b):


def continuous_loglik(t, a, b, u=1, equality=False):
"""Continuous censored loglikelihood function.
:param bool equality: In ML we usually only care about the likelihood
with *proportionality*, removing terms not dependent on the parameters.
If this is set to `True` we keep those terms.
"""
if equality:
# With equality instead of proportionality.
loglik = u * np.log(pdf(t, a, b)) + (1 - u) * \
np.log(1.0 - cdf(t, a, b))
else:
Expand All @@ -122,6 +127,12 @@ def continuous_loglik(t, a, b, u=1, equality=False):


def discrete_loglik(t, a, b, u=1, equality=False):
"""Discrete censored loglikelihood function.
:param bool equality: In ML we usually only care about the likelihood
with *proportionality*, removing terms not dependent on the parameters.
If this is set to `True` we keep those terms.
"""
if equality:
# With equality instead of proportionality.
loglik = u * np.log(pmf(t, a, b)) + (1 - u) * \
Expand All @@ -138,6 +149,13 @@ def discrete_loglik(t, a, b, u=1, equality=False):


class conditional_excess():
""" Experimental class for conditional excess distribution.
The idea is to query `s` into the future after time `t`
has passed without event. See thesis for details.
Note: not tested and may be incorrect!
"""

def pdf(t, s, a, b):
t = np.double(t)
Expand Down
19 changes: 15 additions & 4 deletions python/wtte/wtte.py
Expand Up @@ -48,6 +48,15 @@ def output_lambda(x, init_alpha=1.0, max_beta_value=5.0, max_alpha_value=None):
(regularization) Use max_beta_value to implicitly regularize the model
(initialization) Fixed to begin moving slowly around 1.0
- Usage
.. code-block:: python
model.add(TimeDistributed(Dense(2)))
model.add(Lambda(wtte.output_lambda, arguments={"init_alpha":init_alpha,
"max_beta_value":2.0
}))
:param x: tensor with last dimension having length 2 with x[...,0] = alpha, x[...,1] = beta
:param init_alpha: initial value of `alpha`. Default value is 1.0.
:param max_beta_value: maximum beta value. Default value is 5.0.
Expand Down Expand Up @@ -90,8 +99,10 @@ def output_lambda(x, init_alpha=1.0, max_beta_value=5.0, max_alpha_value=None):


class output_activation(object):
""" Elementwise computation of alpha and regularized beta using keras.layers.Activation.
Wrapper
""" Elementwise computation of alpha and regularized beta.
Object-Oriented Wrapper to `output_lambda` using keras.layers.Activation.
- Usage
.. code-block:: python
Expand All @@ -110,9 +121,9 @@ def __init__(self, init_alpha=1.0, max_beta_value=5.0):

def activation(self, ab):
""" (Internal function) Activation wrapper
:param ab: original tensor with alpha and beta.
:return ab: return of `output_lambda` with `init_alpha` and `max_beta_value`.
:return ab: return of `output_lambda` with `init_alpha` and `max_beta_value`.
"""
ab = output_lambda(ab, init_alpha=self.init_alpha,
max_beta_value=self.max_beta_value)
Expand Down

0 comments on commit ef761a9

Please sign in to comment.