In [90]:
from lifelines.fitters import ParametericUnivariateFitter
from datetime import timedelta
import pandas as pd
import numpy
from autograd import numpy as np
from scipy.stats import expon, bernoulli

In [81]:
# Number of rows to keep in the final (resampled) dataset
n_samples = 20

# Scale (mean) of the exponential duration distribution, keyed by calendar quarter
tau_for_quarter = {1: 5, 2: 5, 3: 2, 4: 2}

# One row per calendar day of 2019, tagged with the quarter it falls in
df = pd.DataFrame({'date': pd.date_range('1/1/2019', '12/31/2019', freq='D')})
df['quarter'] = df['date'].dt.quarter

# For every day draw a rounded exponential duration and a Bernoulli(0.7)
# "observed" flag. The two draws stay interleaved row by row so the seeded
# random stream is consumed in a fixed order.
np.random.seed(777)
times = []
observed = []
for quarter in df['quarter']:
    tau = tau_for_quarter[quarter]
    time = expon.rvs(scale=tau)
    times.append(np.round(time))
    observed.append(bernoulli.rvs(.7))

df['times'] = pd.Series(times, dtype=float)
df['observed'] = observed

# Drop durations that rounded down to zero, then resample with replacement
# and discard the helper column.
df = (
    df.loc[df['times'] > 0]
    .sample(n_samples, replace=True)
    .reset_index(drop=True)
    .drop(columns='quarter')
)
print(df.dtypes)

df





date        datetime64[ns]
times              float64
observed             int64
dtype: object


Unnamed: 0,date,times,observed
0,2019-12-11,2.0,1
1,2019-11-02,11.0,1
2,2019-04-18,2.0,1
3,2019-06-17,8.0,1
4,2019-08-15,6.0,0
5,2019-08-10,3.0,1
6,2019-07-11,5.0,1
7,2019-03-14,2.0,1
8,2019-03-18,5.0,0
9,2019-10-04,3.0,1


In [94]:
class Seasonal(ParametericUnivariateFitter):
    """
    Univariate survival model whose hazard depends on the calendar quarter.

    Each quarter ``q`` has its own parameter ``tau_q`` (fitted under the names
    ``q1`` ... ``q4``). Every one-day step that a subject spends in quarter
    ``q`` contributes ``1 / tau_q`` to the cumulative hazard. The calendar
    position of each observation is supplied separately through
    :meth:`use_dates`, which must be called before the model is used.
    """

    _fitted_parameter_names = ['q1', 'q2', 'q3', 'q4']

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # End date of each observation; populated by `use_dates`.
        self._dates = None

    def use_dates(self, dates):
        """
        Utility method to set the dates corresponding to times.

        Parameters
        ----------
        dates : iterable of datetime-like
            End date of each observation. Dates are paired with durations by
            position inside `_cumulative_hazard`.

        Returns
        -------
        self, so the call can be chained with ``fit``.
        """
        self._dates = pd.Series(dates)
        return self

    @staticmethod
    def _unbox(value):
        """
        Strip autograd tracing boxes from ``value`` and return the plain number.
        """
        # Boxes may be nested (one level per differentiation), so unwrap until
        # no `_value` attribute is left.
        while hasattr(value, '_value'):
            value = value._value
        return value

    def _cumulative_hazard(self, params, times):
        """
        Cumulative hazard accumulated over the ``times[i]`` days that end on
        the i-th date given to `use_dates`.

        Parameters
        ----------
        params : sequence of four values
            ``tau`` for quarters 1 to 4, in that order. Entries may be autograd
            boxes; only plain arithmetic is applied to them.
        times : iterable of numbers
            Durations in days. Entries may be plain numbers or autograd boxes;
            boxes are unwrapped, so the result carries no dependence on
            ``times`` that autograd could differentiate.

        Returns
        -------
        Array with one cumulative hazard per (date, time) pair.

        Raises
        ------
        ValueError
            If `use_dates` has not been called.
        """
        if self._dates is None:
            raise ValueError(
                "No dates have been set: call `use_dates(dates)` before using the model."
            )

        # Extract params
        tau1, tau2, tau3, tau4 = params
        tau_lookup = {1: tau1, 2: tau2, 3: tau3, 4: tau4}

        # NOTE(review): dates and times are paired positionally and `zip`
        # silently stops at the shorter of the two. If the caller passes a
        # `times` array that is not aligned with the stored dates (e.g. a
        # subset or a prediction grid), the pairing is wrong - confirm how
        # lifelines invokes this method.
        cum_hazard = []
        for end_date, time in zip(self._dates, times):
            # The dates represent end times
            end_date = pd.Timestamp(end_date)
            start_date = end_date - timedelta(days=float(self._unbox(time)))

            # One-day steps starting at `start_date`. The step that would begin
            # on `end_date` itself is excluded so that a duration of `t` whole
            # days accumulates exactly `t` daily hazards (and 0 for t == 0).
            days = pd.date_range(start_date, end_date, freq='D')
            days = days[days < end_date]

            # Number of steps falling in each quarter (index 0 is unused).
            counts = numpy.bincount(numpy.asarray(days.quarter, dtype=int), minlength=5)

            # Plain arithmetic keeps autograd boxes in `params` traceable.
            cum_haz = sum(
                int(counts[quarter]) / tau
                for quarter, tau in tau_lookup.items()
                if counts[quarter]
            )
            cum_hazard.append(cum_haz)

        return np.array(cum_hazard)
        
        

# Attach each observation's end date to the model, then fit on the durations
# and event indicators. The fitted model is the cell's displayed result.
fitter = Seasonal()
fitter.use_dates(df['date'])
fitter.fit(df['times'], df['observed'])



<lifelines.Seasonal: fitted with 20 observations, 5 censored>