In [211]:
from lifelines.fitters import ParametericUnivariateFitter
from datetime import timedelta
import pandas as pd
import numpy
from autograd import numpy as np
from autograd import grad, value_and_grad
from scipy.stats import expon, bernoulli
import fleming
import datetime
from scipy.optimize import minimize, fmin

In [216]:
def get_df(n_samples=200, seed=None):


    # Exponential times for month
    tau_for_quarter = {
        1: 200,
        2: 200,
        3: 200,
        4: 200,
    }

    # Create a dataframe of dates along
    df = pd.DataFrame({'dates': pd.date_range('1/1/2019', '12/31/2031', freq='D')})
    df['quarter'] = df.dates.dt.quarter


    # Randomly generate exponential times for each date
    if seed is not None:
        np.random.seed(seed)
    times = []
    observed = []
    for quarter in df.quarter:
        tau = tau_for_quarter[quarter]
        time = expon(scale=tau).rvs()
        times.append(np.round(time))
        observed.append(bernoulli(.7).rvs())

    df['times'] = pd.Series(times, dtype=float)
    df['observed'] = observed
    # df = df[df.times > 0]

    df = df.sample(n_samples, replace=True).reset_index(drop=True).drop('quarter', axis=1)
    return df





In [219]:
class Seasonal:
    _fitted_parameter_names = ['q1', 'q2', 'q3', 'q4']
    
    def __init__(self, durations, dates, observed):
        self.durations = durations
        self.dates = dates
        self.observed = observed
        self._days_in_quarter = None
    
    def _get_days_in_quarter(self, durations, dates):
        dates = pd.Series(dates)
        days_in_quarter = []
        for duration, end_date in zip(durations, dates):
            start_date = end_date - datetime.timedelta(days=duration)
            df = pd.DataFrame({'date': pd.date_range(start_date, end_date)})
            df['quarter'] = df.date.dt.quarter
            days_in_quarter.append(df.quarter.value_counts().to_dict())
        return days_in_quarter

    def _hazard(self, params, day):
        day = pd.Timestamp(day)
        return 1. / params[day.quarter - 1]

    def _cumulative_hazard(self, params):
        cum_hazards = []
        if self._days_in_quarter is None:
            self._days_in_quarter = self._get_days_in_quarter(self.durations, self.dates)
        
        for diq in self._days_in_quarter:
            cum_haz = 0.
            for quarter, days_in_quarter in diq.items():
                cum_haz += days_in_quarter / params[quarter - 1]
            cum_hazards.append(cum_haz)
                
        return np.array(cum_hazards, dtype=np.float64)
    
    def _log_likelihood(self, params):
        cum_hazards = self._cumulative_hazard(params)
        hazards = np.array([self._hazard(params, day) for day in self.dates])
        
        d_log_likelihood = self.observed * np.log(hazards) - cum_hazards
        out =  np.sum(d_log_likelihood)
        return - out
        
    

params = np.array([1., 1., 1., 1.])
params = np.array([50.1, 50.1, 20.2, 20.2])


df = get_df(n_samples=80)

seasonal = Seasonal(df.times, df.dates, df.observed)
seasonal._log_likelihood(params)

fmin(func=seasonal._log_likelihood, x0=params, maxiter=1000)
    


# grad_obj = value_and_grad(seasonal._log_likelihood)
  
# res = minimize(
#     grad_obj,
#     x0=params,
#     jac=True,
#     method='L-BFGS-B',
# )    


Optimization terminated successfully.
         Current function value: 377.957522
         Iterations: 203
         Function evaluations: 346


array([277.23076408, 194.3684131 , 250.21426465, 324.74995101])

In [209]:
_

array([290.99432539, 288.19451159, 314.18515731, 281.16665419])

In [150]:
def to_ordinal(time):
    has_iter = hasattr(time, '__iter__')
    if has_iter:
        time = pd.Series(time)
    else:
        time = pd.Series([time])
        
    out = pd.Series([t.toordinal() for t in time], dtype=np.float64)
    
    if has_iter:
        return out.values
    else:
        return out[0]

def from_ordinal(timestamp):
    has_iter = hasattr(timestamp, '__iter__')
    if has_iter:
        timestamp = pd.Series(timestamp)
    else:
        timestamp = pd.Series([timestamp])
        
    out = pd.Series([pd.Timestamp.fromordinal(int(t)) for t in timestamp])
    
    if has_iter:
        return out.values
    else:
        return out[0]
        