# Data Simulation - Part I: Feature Space

In [2]:
##### Simulate Data for Reserve Calculation#####

def data_simulation_features(A, B, c, N_contracts, Max_min, N_features=4, option = 'practical'):
    """Simulate integer-valued contract features for the reserve calculation.

    A Sobol sequence (values in [0, 1)) is generated and rescaled to the
    bounds given in ``Max_min`` so that the feature space is covered evenly.

    Parameters
    ----------
    A, B, c :
        Not used by the active code. They are only referenced by the
        disabled death-year simulation kept as comments below
        (presumably mortality-law parameters -- TODO confirm with caller).
    N_contracts : int
        Number of contracts (rows) to simulate.
    Max_min : array-like indexable as ``Max_min[i, j]``
        Lower/upper bounds per feature::

            Max_min[0,0] = age_low    Max_min[0,1] = age_up
            Max_min[1,0] = Sum_min    Max_min[1,1] = Sum_max
            Max_min[2,0] = dur_min    Max_min[2,1] = dur_max
            Max_min[3,0] = aoc_min    Max_min[3,1] = aoc_max

    N_features : int, default 4
        Number of feature columns. The code below fills columns 0-3
        (age, sum insured, duration, age of contract), so values other
        than 4 are not meaningfully supported.
    option : {'practical', 'all-over'}, default 'practical'
        'practical': age of contract is drawn conditionally on the
        duration, leaving at least one remaining contract year.
        'all-over': age of contract is scaled independently to
        [aoc_min, aoc_max], which also allows already matured contracts.

    Returns
    -------
    list
        ``[data, [data_age, data_sum, data_dur, data_aoc]]`` where ``data``
        is an integer array with one row per contract and columns
        (current age, sum insured, duration, age of contract); the inner
        list holds the four individual columns of ``data``.

    Raises
    ------
    ValueError
        If ``option`` is neither 'practical' nor 'all-over'.
    """
    # Fail fast: continuing with an unknown option would silently return
    # degenerate data (every contract at the lower bounds, contract age 0).
    if option not in ('practical', 'all-over'):
        raise ValueError(
            "Option unknown! Got {!r}; expected 'practical' or 'all-over'.".format(option)
        )

    data = np.zeros([N_contracts, N_features])

    # Fix the seeds for reproducibility. The original `random.seed` call is
    # kept; NumPy's global generator is seeded explicitly as well because the
    # only random draw below is `np.random.rand`, which a stdlib
    # `random.seed` would not affect.
    random.seed(42)
    np.random.seed(42)

    # Use Sobol Sequence to optimize uniform coverage of multidimensional feature space
    if option == 'practical':
        # Age of contract (last column) is simulated separately below,
        # conditionally on the duration.
        data[:, 0:-1] = sobol_seq.i4_sobol_generate(N_features - 1, N_contracts)
    else:  # 'all-over': allows for unreasonable, i.e. matured, contracts
        data = sobol_seq.i4_sobol_generate(N_features, N_contracts)

    # Simulate initial ages of policyholders upon signing the contract.
    # Upper bound used here is age_up - dur_max (Max_min[0,1] - Max_min[2,1]),
    # i.e. the entry age leaves room for the longest possible duration.
    data[:, 0] = Max_min[0, 0] + (Max_min[0, 1] - Max_min[2, 1] - Max_min[0, 0]) * data[:, 0]

    # Simulate sums insured
    data[:, 1] = Max_min[1, 0] + (Max_min[1, 1] - Max_min[1, 0]) * data[:, 1]

    # Simulate duration
    data[:, 2] = Max_min[2, 0] + (Max_min[2, 1] - Max_min[2, 0]) * data[:, 2]

    if option == 'practical':
        # Simulate age of contract, i.e. introduce shifts in reserve profiles:
        # a random percentage of the duration has passed, with at least one
        # remaining year of contract (hence duration - 1).
        rnd = np.random.rand(N_contracts)
        data[:, 3] = rnd * (data[:, 2] - 1)
    else:  # 'all-over'
        data[:, 3] = Max_min[3, 0] + (Max_min[3, 1] - Max_min[3, 0]) * data[:, 3]

    # Obtain current age of policyholder (initial age + age of contract)
    data[:, 0] = data[:, 0] + data[:, 3]

    # Convert to integers. NOTE: astype('int') truncates toward zero; it does
    # not round up.
    data = data.astype('int')

    # Disabled: simulate years of death (the only place A, B, c were used)
    #deaths = np.random.rand(N_contracts)
    #age_at_death = np.zeros(N_contracts)
    #cum_probs_live = np.zeros([Max_min[0,1]-Max_min[0,0]+1,Max_min[0,1]-Max_min[0,0]])
    #range_age = np.array(range(Max_min[0,0], Max_min[0,1]),ndmin = 2).T
    #range_time = np.array(range(Max_min[0,1]-Max_min[0,0]),ndmin= 2)
    #cum_probs_live = np.array(np.exp(-np.dot(A,range_time)-B/np.log(c)*c**range_age*(c**(range_time)-1)),ndmin = 2)

    #for k in range(N_contracts):
    #    age_at_death[k] = (np.where(cum_probs_live[(data[k,0]-Max_min[0,0]-1),:]<deaths[k]))[0][0]
    #    print(deaths[k], age_at_death[k])

    # add age at entry and years of survival to obtain age of death
    #age_at_death = age_at_death+data[:,0]

    ### Lower-dimensional Datasets
    # Individual feature columns of `data`
    # (input_used = ['age_start', 'Sum_ins', 'duration', 'age_of_contract'])
    data_age = data[:, 0]
    data_sum = data[:, 1]
    data_dur = data[:, 2]
    data_aoc = data[:, 3]

    return [data, [data_age, data_sum, data_dur, data_aoc]]