In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

# Generating Data

In [2]:
def generate_date(year_1 = 1980, year_2 = 2000):
    return datetime.datetime(year_1, 1, 1) + datetime.timedelta(days= np.random.randint((datetime.datetime(year_2, 12,31) - datetime.datetime(year_1, 1,1)).days))  

In [3]:
emp_counts = int(1e4)
# Creating DataFrame
data = pd.DataFrame(data = {'id' : np.random.randint(0, emp_counts*10, emp_counts), 
                            'gender' : ['Male' if np.random.rand()<0.5 else 'Female' for i in range(emp_counts)],
                            'dob' : [generate_date() for i in range(emp_counts)],
                            'doh' : [generate_date(year_1 = 2008, year_2 = 2023) for i in range(emp_counts)],
                            'salary' : 6e6+ np.random.uniform(low = -3e6, high = 10e6, size = emp_counts) },)
data['dob'] = pd.to_datetime(data['dob'])
data['doh'] = pd.to_datetime(data['doh'])
data

Unnamed: 0,id,gender,dob,doh,salary
0,18562,Female,2000-03-31,2015-07-30,8.225596e+06
1,89107,Male,1995-09-19,2023-07-01,4.688068e+06
2,25141,Female,1986-08-22,2012-03-03,1.259666e+07
3,64459,Male,1999-11-02,2022-01-19,9.773070e+06
4,17677,Female,1985-06-03,2017-03-03,4.018211e+06
...,...,...,...,...,...
9995,36864,Female,1985-11-04,2011-09-06,1.371240e+07
9996,22090,Female,1993-11-08,2008-05-16,5.113434e+06
9997,44930,Male,1988-02-13,2019-07-30,3.852640e+06
9998,50239,Male,1987-09-11,2019-04-05,4.468979e+06


In [4]:
val_date = pd.Timestamp('2023-12-31')
data['age'] = np.round((val_date- data.dob)/np.timedelta64(1, 'Y'),2)
data['yos'] = np.round((val_date- data.doh)/np.timedelta64(1, 'Y'),2)

# Actuarial Assumptions

# Demographic Assumptions

Assume death probability follows 4th Indonesia Mortality Table. Disability probability is 1% of the former mortality table and resignation rate is 1% decreasing linearly from age 22 to age 56 (pension age).

In [5]:
mortality_base = pd.read_csv(r'data/TMI IV.csv')
pension_age = 56

In [6]:
def resignation_rate(entry_age, start_age = 22, end_age = pension_age, start_rate = 0.01, end_rate = 0):
    return start_rate +(end_rate - start_rate)*(np.arange(entry_age, end_age) - start_age)/(end_age - start_age) 

In [7]:
def demographic_table(table = mortality_base, employee_gender = None, employee_age = None, pension_age = pension_age):
    death = mortality_base[employee_gender].loc[int(employee_age):pension_age-1]
    disable = death*0.01
    resign = resignation_rate(entry_age = employee_age)
    return pd.DataFrame(data = {'death': death, 'disable': disable.values, 'resign' : resign}, 
                        index = np.arange(np.floor(employee_age), pension_age))
    

In [8]:
demographic_table(table = mortality_base,
                  employee_gender = data.gender.iloc[1],
                  employee_age = data.age.iloc[1]).shape

(28, 3)

In [9]:
def service_table(demographic_tbl):
    survive = np.ones((demographic_tbl.shape[0], 4))
    for i in range(demographic_tbl.shape[0]):
        survive[i,1] = survive[i,0] * demographic_tbl.iloc[i,0] * (1 - demographic_tbl.iloc[i,1]) * (1 - demographic_tbl.iloc[i,2])
        survive[i,2] = survive[i,0] * demographic_tbl.iloc[i,1] * (1 - demographic_tbl.iloc[i,0]) * (1 - demographic_tbl.iloc[i,2])
        survive[i,3] = survive[i,0] * demographic_tbl.iloc[i,2] * (1 - demographic_tbl.iloc[i,0]) * (1 - demographic_tbl.iloc[i,1])
        try : 
            survive[i+1,0] = survive[i,0] - np.sum(survive[i,1:])  
        except : 
            survive = np.append(survive, np.array([[survive[i,0] - np.sum(survive[i,1:]), 0, 0, 0]]), axis = 0)
    return pd.DataFrame(data = survive, columns = ['survive', 'death', 'disable', 'resign'], index = np.append(demographic_tbl.index, 56))

In [10]:
service_table(demographic_tbl = demographic_table(table = mortality_base,
                  employee_gender = data.gender.iloc[1],
                  employee_age = data.age.iloc[1]))

Unnamed: 0,survive,death,disable,resign
28.0,1.0,0.000645,6e-06,0.008142
29.0,0.991207,0.000688,7e-06,0.007778
30.0,0.982733,0.000731,7e-06,0.007423
31.0,0.974572,0.000784,8e-06,0.007074
32.0,0.966706,0.000835,8e-06,0.006733
33.0,0.95913,0.000886,9e-06,0.006398
34.0,0.951838,0.000936,9e-06,0.006069
35.0,0.944823,0.001005,1e-05,0.005746
36.0,0.938062,0.001082,1.1e-05,0.005429
37.0,0.931541,0.001177,1.2e-05,0.005117


## Financial Assumption

In [11]:
sev_svc = pd.DataFrame({'severance': [min(i+1,9) for i in range(60)],
                        'service' : [0,0,0,2,2,2,3,3,3,
                                     4,4,4,5,5,5,6,6,6,
                                     7,7,7,8,8,8,10,10,10,10,10,10,10,
                                     10,10,10,10,10,10,10,10,10,10,10,10,10,10,
                                     10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]})
ben_fac = pd.DataFrame({'retire': 1.75*sev_svc['severance']+sev_svc['service'],
                        'death': 2*sev_svc['severance']+sev_svc['service'],
                        'disable': 2*sev_svc['severance']+sev_svc['service'],
                        'resign': [1]*sev_svc.shape[0]})
ben_fac

Unnamed: 0,retire,death,disable,resign
0,1.75,2,2,1
1,3.5,4,4,1
2,5.25,6,6,1
3,9.0,10,10,1
4,10.75,12,12,1
5,12.5,14,14,1
6,15.25,17,17,1
7,17.0,19,19,1
8,18.75,21,21,1
9,19.75,22,22,1


In [12]:
def select_ben_fac(employee_age, employee_yos, pension_age, ben_fac = ben_fac):
    return ben_fac.iloc[int(employee_yos):int(employee_yos+pension_age-int(employee_age)), 1:]

In [13]:
select_ben_fac(employee_age=data.age.iloc[1], employee_yos= data.yos.iloc[1],pension_age=pension_age)

Unnamed: 0,death,disable,resign
0,2,2,1
1,4,4,1
2,6,6,1
3,10,10,1
4,12,12,1
5,14,14,1
6,17,17,1
7,19,19,1
8,21,21,1
9,22,22,1


### Discount Rate

In [14]:
yield_curve  = pd.read_csv(r'data/YieldCurve1Sep.csv')
yield_curve

Unnamed: 0,enor Year,Today
0,0.1,0.06158
1,1.0,0.061591
2,2.0,0.061629
3,3.0,0.061763
4,4.0,0.062007
5,5.0,0.062345
6,6.0,0.062754
7,7.0,0.063204
8,8.0,0.063671
9,9.0,0.064134


In [15]:
def spot_rates(yield_curve):
    spot_rate = np.zeros(yield_curve.shape[0])
    t = yield_curve.iloc[:,0]
    spot_rate[0] = yield_curve.iloc[0,1]
    spot_rate[1] = yield_curve.iloc[1,1]
    for i in range(2,yield_curve.shape[0]):
        sum = 0
        for j in range(1,i):
            sum += yield_curve.iloc[j,1]/(1+spot_rate[j])**t[j]
        spot_rate[i] = ((1+yield_curve.iloc[i,1])/(1-sum))**(1/t[i]) - 1   
    return pd.DataFrame(data = {'spot_rate' : spot_rate})

In [16]:
def discount_factor(employee_age, pension_age, rate, type = 'multi-rate'):
    if type == 'multi-rate':
        pass
    elif type == 'single-rate':
        rate = pd.DataFrame([rate]*(pension_age-int(employee_age)))
    else :
        raise 'Please define type of rate that being used either "multi-rate" or "single-rate"'
    return np.append(np.array([(1+rate.iloc[min(i,30)][0])**-i for i in range(pension_age-int(employee_age))]),
        (1+rate.iloc[min(30,pension_age-int(employee_age)-1)][0])**-(pension_age-employee_age))

In [17]:
discount_factor(employee_age= data.age.iloc[1], pension_age = pension_age ,rate= spot_rates(yield_curve), type ='multi-rate')

array([1.        , 0.94198236, 0.88729901, 0.83568462, 0.78689182,
       0.74071213, 0.69697421, 0.65554137, 0.61630088, 0.57915721,
       0.54402464, 0.51082366, 0.4794778 , 0.449911  , 0.42204703,
       0.39580997, 0.37112392, 0.34791291, 0.32610222, 0.30561816,
       0.2863886 , 0.26834382, 0.25141645, 0.23554165, 0.22065742,
       0.20670485, 0.19362763, 0.18137247, 0.17351984])

### Salary Increase 

In [18]:
salary_inc = 0.05
def salary_factor(employee_age, pension_age, salary_inc = salary_inc) :
    return np.append(np.array([(1+salary_inc)**i for i in range(pension_age - int(employee_age))]),
    (1+salary_inc)**(pension_age - employee_age))

In [19]:
salary_factor(employee_age= data.age.iloc[1], pension_age = pension_age)

array([1.        , 1.05      , 1.1025    , 1.157625  , 1.21550625,
       1.27628156, 1.34009564, 1.40710042, 1.47745544, 1.55132822,
       1.62889463, 1.71033936, 1.79585633, 1.88564914, 1.9799316 ,
       2.07892818, 2.18287459, 2.29201832, 2.40661923, 2.5269502 ,
       2.65329771, 2.78596259, 2.92526072, 3.07152376, 3.22509994,
       3.38635494, 3.55567269, 3.73345632, 3.8631679 ])

### Defined Benefit Obligation Factor

In [20]:
def dbo_factor(employee_age, employee_yos, pension_age = pension_age):
    return np.array([employee_yos/(employee_yos+i) for i in range(pension_age-int(employee_age))])

In [21]:
dbo_factor(employee_age=data.age.iloc[1], employee_yos=data.yos.iloc[1])

array([1.        , 0.33333333, 0.2       , 0.14285714, 0.11111111,
       0.09090909, 0.07692308, 0.06666667, 0.05882353, 0.05263158,
       0.04761905, 0.04347826, 0.04      , 0.03703704, 0.03448276,
       0.03225806, 0.03030303, 0.02857143, 0.02702703, 0.02564103,
       0.02439024, 0.02325581, 0.02222222, 0.0212766 , 0.02040816,
       0.01960784, 0.01886792, 0.01818182])

# Present Value Benefit

## Combining All Financial and Demographic Assumptions Excluding Pension Calculation

This approach can be rearranged if there is tax gross up. If there is tax gross up, flop will increase at least $n$ times with $2n$ with $n$ is 55-$\lfloor \text{entry age}\rfloor$

In [22]:
financial_assumption = np.multiply(np.multiply(discount_factor(employee_age= data.age.iloc[1], pension_age = pension_age ,rate= spot_rates(yield_curve), type ='multi-rate'),
            salary_factor(employee_age= data.age.iloc[1], pension_age = pension_age),
            ),np.append(dbo_factor(employee_age=data.age.iloc[1], employee_yos=data.yos.iloc[1]),1))
financial_assumption

array([1.        , 0.32969383, 0.19564943, 0.13820134, 0.10627466,
       0.08594157, 0.07184708, 0.06149417, 0.05356218, 0.04728752,
       0.04219804, 0.03798617, 0.03444293, 0.03142127, 0.02881463,
       0.02654389, 0.024549  , 0.02278351, 0.02121092, 0.0198021 ,
       0.01853352, 0.01738595, 0.01634353, 0.01539302, 0.01452331,
       0.01372502, 0.01299012, 0.01231175, 0.67033628])

In [23]:
svc_table = service_table(demographic_tbl = demographic_table(table = mortality_base,
                  employee_gender = data.gender.iloc[1],
                  employee_age = data.age.iloc[1]))

In [24]:
svc_table_dec = np.multiply(svc_table.iloc[:-1,1:].values,
                            np.vstack((np.vstack((financial_assumption[:-1],
                                                  financial_assumption[:-1])),
                                                  financial_assumption[:-1])).T)
svc_table_dec

array([[6.44700221e-04, 6.44285354e-06, 8.14171031e-03],
       [2.26958415e-04, 2.26801132e-06, 2.56448659e-03],
       [1.43112355e-04, 1.43006093e-06, 1.45224354e-03],
       [1.08303171e-04, 1.08216322e-06, 9.77662159e-04],
       [8.87568485e-05, 8.86804016e-07, 7.15503788e-04],
       [7.61465940e-05, 7.60764851e-07, 5.49818906e-04],
       [6.72701312e-05, 6.72041991e-07, 4.36032074e-04],
       [6.17890424e-05, 6.17235886e-07, 3.53351013e-04],
       [5.79454345e-05, 5.78788892e-07, 2.90782416e-04],
       [5.56354273e-05, 5.55654760e-07, 2.41965643e-04],
       [5.39866745e-05, 5.39123824e-07, 2.02968587e-04],
       [5.38506113e-05, 5.37679763e-07, 1.71222654e-04],
       [5.41616373e-05, 5.40688730e-07, 1.44985476e-04],
       [5.47883298e-05, 5.46836438e-07, 1.23030256e-04],
       [5.58953452e-05, 5.57758160e-07, 1.04466715e-04],
       [5.71109616e-05, 5.69746973e-07, 8.86348626e-05],
       [5.88278211e-05, 5.86705700e-07, 7.50323975e-05],
       [6.07100603e-05, 6.05285

In [25]:
ben_fac_employee = select_ben_fac(employee_age=data.age.iloc[1], 
                                  employee_yos= data.yos.iloc[1],
                                  pension_age=pension_age)

In [26]:
ben_fac_employee

Unnamed: 0,death,disable,resign
0,2,2,1
1,4,4,1
2,6,6,1
3,10,10,1
4,12,12,1
5,14,14,1
6,17,17,1
7,19,19,1
8,21,21,1
9,22,22,1


In [27]:
dbo_exclude_pension = np.multiply(svc_table_dec,ben_fac_employee.values)*data.salary.iloc[1]
dbo_exclude_pension

array([[6.04479752e+03, 6.04090767e+01, 3.81688952e+04],
       [4.25598633e+03, 4.25303691e+01, 1.20224887e+04],
       [4.02552309e+03, 4.02253412e+01, 6.80821711e+03],
       [5.07732677e+03, 5.07325523e+01, 4.58334711e+03],
       [4.99317816e+03, 4.98887750e+01, 3.35433073e+03],
       [4.99772622e+03, 4.99312477e+01, 2.57758866e+03],
       [5.36123864e+03, 5.35598404e+01, 2.04414820e+03],
       [5.50375393e+03, 5.49792374e+01, 1.65653373e+03],
       [5.70469542e+03, 5.69814407e+01, 1.36320787e+03],
       [5.73809920e+03, 5.73088460e+01, 1.13435149e+03],
       [5.56805095e+03, 5.56038865e+01, 9.51530626e+02],
       [5.55401773e+03, 5.54549496e+01, 8.02703518e+02],
       [5.84000963e+03, 5.83000727e+01, 6.79701835e+02],
       [5.90758312e+03, 5.89629528e+01, 5.76774261e+02],
       [6.02694769e+03, 6.01405939e+01, 4.89747109e+02],
       [6.42576232e+03, 6.41043072e+01, 4.15526302e+02],
       [6.61893243e+03, 6.60123955e+01, 3.51757015e+02],
       [6.83071002e+03, 6.81028

# IFRIC Implementation

Cut off age of IFRIC implementation is 56-24 = 32 years old. Therefore 

In [28]:
def ifric_cutoff(employee_age, employee_yos, pension_age):
    return int((pension_age - employee_age + employee_yos)/3)*3

In [29]:
def pension_ifric(employee_age, employee_yos, pension_age):
    return (employee_yos-(pension_age - employee_age + employee_yos-ifric_cutoff(employee_age, employee_yos, pension_age)))/ifric_cutoff(employee_age, employee_yos, pension_age)

In [30]:
def pension_pbo(employee_age, employee_yos, pension_age, financial_assumption, svc_table):
    return max(0,ben_fac.iloc[ifric_cutoff(employee_age, employee_yos, pension_age),0]*pension_ifric(employee_age, employee_yos, pension_age)*financial_assumption[-1]*svc_table.iloc[-1,0])

In [31]:
pension = pension_pbo(data.age.iloc[1], data.yos.iloc[1], pension_age, financial_assumption, svc_table)*data.salary.iloc[1]

# Total for Employee 1

In [32]:
pension + np.sum(dbo_exclude_pension)

271299.36965318443

# Calculate for All Employee

In [33]:
def calculate_pbo(i, data, pension_age):
    financial_assumption = np.multiply(np.multiply(discount_factor(employee_age= data.age.iloc[i], 
                                                                   pension_age = pension_age,
                                                                   rate= spot_rates(yield_curve), type ='multi-rate'),            
                                                                   salary_factor(employee_age= data.age.iloc[i], pension_age = pension_age)),
                                                                   np.append(dbo_factor(employee_age=data.age.iloc[i], 
                                                                                        employee_yos=data.yos.iloc[i]),1))
    svc_table = service_table(demographic_tbl = demographic_table(table = mortality_base,
                  employee_gender = data.gender.iloc[i],
                  employee_age = data.age.iloc[i]))
    svc_table_dec = np.multiply(svc_table.iloc[:-1,1:].values,
                                np.vstack((np.vstack((financial_assumption[:-1],
                                                      financial_assumption[:-1])),
                                                      financial_assumption[:-1])).T)
    
    ben_fac_employee = select_ben_fac(employee_age=data.age.iloc[i], 
                                      employee_yos= data.yos.iloc[i],
                                      pension_age=pension_age)
    dbo_exclude_pension = np.multiply(svc_table_dec,
                                      ben_fac_employee.values)*data.salary.iloc[i]
    
    pension = pension_pbo(data.age.iloc[i], data.yos.iloc[i], 
                          pension_age, financial_assumption, 
                          svc_table)*data.salary.iloc[i]
    
    return pension + np.sum(dbo_exclude_pension)

In [34]:
pbo = data[['id']]
pbo['pbo'] = [calculate_pbo(i, data = data, pension_age=pension_age) for i in range(data.shape[0])]

  return np.array([employee_yos/(employee_yos+i) for i in range(pension_age-int(employee_age))])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pbo['pbo'] = [calculate_pbo(i, data = data, pension_age=pension_age) for i in range(data.shape[0])]


In [35]:
pbo

Unnamed: 0,id,pbo
0,18562,2.145979e+07
1,89107,2.712994e+05
2,25141,9.370022e+07
3,64459,6.045382e+06
4,17677,2.220906e+07
...,...,...
9995,36864,1.108414e+08
9996,22090,2.932180e+07
9997,44930,1.229319e+07
9998,50239,1.592052e+07


# Result

With 1000 data, it took around 19 seconds to perform PBO calculation without tax, and it supposedly it took 190 seconds or 3 mins to perform the calculation of 10,000 data.