# Configurations

In [10]:
from IPython.display import display, HTML
import numpy as np
import pandas as pd

# Input CSV read by the loading cell. An earlier revision pointed at
# './Dataset/test_66516Ee.csv'; that assignment was dead code because it was
# immediately overwritten by the line below.
Test_file     = './Dataset/tst_renewal_predicted.csv'

# Column dtypes that were used with the earlier input file (kept for reference):
# Test_dtype = {'id':str, 'perc_premium_paid_by_cash_credit':np.float32,
# 'age_in_days':np.float32,
# 'Income':np.float32,
# 'Count_3-6_months_late':np.float32,
# 'Count_6-12_months_late':np.float32,
# 'Count_more_than_12_months_late':np.float32,
# 'application_underwriting_score':np.float32,
# 'no_of_premiums_paid':np.float32,
# 'sourcing_channel':str,
# 'residence_area_type':str,
# 'premium':int}

# Explicit dtypes handed to pd.read_csv. The builtin `str` / `int` are used
# because the `np.str` / `np.int` aliases (which were nothing more than these
# builtins) were deprecated in NumPy 1.20 and removed in NumPy 1.24.
# Columns not listed here are left to pandas' dtype inference.
Test_dtype = {'id':str,
              'premium':int,
              'renewal':np.float32,
              'incentives':np.float32
             }



## Loading dataset

In [11]:
# Load the input CSV. Columns named in Test_dtype are parsed with those dtypes;
# empty fields are treated as missing values.
tst_origin = pd.read_csv(Test_file, dtype=Test_dtype, na_values='')

display(tst_origin.head())

# Work on an independent copy: later cells write to the 'incentives' and
# 'improvement' columns in place, and a plain `tst_data = tst_origin` would make
# those writes silently change the loaded frame as well.
tst_data = tst_origin.copy()

# Check the shape of the working dataset
print(tst_data.shape)

Unnamed: 0,id,premium,renewal,improvement,incentives
0,649,3300,0.98761,0.0,0.0
1,81136,11700,0.97732,0.0,0.0
2,70762,11700,0.914971,0.0,0.0
3,53935,5400,0.965756,0.0,0.0
4,15476,9600,0.950142,0.0,0.0


(34224, 5)


### Attach renewal probabilities to each policy (the random placeholders are disabled; the values loaded from the file are used)

In [12]:
# Disabled placeholder generation (random renewal probabilities, zeroed
# incentives/improvement), kept for reference:
#pd_renewal = pd.DataFrame(np.random.rand(len(tst_data),1), columns=['renewal'])
#pd_incentives = pd.DataFrame(np.zeros((len(tst_data), 1)), columns=['incentives'], dtype=np.float32)
#pd_improve = pd.DataFrame(np.zeros((len(tst_data), 1)), columns=['improvement'], dtype=np.float32)
#tst_data2 = pd.concat([tst_data, pd_renewal, pd_improve, pd_incentives], axis=1)

# Take a copy rather than an alias: the optimisation cell updates tst_data2 in
# place, so re-running this cell must restore a clean starting point and must
# not leak those updates back into tst_data.
tst_data2 = tst_data.copy()
display(tst_data2.head())
tst_data2.shape

Unnamed: 0,id,premium,renewal,improvement,incentives
0,649,3300,0.98761,0.0,0.0
1,81136,11700,0.97732,0.0,0.0
2,70762,11700,0.914971,0.0,0.0
3,53935,5400,0.965756,0.0,0.0
4,15476,9600,0.950142,0.0,0.0


(34224, 5)

### Build equations

In [13]:
import math

def inc_to_eff(inc):
    """Map an incentive amount to an effort value.

    Computes ``10 * (1 - exp(-inc / 400))``: 0 at ``inc == 0`` and
    approaching 10 as ``inc`` grows. Accepts a scalar or anything
    ``np.exp`` can broadcast over (NumPy array, pandas Series).
    """
    decay = np.exp(-1.0 * inc / 400.0)
    remaining = 1 - decay
    return 10 * remaining

def eff_to_imp(eff):
    """Map an effort value to a fractional improvement.

    Computes ``20 * (1 - exp(-eff / 5)) / 100``: 0 at ``eff == 0`` and
    approaching 0.2 as ``eff`` grows. Accepts a scalar or anything
    ``np.exp`` can broadcast over (NumPy array, pandas Series).
    """
    decay = np.exp(-1.0 * eff / 5.0)
    percent = 20 * (1 - decay)
    return percent / 100

# def derivative(inc, delta):
#     y1 = eff_to_imp(inc_to_eff(inc))
#     y2 = eff_to_imp(inc_to_eff(inc+delta))
#     return (y2-y1)/delta

def rev_policy(renewal, premium, inc):
    """Net revenue of a policy for a given incentive.

    The renewal value is raised by the fractional improvement obtained from
    ``inc`` (via ``inc_to_eff`` then ``eff_to_imp``), multiplied by the
    premium, and the incentive itself is subtracted. Works element-wise when
    the arguments are arrays or Series.
    """
    improvement = eff_to_imp(inc_to_eff(inc))
    boosted_renewal = renewal + renewal*improvement
    return boosted_renewal * premium - inc

def derivatives(renewal, premium, inc, delta=1):
    """Forward-difference estimate of d(rev_policy)/d(inc).

    Evaluates ``rev_policy`` at ``inc`` and at ``inc + delta`` and returns the
    change in net revenue per unit of incentive. ``delta`` must be non-zero.
    """
    rev_here = rev_policy(renewal, premium, inc)
    rev_ahead = rev_policy(renewal, premium, inc+delta)
    change = rev_ahead - rev_here
    return change/delta

def total_net_rev(dataset):
    """Return the total net revenue summed over every policy in ``dataset``.

    ``dataset`` must provide the columns 'renewal', 'improvement', 'premium'
    and 'incentives' (a DataFrame, or any mapping of equally shaped arrays).
    The per-policy value is
    ``(renewal + renewal * improvement) * premium - incentives``,
    the same expression the optimisation loop uses for its running total.

    The previous body, ``dataset['']``, looked up a column with an empty name
    and therefore raised KeyError on any real frame.
    """
    boosted_renewal = dataset['renewal'] + dataset['renewal'] * dataset['improvement']
    per_policy = boosted_renewal * dataset['premium'] - dataset['incentives']
    return np.sum(per_policy)


In [4]:
# Spot-check the two response curves on their own and chained together.
print(inc_to_eff(1000))
print(eff_to_imp(10))
print(eff_to_imp(inc_to_eff(100)))

# Finite-difference derivative for a few (renewal, premium, inc, delta) cases.
for scenario in [(0.5, 10000, 100, 1),
                 (0.5, 1000, 100, 1),
                 (0.8, 10000, 100, 1),
                 (0.5, 10000, 10, 1)]:
    print(derivatives(*scenario))


9.179150013761012
0.17293294335267745
0.0715012813499204
1.4938951338181141
-0.7506104866181431
2.990232214107891
3.624525429319874


### OK, let's find the optimal solution!

In [None]:
import time

# Greedy coordinate ascent: on every step, find the single policy whose net
# revenue grows fastest per extra unit of incentive and raise only that
# policy's incentive. tst_data2 is updated in place.

learning_rate = 0.8   # step size: incentive increase = learning_rate * derivative
delta = 1             # incentive increment used by the finite-difference derivative
MAX_ITER = 100000     # hard cap on the number of greedy steps
MIN_GAIN = 10         # stop once a step raises total net revenue by less than this
t1 = time.time()

itr = 0
# Deliberately NOT named `total_net_rev`: that name belongs to the helper
# function defined with the equations and must not be overwritten by a number.
prev_net_rev = 0
net_rev = 0

while True:
    # The first iterations are exempt because the running totals start at 0.
    if itr > 2  and net_rev-prev_net_rev < MIN_GAIN:
        print('Net revenue gain per step fell below %s; stopping.' % MIN_GAIN)
        break
    # `>=` so that at most MAX_ITER steps are performed.
    if itr >= MAX_ITER:
        print('Maximum iteration reached.')
        break

    # Marginal net revenue of one more `delta` of incentive, for every policy.
    result = derivatives(tst_data2['renewal'], tst_data2['premium'], tst_data2['incentives'], delta)
    cand_idx, deriv = result.idxmax(), result.max()

    # Raise the best candidate's incentive and refresh its improvement value.
    tst_data2.loc[cand_idx, 'incentives'] = tst_data2.loc[cand_idx, 'incentives'] + learning_rate * deriv
    tst_data2.loc[cand_idx, 'improvement'] = eff_to_imp(inc_to_eff(tst_data2.loc[cand_idx, 'incentives']))

    # Recompute the total net revenue across all policies.
    prev_net_rev = net_rev
    net_rev = (tst_data2['renewal'] + tst_data2['renewal']*tst_data2['improvement']) * tst_data2['premium'] - tst_data2['incentives']
    net_rev = np.sum(net_rev)

    # Progress line: iteration, chosen row, its derivative, total, gain of this step.
    if itr%100 == 0:
        print('%d\t%d\t%13.3f\t%13.3f\t%13.3f' %(itr, cand_idx, deriv, net_rev, net_rev-prev_net_rev))

    itr += 1

print(round(time.time() - t1), 'seconds.')

0	27171	       16.307	357038017.776	357038017.776
100	25478	       16.239	357058265.078	      203.644
200	12962	       16.196	357078468.167	      200.231
300	4188	       16.167	357098607.363	      201.510
400	8364	       16.135	357118691.108	      201.032
500	12217	       16.113	357138720.497	      200.586
600	23287	       16.093	357158692.625	      200.035
700	27184	       16.069	357178587.301	      198.875
800	23508	       16.046	357198414.766	      198.062
900	3399	       16.029	357218178.132	      197.724
1000	20419	       16.014	357237910.947	      197.143
1100	16678	       16.000	357257611.353	      197.109
1200	27067	       15.986	357277258.693	      196.880
1300	10161	       15.973	357296876.979	      196.155
1400	11165	       15.962	357316460.363	      195.707
1500	4287	       15.950	357336011.943	      195.595
1600	22926	       15.940	357355537.215	      195.293
1700	26842	       15.931	357375030.858	      194.523
1800	7948	       15.922	357394500.396	      194.483
1900	5361	

## Save output file for submission

In [17]:
# Write only the id, renewal and incentives columns, without the row index.
submission_columns = ['id', 'renewal', 'incentives']
submission = tst_data2[submission_columns]
submission.to_csv('./Dataset/output_submission.csv', index=False)