In [541]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [542]:
#!pip install chainladder

In [543]:
import chainladder as cl
from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
import statsmodels.api as sm
import statsmodels.formula.api as smf
import math
from sklearn.preprocessing import PolynomialFeatures

# **Data Preprocessing**

In [544]:
# Load the claims CSV from the Kaggle dataset and preview its first rows.
mortgage_data=pd.read_csv('../input/reinsurance-data-final/reinsurance_data_IV/claims_reinsurance.csv')
mortgage_data.head()

In [545]:
# Rescale the 'Amount' column by dividing by 1e12, overwriting the column in place.
# NOTE(review): this cell is not idempotent - running it a second time divides the
# already scaled values again; re-run the read_csv cell first.
mortgage_data['Amount']=mortgage_data['Amount'].divide(1000000000000)#.divide(1000000)
mortgage_data.head()

In [546]:
# Build a chainladder Triangle from the claims table: origin is
# 'Accident/underwriting year', development is 'Development year', the value
# column is 'Amount', and the amounts are declared cumulative.
dev_tri=cl.Triangle(mortgage_data,origin=['Accident/underwriting year'],development=['Development year'],columns=['Amount'],cumulative=True)
dev_tri

In [547]:
# Display the triangle's latest_diagonal attribute.
dev_tri.latest_diagonal

In [548]:
# Render the triangle's link ratios as a heatmap.
dev_tri.link_ratio.heatmap()

In [549]:
# Fit a Development estimator with average='simple' on the triangle, take its
# cdf_ attribute and convert it to a pandas DataFrame for the cells below.
dev_tri_delp=cl.Development(average='simple').fit(dev_tri)
data=dev_tri_delp.cdf_
frame=data.to_frame(origin_as_datetime=False)
frame.head()

In [550]:
# Collect the first row of `frame` for its first ten columns, paired with a
# 0-based development index, and tabulate the two lists.
# `.iloc[0,i]` is explicitly positional; the previous `frame.iloc[0][i]` first
# extracted the row and then relied on an integer key being treated as a
# position on that row Series, a fallback pandas has deprecated.
N_DEV_PERIODS=10
development_periods=list(range(N_DEV_PERIODS))
link_ratios=[frame.iloc[0,i] for i in development_periods]
dictionary={'Link_Ratios':link_ratios,'Dev':development_periods}

TF=pd.DataFrame(dictionary)
TF

In [551]:
# Display the column labels of the development-factor frame.
frame.columns

In [552]:
def get_log_values(f):
  """Return ``math.log(value) - 1`` for every entry of ``f``, in order.

  f: a sized sequence indexable by 0..len(f)-1 whose entries are positive numbers.
  Returns a new list with one float per entry of ``f``.

  Note that 1 is subtracted after taking the logarithm: this is log(f) - 1,
  not log(f - 1).
  """
  return [math.log(f[position])-1 for position in range(len(f))]

#dev=list()
#for i in range(1,len(TF['Dev'].tolist())):
#    dev.append(TF.iloc[i][1])

In [553]:
# Build the regression table: the transformed link ratios (log(f)-1, from
# get_log_values) alongside their development index.
f=TF['Link_Ratios'].tolist()
log_values=get_log_values(f)
dev=TF['Dev'].tolist()
dat=pd.DataFrame(list(zip(log_values,dev)),columns=['log(f)-1','dev'])
dat

In [554]:
# Ordinary least squares of 'log(f)-1' on the development index.
# sm.add_constant supplies the intercept column, so X becomes a two-column
# design matrix that later cells reuse.
y=dat['log(f)-1']
X=dat['dev']
X=sm.add_constant(X)
reg_model=sm.OLS(y,X).fit()
print(reg_model.summary())

In [555]:
# Predict the linear model at development indices 10 and 11, i.e. the two
# indices after the fitted range 0-9.
# NOTE(review): add_constant's default has_constant='skip' may treat a
# single-row 'dev' column as the constant and add nothing - confirm before
# reducing pred_dev to one value.
pred_dev=[10,11]
pred_dict={'dev':pred_dev}
X_pred=pd.DataFrame(pred_dict)
X_pred=sm.add_constant(X_pred)
predict=reg_model.predict(X_pred)
print(predict)

In [556]:
# Scatter of the transformed link ratios against development index with seaborn's fitted regression line.
sns.regplot(x='dev',y='log(f)-1',data=dat)

In [557]:
# Expand the design matrix with degree-2 polynomial terms.
# NOTE(review): X already carries the intercept column added by sm.add_constant,
# so the expansion is applied to both the constant and 'dev' and presumably
# produces redundant (collinear) columns - confirm whether dat[['dev']] was intended.
polynomial_features= PolynomialFeatures(degree=2)
X_poly = polynomial_features.fit_transform(X)

In [558]:
# Inspect the expanded polynomial design matrix.
print(X_poly)

In [559]:
# Fit OLS of 'log(f)-1' on the polynomial design matrix and print the summary.
# `model` is rebound later in the notebook by the premium regression.
model = sm.OLS(y, X_poly).fit() 
print(model.summary())

In [560]:
# Expand the prediction rows with the transformer already fitted on the training
# design matrix. `transform` reuses that fit instead of refitting on the
# prediction rows, and writing to a new name leaves `X_pred` untouched, so this
# cell can be re-run without expanding an already expanded matrix.
X_pred_poly=polynomial_features.transform(X_pred)
y_pred=model.predict(X_pred_poly)
print(y_pred)

In [561]:
# Convert the predictions to a plain list and show how many there are.
y_pred=list(y_pred)
print(len(y_pred))

In [562]:
# Map the predicted values back to link ratios. The regression target was built
# as log(f) - 1, so the inverse is f = exp(y + 1); the previous exp(y - 1)
# returned every ratio scaled down by a factor of e**2.
# This rebinds `link_ratios` (earlier: the fitted factors) to the extrapolated ones.
link_ratios=[math.exp(predicted+1) for predicted in y_pred]
print(link_ratios)

In [563]:
# Fit a Development estimator with its default settings and display its ldf_ attribute.
dev_tri_devlp=cl.Development().fit(dev_tri)
dev_tri_devlp.ldf_

In [564]:
# Plot the transposed cumulative triangle, with development period on the x-axis.
dev_tri.T.plot(
    marker='.', grid=True,
    title='plot of cumulative claims payment').set(
    xlabel='Development Period', ylabel='Cumulative Paid Loss')

In [565]:
# Plot the incremental triangle (cum_to_incr) against development period.
# The title and y-label previously said "cumulative", copied from the plot
# above, which mislabelled the incremental data shown here.
dev_tri.cum_to_incr().T.plot(
    marker='.', grid=True,
    title='plot of incremental claims payment').set(
    xlabel='Development Period', ylabel='Incremental Paid Loss')

# **Deterministic Methods**

# **Chain Ladder Method**

In [566]:
# Chainladder Method
# Fit the chain-ladder model on the triangle and display its full_triangle_ attribute.
chainladder_model=cl.Chainladder().fit(dev_tri)
ft=chainladder_model.full_triangle_
ft

In [567]:
# Total of the model's latest diagonal.
latest_diagonal_sum=chainladder_model.latest_diagonal.sum()
print(latest_diagonal_sum)

In [568]:
# Total of the chain-ladder ultimates; also used later as the `reserve`
# argument of case_scenario_generator.
ultimate_sum=chainladder_model.ultimate_.sum()
print(ultimate_sum)

In [569]:
# IBNR reserve computed as total ultimate minus total latest diagonal.
ibnr_reserve=ultimate_sum-latest_diagonal_sum
print(ibnr_reserve)

In [598]:
# Chain-ladder ultimates as a plain list: convert to a DataFrame, drop rows
# containing NaN, and take the column labelled '2261'.
# NOTE(review): the '2261' label is hard-coded; presumably it is the label
# chainladder gives the ultimate column - confirm against the to_frame() output.
ultimate=(
    chainladder_model.ultimate_
    .to_frame()
    .dropna(axis=0)['2261']
    .tolist()
)
print(ultimate)

In [599]:
# Scale every ultimate by the first entry of `link_ratios`.
first_ratio=link_ratios[0]
new_ultimate=[amount*first_ratio for amount in ultimate]
print(new_ultimate)

# **Stochastic Methods**

# **Mack Chain Ladder Method**

In [570]:
# Fit a Mack chain-ladder model on the triangle after a Development step with
# average='volume', then display its mack_std_err_ attribute.
# Note: `dev` was previously bound to the list of development indices; it is
# rebound here to the Development estimator.
mack=cl.MackChainladder()
dev=cl.Development(average='volume')
mack.fit(dev.fit_transform(dev_tri))

mack.mack_std_err_

In [571]:
# Mack summary converted to a pandas DataFrame; the 'Ultimate' and 'Latest'
# columns are used in the IBNR calculation below.
summary=mack.summary_.to_frame(origin_as_datetime=False)
summary

In [572]:
# Display the Mack model's full_triangle_ attribute.
mack.full_triangle_

In [573]:
# Mack IBNR: total of the 'Ultimate' column minus total of the 'Latest' column.
# The built-in sum() is used, so a NaN in either column would propagate to the result.
mack_ibnr=sum(summary['Ultimate'])-sum(summary['Latest'])
mack_ibnr

# **Insurance Valuation**

In [574]:
def case_scenario_generator(reserve,premiums,num_scenarios,seed=None):
  """Simulate scenarios in which claims and premiums are each inflated at random.

  For every scenario two independent uniform draws in [0, 1) are taken; the
  premium total is scaled by (1 + first draw) and the reserve by
  (1 + second draw). The scenario loss ratio is the scaled reserve divided by
  the scaled premium total.

  Parameters:
    reserve: base claims amount.
    premiums: iterable of premium amounts; only their sum is used.
    num_scenarios: number of scenarios to generate.
    seed: optional seed. When given, draws come from a private
      ``RandomState(seed)`` so the output is reproducible and the global
      NumPy random state is left untouched. When None (the default), the
      global NumPy random state is used, as before.

  Returns:
    (loss, c, p): three lists of length ``num_scenarios`` holding the loss
    ratio, the scaled reserve and the scaled premium total of each scenario.

  Raises:
    ZeroDivisionError: if the premiums sum to zero, since no loss ratio can
      be formed.

  Side effect: prints the initial loss ratio (reserve / total premiums).
  """
  total_premiums=sum(premiums)
  if total_premiums==0:
    raise ZeroDivisionError('premiums sum to zero; the loss ratio is undefined')
  loss_ratio=reserve/total_premiums
  print('The initial loss ratio is '+str(loss_ratio))
  # `random` is numpy.random; it exposes the same rand() as a RandomState instance.
  rng=random if seed is None else random.RandomState(seed)
  loss,c,p=[],[],[]
  for _ in range(num_scenarios):
    # Draw order (premium shock first, then claim shock) is kept so that runs
    # seeded through the global state reproduce the earlier sequence.
    pr_random=rng.rand(1)[0]
    cl_random=rng.rand(1)[0]
    new_reserve=(cl_random*reserve)+reserve
    new_premiums=(pr_random*total_premiums)+total_premiums
    loss.append(new_reserve/new_premiums)
    c.append(new_reserve)
    p.append(new_premiums)

  return loss,c,p

In [575]:
def plot(loss,c,p):
  """Show a 3D scatter with claims on x, premiums on y and loss ratio on z.

  loss, c, p: sequences of loss ratios, claims and premiums (presumably of
  equal length, one entry per scenario).
  Draws the figure with plt.show() and returns None.
  """
  fig=plt.figure()
  ax=plt.axes(projection ='3d')
  # The colour must be passed by keyword: the fourth positional parameter of
  # scatter3D is `zdir`, so a positional 'green' is not interpreted as a colour.
  ax.scatter3D(c,p,loss,color='green')
  ax.set_title('Loss Ratio Vs Claims Vs Premiums')
  ax.set_xlabel('Claims')
  ax.set_ylabel('Premiums')
  ax.set_zlabel('Loss Ratio')
  fig.set_size_inches(8,8)
  plt.show()

In [576]:
# Load the premiums CSV, divide 'Amount' by 1e12 (the same divisor applied to
# the claims amounts) and keep the values as a plain list.
# Note: `premiums` is first bound to the DataFrame and then rebound to the list.
premiums=pd.read_csv('../input/reinsurance-data-final/reinsurance_data_IV/premiums_reinsurance.csv')
premiums['Amount']=premiums['Amount'].divide(1000000000000)
premiums=premiums['Amount'].tolist()
#premiums=premiums[10:19]
#print(len(premiums))
print(premiums)

In [577]:
# Generate 1000 scenarios; the chain-ladder ultimate total is passed as the `reserve` argument.
loss,c,p=case_scenario_generator(ultimate_sum,premiums,1000)

In [578]:
# 3D scatter of the first 100 scenarios.
plot(loss[0:100],c[0:100],p[0:100])

In [579]:
# Claims against loss ratio for the first 100 scenarios; axis labels added so
# the figure can be read on its own.
plt.scatter(loss[0:100],c[0:100])
plt.xlabel('Loss Ratio')
plt.ylabel('Claims')
plt.show()

In [580]:
# Premiums against loss ratio for the first 100 scenarios; axis labels added so
# the figure can be read on its own.
plt.scatter(loss[0:100],p[0:100],color='red')
plt.xlabel('Loss Ratio')
plt.ylabel('Premiums')
plt.show()

# **Modeling of the Premiums with respect to Loss Ratio and Claims**

In [581]:
# Gather the simulated scenarios into one table with columns Claims, Premiums
# and Loss_Ratio (in that order).
# Note: `data` previously held the development-factor triangle; it is rebound to this dict.
data={'Claims':c,'Premiums':p,'Loss_Ratio':loss}
valuation=pd.DataFrame(data)
valuation

In [582]:
#valuation['Claims']=valuation['Claims'].divide(1000000000000)
#valuation['Premiums']=valuation['Premiums'].divide(1000000000000)
# Express the loss ratio in percent. The column is recomputed from the original
# `loss` list rather than multiplied in place, so re-running this cell does not
# multiply by 100 a second time.
valuation['Loss_Ratio']=pd.Series(loss,index=valuation.index).multiply(100)
valuation

In [583]:
# Annotated heatmap of the pairwise correlations between Claims, Premiums and Loss_Ratio.
ax=sns.heatmap(valuation.corr(),annot=True)

In [584]:
#valuation['Claims']=valuation['Claims'].divide(10000000)
#valuation['Premiums']=valuation['Premiums'].divide(10000000)
#valuation.head()

In [585]:
# Features are Loss_Ratio and Claims, the target is Premiums; hold out 20% of
# the rows with a fixed random_state for a repeatable split.
# Note: X and y are rebound here; earlier they held the link-ratio regression inputs.
X=valuation[['Loss_Ratio','Claims']]
y=valuation['Premiums']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=1)

# **1. Linear Regression**

In [586]:
# Using sklearn

# Fit an sklearn linear regression of Premiums on Loss_Ratio and Claims and
# predict the held-out rows.
regr=LinearRegression()
regr.fit(X_train, y_train)
y_pred=regr.predict(X_test)
# np.asarray(...).tolist() accepts both the original Series and an already
# converted list, so re-running this cell no longer fails because a list has
# no `.tolist()` method.
y_test=np.asarray(y_test).tolist()

In [587]:
# Using statsmodel 
# Same regression with statsmodels: add an intercept column to both splits and fit OLS.
# Note: X_train/X_test are overwritten with the constant-augmented versions, and
# `model` is rebound from the earlier polynomial fit.
X_train,X_test=sm.add_constant(X_train),sm.add_constant(X_test)
model=sm.OLS(y_train,X_train).fit()
print(model.summary())

In [588]:
# Predict premiums for the held-out rows with the statsmodels fit.
pred=model.predict(X_test)

In [589]:
# Print the first nine predicted and actual premiums for comparison.
print('Predicted Data:')
print(pred.tolist()[0:9])
print('\n')
print('Actual Data:')
print(y_test[0:9])
print('\n')

In [590]:
# Average of (Premiums - Claims) over the scenarios whose Loss_Ratio is below `lr`.
# Columns are selected by name instead of the chained positional lookup
# valuation.iloc[i][k], and the row-by-row loop is replaced by a boolean mask.
lr=47.207  # NOTE(review): hard-coded threshold; its origin is not shown in this notebook - confirm
below_lr=valuation.loc[valuation['Loss_Ratio']<lr]
difference=(below_lr['Premiums']-below_lr['Claims']).tolist()
#print(difference)
# With no scenario under the threshold the previous code divided by zero; report NaN instead.
average=sum(difference)/len(difference) if difference else float('nan')
print(average)

In [591]:
#valuation.to_csv('valuation.csv')