In [4]:
import warnings
warnings.filterwarnings('ignore')

In [1]:
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_log_error

class MyModel(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, 64)
        self.fc2 = torch.nn.Linear(64, 16)
        self.fc3 = torch.nn.Linear(16, 1)
            
    def forward(self, x):
        fc1_op = torch.nn.functional.relu(self.fc1(x))
        fc2_op = torch.nn.functional.relu(self.fc2(fc1_op))
        return self.fc3(fc2_op)


def train(x, y, model, epochs=1, bs=512, lr=1e-3):
    iters = len(x)//bs
    x, y = torch.FloatTensor(x.astype('float32').values), torch.FloatTensor(y.values)
#     optim = torch.optim.Adam(model.parameters(), lr=lr)
    floss = torch.nn.BCEWithLogitsLoss()
    losses = []
    for e in range(epochs):
        if e % 10 == 0 and e > 0:
            lr *= 0.9
            print ('lr', lr)
        optim = torch.optim.Adam(model.parameters(), lr=lr)
        ind = np.random.permutation(range(len(x)))
        x, y = x[ind], y[ind]
        for i in range(iters):
            batch_x, batch_y = x[i*bs:(i+1)*bs], y[i*bs:(i+1)*bs]
            optim.zero_grad()
            loss = floss(model(batch_x), batch_y)
            error_rmsle = rmsle(model(batch_x), batch_y )
            losses.append(loss.item())
            loss.backward()
            optim.step()
        print(f'Epoch {e}: Loss {np.mean(losses)} RMSLE {np.mean(error_rmsle)}')

def predict(x, model):
    x = torch.FloatTensor(x.astype('float32').values)
    return torch.nn.functional.sigmoid(model(x)).detach().numpy()[:, 0]

df_train = pd.read_csv('train.csv', encoding="ISO-8859-1")
df_train['train'] = 1
df_test = pd.read_csv('test.csv', encoding="ISO-8859-1")
df_test['train'] = 0
df_all = df_train.append(df_test)
aus_mean = df_all.application_underwriting_score.mean()
df_all.fillna({'application_underwriting_score': aus_mean}, inplace=True)
df_all.fillna({'Count_3-6_months_late': 0, 'Count_6-12_months_late': 0, \
                     'Count_more_than_12_months_late': 0}, inplace=True)

print(df_test.shape, df_train.shape, df_all.shape)

OHE_columns = [x for x in df_all.columns if x == 'sourcing_channel']
df_all = pd.get_dummies(df_all, columns= OHE_columns)
df_all.residence_area_type = pd.factorize(df_all.residence_area_type)[0]
df_all['amount_paid'] = df_all.no_of_premiums_paid * df_all.premium
df_train = df_all[df_all.train == 1]
df_train.reset_index(drop = True, inplace=True)
df_test = df_all[df_all.train == 0].reset_index(drop = True)
df_test.reset_index(drop = True, inplace=True)
print(df_test.shape, df_train.shape, df_all.shape)

np.random.seed(1)
train_rows = np.random.choice(df_train.index, int(len(df_train)* 0.8), replace=False)
valid_rows = [x for x in df_train.index if x not in train_rows]
df_train1 = df_train.loc[train_rows]
df_valid1 = df_train.loc[valid_rows]

train_X = df_train1.drop(['id', 'train', 'renewal'], axis = 1)
scaler = MinMaxScaler()
train_X = pd.DataFrame(scaler.fit_transform(train_X), columns = train_X.columns)
train_Y = df_train1[['renewal']]
valid_X = df_valid1.drop(['id', 'train', 'renewal'], axis = 1)
scaler = MinMaxScaler()
valid_X = pd.DataFrame(scaler.fit_transform(valid_X), columns = valid_X.columns)
valid_Y = df_valid1[['renewal']]
test_X = df_test.drop(['id', 'train', 'renewal'], axis = 1)
test_X = pd.DataFrame(scaler.fit_transform(test_X), columns = test_X.columns)

print(train_X.shape, train_Y.shape, test_X.shape, valid_X.shape, train_X.Income.mean())

np.random.seed(0)
torch.manual_seed(0)
model = MyModel(train_X.shape[1])

train(train_X, train_Y, model, epochs=1000, lr = 0.005 )

In [None]:
train_pred = predict(train_X, model)
print(f'train auc: {roc_auc_score(train_Y, train_pred)}')

val_pred = predict(valid_X, model)
print (f'val auc: {roc_auc_score(valid_Y, val_pred)}')
test_pred = predict(test_X, model)

In [None]:
submission = df_test[['id', 'premium']]
submission['renewal'] = test_pred
submission.head(2)

In [6]:
imp_per = lambda x: 20*(1-np.exp(-2*(1-np.exp(-x/400))))
inc_rev = lambda p, pr, inc: p*pr*imp_per(inc)/100. - inc
def opt_inc(p, pr):
    if inc_rev(p, pr, 1)<0: return 0
    max_rev = 0
    max_inc = 0
    inc = 1
    while inc_rev(p, pr, inc) > max_rev:
        max_rev = inc_rev(p, pr, inc)
        max_inc = inc #16
        inc *= 2 #32
    fac = inc / 4. #8
    inc -= fac #24
    fac /= 2. #4
    while fac>=1:
        if inc_rev(p, pr, inc)>max_rev:
            max_rev = inc_rev(p, pr, inc)
            max_inc = inc
            inc += fac
        else:
            inc -= fac
        fac -= 1.
    return max_inc

submission['incentives'] = submission.apply(lambda x: opt_inc(x.renewal, x.premium), axis=1)
# final_df1.incentives = [0 if x <= 0 else x for x in final_df1.incentives]
submission.drop('premium', axis = 1).to_csv('NN_8287_binarysearch_modified.csv', index=False)
submission.head()

Unnamed: 0,id,premium,renewal,incentives
0,649,3300,0.996194,192.0
1,81136,11700,0.987653,443.0
2,70762,11700,0.939059,431.0
3,53935,5400,0.977605,271.0
4,15476,9600,0.938666,387.0
