In [None]:
from __future__ import print_function

import pandas as pd 
import numpy as np 
from ml_metrics import quadratic_weighted_kappa
import random

random.seed(23)

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils
from keras import optimizers
from scipy.optimize import fmin_powell
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

def preprocess_data(X, scaler=None):
    """Scale the feature matrix ``X`` with a (possibly new) scaler.

    Args:
        X: Feature matrix accepted by the scaler's ``fit``/``transform``.
        scaler: Already-fitted object exposing ``transform``. When ``None``,
            a new ``StandardScaler`` is created and fitted on ``X``.

    Returns:
        Tuple ``(X_scaled, scaler)`` where ``scaler`` is the object that was
        used for the transformation, so it can be reused on other data.
    """
    # Compare against None explicitly: a truthiness test would discard a
    # caller-supplied scaler whose type happens to evaluate as falsy
    # (for example one that defines ``__len__``).
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    return X, scaler

def eval_wrapper(y, yhat):
    """Score ``yhat`` against ``y`` with the quadratic weighted kappa.

    ``y`` is cast to integers and supplies the valid rating range; ``yhat``
    is rounded, clipped into that range and cast to integers before both
    are handed to ``quadratic_weighted_kappa``. Because the range comes from
    ``y``, pass the reference ratings as ``y`` and the predictions as
    ``yhat``.

    Args:
        y: Array-like of reference ratings.
        yhat: Array-like of (possibly fractional) predicted ratings.

    Returns:
        Whatever ``quadratic_weighted_kappa`` returns for the two integer
        arrays.
    """
    reference = np.asarray(y).astype(int)
    lowest, highest = reference.min(), reference.max()
    # Round first, then clip, so out-of-range predictions land on the
    # nearest rating that actually occurs in the reference data.
    rounded = np.round(np.asarray(yhat))
    predicted = np.clip(rounded, lowest, highest).astype(int)
    return quadratic_weighted_kappa(predicted, reference)
    
def preprocess_labels(labels, encoder=None, categorical=True):
    """Encode class labels as integers and optionally one-hot encode them.

    Args:
        labels: Sequence of raw class labels.
        encoder: Already-fitted object exposing ``transform``. When ``None``,
            a new ``LabelEncoder`` is created and fitted on ``labels``.
        categorical: When true, the integer codes are additionally passed
            through ``np_utils.to_categorical``.

    Returns:
        Tuple ``(y, encoder)``: the encoded labels (int32 codes, or the
        ``to_categorical`` result when ``categorical`` is true) and the
        encoder that produced them.
    """
    # Compare against None explicitly: a truthiness test would discard a
    # caller-supplied encoder whose type happens to evaluate as falsy.
    if encoder is None:
        encoder = LabelEncoder()
        encoder.fit(labels)
    y = encoder.transform(labels).astype(np.int32)
    if categorical:
        y = np_utils.to_categorical(y)
    return y, encoder
    
def apply_offset(data, bin_offset, sv, scorer=eval_wrapper):
    """Shift the predictions of one bin by ``bin_offset`` and score the result.

    ``data`` is modified in place: row 1 is overwritten for every column
    whose raw prediction (row 0), truncated to an integer, equals ``sv``.

    Args:
        data: 2-D array with raw predictions in row 0, offset predictions in
            row 1 and labels in row 2.
        bin_offset: Value added to the raw predictions of the selected bin.
        sv: Integer bin whose predictions are shifted.
        scorer: Callable invoked as ``scorer(labels, offset_predictions)``,
            which matches the ``eval_wrapper(y, yhat)`` signature.

    Returns:
        The score returned by ``scorer``.
    """
    # data has the format of pred=0, offset_pred=1, labels=2 in the first dim
    in_bin = data[0].astype(int) == sv
    data[1, in_bin] = data[0, in_bin] + bin_offset
    # Labels go first: eval_wrapper derives the valid rating range from its
    # first argument and rounds/clips the second. Passing the predictions
    # first would truncate them and clip the labels instead.
    score = scorer(data[2], data[1])
    return score
    
# global variables
# Columns removed from both frames before scaling: the row id, the target and
# three Medical_History columns.
columns_to_drop = ['Id', 'Response', 'Medical_History_10', 'Medical_History_24', 'Medical_History_32']
num_classes = 8

print("Load the data using pandas")
train = pd.read_csv("../input/train.csv")
test = pd.read_csv("../input/test.csv")


# combine train and test so the feature engineering below is applied to both.
# pd.concat is the supported equivalent of DataFrame.append, which was removed
# in pandas 2.0.
all_data = pd.concat([train, test])

# create any new variables
# NOTE(review): indices 1 and 2 select the second and third characters of
# Product_Info_2; confirm against the actual value format that the first
# character is not the one intended here.
all_data['Product_Info_2_char'] = all_data.Product_Info_2.str[1]
all_data['Product_Info_2_num'] = all_data.Product_Info_2.str[2]

# factorize categorical variables
all_data['Product_Info_2'] = pd.factorize(all_data['Product_Info_2'])[0]
all_data['Product_Info_2_char'] = pd.factorize(all_data['Product_Info_2_char'])[0]
all_data['Product_Info_2_num'] = pd.factorize(all_data['Product_Info_2_num'])[0]
all_data['BMI_Age'] = all_data['BMI'] * all_data['Ins_Age']
med_keyword_columns = all_data.columns[all_data.columns.str.startswith('Medical_Keyword_')]
all_data['Med_Keywords_Count'] = all_data[med_keyword_columns].sum(axis=1)

print('Eliminate missing values')
# Use -1 for any others (this also fills the missing Response of rows that
# have no target value).
all_data.fillna(-1, inplace=True)

# fix the dtype on the label column
all_data['Response'] = all_data['Response'].astype(int)

# Provide split column
# NOTE(review): this draws from NumPy's global RNG, which random.seed() does
# not seed, so the values differ between runs. 'Split' is also not listed in
# columns_to_drop, so it stays in the feature matrix -- confirm that is
# intended.
all_data['Split'] = np.random.randint(5, size=all_data.shape[0])

#to-do: Add one-hot encoding for all categorical variables

# split train and test: rows with a positive Response are training rows,
# everything else (Response filled with -1 above) is treated as test data.
train = all_data[all_data['Response']>0].copy()
test = all_data[all_data['Response']<1].copy()


#### CODE GOES HERE ########
# Keep the ids and targets before those columns are dropped from the frames.
testId = test["Id"]
labels, labels_test = train["Response"], test["Response"]

# One-hot encoded training targets plus the fitted label encoder.
y, encoder = preprocess_labels(labels)

# Remove identifier/target/excluded columns from both frames.
train, test = (frame.drop(columns_to_drop, axis = 1) for frame in (train, test))

# Fit the scaler on the training features and reuse it for the test features.
train, scaler = preprocess_data(train)
test, _ = preprocess_data(test, scaler)

# Number of input features seen by the network.
dims = train.shape[1]
print(dims, 'dims')

# Build and train a small feed-forward classifier over the scaled features.
# NOTE(review): output_dim, init, nb_epoch and lr look like argument names from
# an older Keras API -- confirm the Keras version this script is pinned to.
print('Building model...')

model = Sequential()
# First hidden layer: `dims` inputs -> 32 tanh units, followed by dropout.
model.add(Dense(output_dim=32, init='glorot_uniform', input_dim=dims, activation='tanh'))
model.add(Dropout(0.1))

# Second hidden layer: 32 units with a PReLU activation.
model.add(Dense(32, init='glorot_uniform'))
model.add(PReLU())

# Output layer: one softmax unit per class.
model.add(Dense(num_classes, init='glorot_uniform'))
model.add(Activation('softmax'))
SGDopt = optimizers.SGD(lr=0.01, momentum=0.001, decay=0.0, nesterov=False)
model.compile(loss='categorical_crossentropy', optimizer=SGDopt)

# Convert inputs and targets to the dtypes passed to fit().
train = np.array(train, dtype=np.float32)
y = np.array(y, dtype=np.int32)

print('Training model...')
# validation_split = 0.4 is forwarded to Keras; with verbose = 0 no progress
# or validation output is printed.
model.fit(train, y, nb_epoch = 10, batch_size = 16, validation_split = 0.4, verbose = 0)

print('Generating submission...')
test = np.array(test, dtype=np.float32)

# Per-class probabilities for every row. The hard class predictions the script
# used to compute here were overwritten before use, so they are not requested.
train_probs = model.predict_proba(train, batch_size = 50, verbose = 0)
test_probs = model.predict_proba(test, batch_size = 50, verbose = 0)

# Expected rating: probability-weighted average of the ratings 1..num_classes.
classes = np.arange(1, num_classes + 1)
expected_value_train = np.dot(train_probs, classes)
expected_value_test = np.dot(test_probs, classes)

# Keep the continuous expected values; the offset tuning works on these.
train_preds = expected_value_train
test_preds = expected_value_test

# Integer ratings obtained by clipping to the valid range and rounding.
expected_value_train = np.round(np.clip(expected_value_train, 1, num_classes)).astype(int)
expected_value_test = np.round(np.clip(expected_value_test, 1, num_classes)).astype(int)

# eval_wrapper takes (y, yhat) and derives the valid rating range from its
# first argument, so the true labels go first and the predictions second.
print('Train score: ', eval_wrapper(labels, expected_value_train))

train_preds = np.clip(train_preds, -0.99, 8.99)
test_preds = np.clip(test_preds, -0.99, 8.99)

# train offsets
# The array must be floating point: with an integer dtype every optimised
# offset would be truncated to a whole number when it is stored.
offsets = np.full(num_classes, -1.0)
# Rows: raw predictions, offset predictions (updated in place), labels.
offset_train_preds = np.vstack((train_preds, train_preds, labels))
for j in range(num_classes):
    # fmin_powell minimises, so negate the score to maximise it.
    train_offset = lambda x: -apply_offset(offset_train_preds, x, j)
    # ravel()[0] accepts both a 0-d and a one-element result array.
    offsets[j] = np.ravel(fmin_powell(train_offset, offsets[j]))[0]
    # apply_offset leaves the last *evaluated* offset in row 1; re-apply the
    # stored one so later bins are tuned against the offsets actually kept.
    apply_offset(offset_train_preds, offsets[j], j)

# apply offsets to test
data = np.vstack((test_preds, test_preds, labels_test.values))
# Row 0 is never modified below, so the bin of each prediction is fixed.
test_bins = data[0].astype(int)
for j in range(num_classes):
    in_bin = test_bins == j
    data[1, in_bin] = data[0, in_bin] + offsets[j]

final_test_preds = np.round(np.clip(data[1], 1, num_classes)).astype(int)

preds_out = pd.DataFrame({"Id": testId, "Response": final_test_preds})
preds_out = preds_out.set_index('Id')
preds_out.to_csv('keras_expv.csv')