In [1]:
import lime
import sklearn
import warnings
import itertools
import numpy as np
import pandas as pd
import lime.lime_tabular
import googleapiclient.discovery

from google.cloud import storage

In [2]:
# Suppress every DeprecationWarning for the remainder of the session so that
# library deprecation notices do not clutter the cell outputs.
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [3]:
# Download the training and evaluation CSV exports from Cloud Storage and
# split each one into a list of text lines.
storage_client = storage.Client(project='ml-clv')
bucket = storage_client.get_bucket('ml-clv_composer_final')

# download_as_string() returns raw bytes. Decode to text before splitting:
# on Python 3, bytes.split('\n') raises TypeError because the separator is
# a str. Decoding first works on both Python 2 and Python 3.
blob = bucket.blob('data/train.csv')
train = blob.download_as_string(storage_client).decode('utf-8').split('\n')
blob = bucket.blob('data/eval.csv')
test = blob.download_as_string(storage_client).decode('utf-8').split('\n')

In [4]:
# Columns kept for the explainer. The position of each name matters: the
# rest of the notebook addresses features by index into this list.
feature_names = [
    'customer_id',
    'monetary_dnn',
    'recency',
    'frequency_dnn',
    'avg_basket_value',
    'avg_basket_size',
    'T',
    'time_between',
    'cnt_returns',
    'has_returned',
]

# Column labels assigned to the parsed CSV, in file order.
headers = [
    'customer_id',
    'monetary_dnn',
    'monetary_btyd',
    'frequency_dnn',
    'frequency_btyd',
    'recency',
    'T',
    'time_between',
    'avg_basket_value',
    'avg_basket_size',
    'cnt_returns',
    'has_returned',
    'frequency_btyd_clipped',
    'monetary_btyd_clipped',
    'target_monetary_clipped',
    'target_monetary',
]

# Indices into feature_names that are passed to LIME as categorical.
# Index 8 is 'cnt_returns'.
# NOTE(review): the binary-looking 'has_returned' sits at index 9 - confirm
# which column was meant to be categorical.
categorical_features = [8]

In [5]:
def convert_to_matrix(data):
    """Turn a list of CSV text lines into a float matrix of the model features.

    Args:
        data: sequence of comma-separated strings. The first element is
            skipped (presumably the CSV header line - confirm against the
            exported files).

    Returns:
        A 2-D float numpy array with one column per entry of the
        module-level ``feature_names`` list, in that order.

    Notes:
        Columns are labelled with the module-level ``headers`` list, so
        the widest line must have exactly ``len(headers)`` fields. Rows
        containing a missing value are dropped before conversion, which
        removes lines with fewer fields than the widest line (for example
        a trailing empty line).
    """
    records = [line.split(',') for line in data[1:]]

    frame = pd.DataFrame.from_records(records)
    frame.columns = headers

    # Discard incomplete rows, then keep only the feature columns.
    selected = frame.dropna().loc[:, feature_names]
    return selected.values.astype(float)

In [6]:
# Replace the downloaded CSV line lists with float feature matrices.
# The names `train` and `test` are rebound in place, so this cell is not
# idempotent: presumably the download cell must be re-run before running
# this one a second time - TODO confirm.
train = convert_to_matrix(train)
test = convert_to_matrix(test)

In [7]:
# Build a LIME tabular explainer in regression mode, using the training
# matrix as reference data for the features listed in feature_names.
explainer = lime.lime_tabular.LimeTabularExplainer(
    train,
    mode='regression',
    feature_names=feature_names,
    categorical_features=categorical_features,
    class_names=['target_monetary'],
    verbose=True,
)

In [8]:
def predict(project, model, record, version=None):
    """Send one online-prediction request to the 'ml' v1 API.

    Args:
        project: project id used to build the model resource name.
        model: model name used to build the model resource name.
        record: payload sent as the ``instances`` field of the request body.
        version: optional model version; when given it is appended to the
            resource name, otherwise no version segment is included.

    Returns:
        The ``predictions`` entry of the API response.

    Raises:
        RuntimeError: if the response contains an ``error`` key; the error
            payload is passed as the exception argument.
    """
    # A new API client is built on every call.
    client = googleapiclient.discovery.build('ml', 'v1')

    resource = 'projects/{}/models/{}'.format(project, model)
    if version is not None:
        resource = resource + '/versions/{}'.format(version)

    request = client.projects().predict(
        name=resource,
        body={'instances': record},
    )
    result = request.execute()

    if 'error' in result:
        raise RuntimeError(result['error'])
    return result['predictions']

In [13]:
def transform_row(x):
    """Serialise one feature row as a comma-separated string.

    Args:
        x: 1-D array of numeric feature values with at least 10 entries.

    Returns:
        A single string with the values joined by commas. Positions 2, 3,
        4, 8 and 9 are written as integers (truncated toward zero); every
        other position keeps its default string form.
    """
    fields = x.astype(str)  # astype returns a copy; the caller's array is untouched
    for position in (2, 3, 4, 8, 9):
        fields[position] = str(int(float(fields[position])))
    return ','.join(fields)

In [20]:
def lime_predict(rows):
    """Prediction callback for LIME backed by the deployed 'dnn_airflow' model.

    Each row is serialised with ``transform_row`` and sent to version 'v1'
    of the 'dnn_airflow' model in project 'ml-clv' through ``predict``; the
    'predicted_monetary' field of the first returned prediction is kept.

    Args:
        rows: numpy array holding either a single feature row (1-D) or a
            batch of feature rows (2-D, one row per sample).

    Returns:
        A numpy array built from the predictions: one entry per row for
        2-D input, or the single prediction wrapped by ``np.array`` for
        1-D input.
    """
    def _predict_one(row):
        # One remote request per row.
        return predict('ml-clv',
                       'dnn_airflow',
                       transform_row(row), 'v1'
                       )[0]['predicted_monetary']

    if rows.ndim > 1:
        return np.array([_predict_one(row) for row in rows])

    # Single-row input: the original code referenced the undefined name
    # `row` here and raised NameError; the row to score is `rows` itself.
    return np.array(_predict_one(rows))

In [23]:
# Explain the prediction for one evaluation row, keeping the 5 strongest
# features. num_samples is kept very small, presumably because
# lime_predict issues one remote request per sampled row - TODO confirm.
i = 100
exp = explainer.explain_instance(
    test[i],
    lime_predict,
    num_features=5,
    num_samples=10,
)

Intercept 6628.593982662236
Prediction_local [119.86616778]
Right: -935.5283203125


In [27]:
# Render the explanation inline, including the feature-value table.
exp.show_in_notebook(show_table=True)

In [26]:
# Show the explanation as (feature condition, weight) pairs.
exp.as_list()

[('monetary_dnn <= 1172.59', -4307.906646344201),
 ('recency <= 292.00', -1981.0449000908645),
 ('avg_basket_size <= 9.16', -1955.5095914510594),
 ('2435715214.00 < customer_id <= 3894666163.00', 1519.27976649363),
 ('306.00 < T <= 331.00', 216.45355650979798)]