In [1]:
import lime
import sklearn
import warnings
import pandas as pd
import numpy as np
import lime.lime_tabular
import googleapiclient.discovery

from google.cloud import storage

In [2]:
# Silence DeprecationWarning for the rest of the notebook so that cell output
# stays readable; every other warning category is still shown.
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [3]:
def _as_text(payload, encoding='utf-8'):
    """Return ``payload`` as ``str``, decoding it first when it is raw bytes.

    ``Blob.download_as_string`` hands back bytes. On Python 3 bytes cannot be
    split with a ``str`` separator, so they are decoded here. On Python 2 the
    payload already is ``str`` and is returned untouched.
    """
    if isinstance(payload, str):
        return payload
    # NOTE(review): assumes the CSV files are UTF-8 encoded -- confirm.
    return payload.decode(encoding)


# Download the training and test CSV files from Cloud Storage and keep each
# one as a list of raw text lines (index 0 is the header row).
storage_client = storage.Client(project='energy-forecasting')
bucket = storage_client.get_bucket('energyforecast')
blob = bucket.blob('data/csv/MLDataTrain.csv')
train = _as_text(blob.download_as_string(storage_client)).split('\n')
blob = bucket.blob('data/csv/MLDataTest.csv')
test = _as_text(blob.download_as_string(storage_client)).split('\n')

In [4]:
# Column names come from the header row, i.e. the first line of the
# downloaded training CSV.
feature_names = train[0].split(",")
# Column indices handed to LIME as categorical. refine() casts these same two
# columns to integers before a row is sent for prediction.
# NOTE(review): column 1 is label-encoded in convert_to_matrix but is not
# listed here, so LIME treats it as continuous -- confirm this is intended.
categorical_features = [2, 3]

In [5]:
def convert_to_matrix(data, encoder=None):
    """Turn raw CSV lines into a float matrix, label-encoding column 1.

    Args:
        data: Sequence of CSV text lines. The first entry is skipped (header
            row); every other entry is split on commas.
        encoder: Optional, already fitted label encoder. When given, column 1
            is encoded with ``encoder.transform`` so that the resulting codes
            match the data the encoder was fitted on. When omitted, a new
            ``LabelEncoder`` is fitted on column 1 of ``data``.

    Returns:
        A ``(matrix, encoder)`` tuple: the rows as a float array, and the
        encoder that produced the codes stored in column 1.
    """
    records = [line.split(',') for line in data[1:]]

    # Rows with fewer fields than the widest row (for example the empty
    # string left behind by a trailing newline) are padded with missing
    # values by from_records and removed here.
    df = pd.DataFrame.from_records(records).dropna()

    if encoder is None:
        # Import the submodule explicitly: a bare ``import sklearn`` does not
        # guarantee that ``sklearn.preprocessing`` has been loaded.
        from sklearn.preprocessing import LabelEncoder
        encoder = LabelEncoder()
        codes = encoder.fit_transform(df.iloc[:, 1])
    else:
        codes = encoder.transform(df.iloc[:, 1])
    df.iloc[:, 1] = codes

    return (df.values.astype(float), encoder)

In [6]:
# Encode the training data first; `le` is the encoder that transform_row later
# uses to turn column-1 codes back into their original labels.
train, le = convert_to_matrix(train)
# The call below fits its own encoder on the test labels, so its integer codes
# only agree with the training codes when both files contain exactly the same
# label set. Re-map the test codes through the training encoder so that
# decoding a test row with `le` yields the label the row really had.
# An unseen test label makes `le.transform` raise ValueError instead of being
# decoded to a wrong label silently.
test, test_le = convert_to_matrix(test)
test[:, 1] = le.transform(test_le.inverse_transform(test[:, 1].astype(int)))

In [7]:
# Build the LIME tabular explainer in regression mode from the encoded
# training matrix, using the CSV header as feature names and the column
# indices in `categorical_features` as categorical columns.
# verbose=True makes explain_instance print the local model's intercept and
# prediction next to the model's own prediction.
# NOTE(review): LIME's perturbation sampling is random and no seed is set
# here, so explanations differ between runs; consider passing `random_state`
# if the installed lime version supports it -- confirm.
explainer = lime.lime_tabular.LimeTabularExplainer(
    train,
    feature_names=feature_names,
    class_names=['energy'],
    categorical_features=categorical_features,
    verbose=True,
    mode='regression',
    )

In [8]:
def predict(project, model, record, version=None, service=None):
    """Send ``record`` to a deployed model via the ``ml`` v1 API.

    Args:
        project: Project id, inserted into the model resource name.
        model: Model name, inserted into the model resource name.
        record: Value sent as the ``instances`` field of the request body.
        version: Optional version name. When ``None`` the resource name stops
            at the model.
        service: Optional pre-built API client. When omitted, a client is
            built on the first call and reused by later calls.

    Returns:
        The ``predictions`` entry of the service response.

    Raises:
        RuntimeError: If the response contains an ``error`` entry.
    """
    if service is None:
        # Building the client is not free and this function is called once
        # per perturbed row, so build it once and keep it on the function.
        service = getattr(predict, '_service', None)
        if service is None:
            service = googleapiclient.discovery.build('ml', 'v1')
            predict._service = service

    name = 'projects/{}/models/{}'.format(project, model)
    if version is not None:
        name += '/versions/{}'.format(version)

    response = service.projects().predict(
        name=name,
        body={'instances': record}
    ).execute()

    if 'error' in response:
        raise RuntimeError(response['error'])

    return response['predictions']

In [9]:
def refine(x):
    """Rewrite fields 2 and 3 of a comma-separated row as integer strings.

    Each of the two fields is parsed as a float and truncated to an int
    (so ``'3.9'`` becomes ``'3'``); all other fields are passed through
    unchanged and the row is joined back together with commas.
    """
    fields = x.split(',')
    for position in (2, 3):
        as_number = float(fields[position])
        fields[position] = str(int(as_number))
    return ','.join(fields)

def transform_row(x):
    """Convert one numeric row into the CSV string sent for prediction.

    The values are rendered as strings, the code in column 1 is decoded back
    to its original label with the module-level encoder ``le``, and the
    joined row is passed through ``refine``.

    Args:
        x: One row as a numpy array.

    Returns:
        The comma-separated row as a string.
    """
    # Work on a plain list rather than the fixed-width string array that
    # ``astype(str)`` produces: assigning a label into that array would
    # silently cut it off at the array's item width.
    fields = x.astype(str).tolist()
    label = le.inverse_transform([int(float(fields[1]))])[0]
    fields[1] = str(label)
    return refine(','.join(fields))

In [10]:
def lime_predict(rows, project='energy-forecasting', model='energyforecaster',
                 version='new_energy'):
    """Prediction function handed to LIME: score rows with the deployed model.

    Every row is converted with ``transform_row`` and sent in its own
    ``predict`` call; the values under the ``predictions`` key of the first
    returned prediction are collected in row order.

    Args:
        rows: A single row (1-D array) or a batch of rows (2-D array).
        project: Project id passed to ``predict``.
        model: Model name passed to ``predict``.
        version: Model version passed to ``predict``.

    Returns:
        A numpy array holding the collected prediction values.
    """
    predictions = []
    # atleast_2d lets a single row go through the same loop as a batch.
    for row in np.atleast_2d(rows):
        response = predict(project, model, transform_row(row), version)
        predictions.extend(response[0]['predictions'])
    return np.array(predictions)

In [11]:
# Index of the test row to explain.
i = 105
# Explain that row with the 5 most important features. num_samples=10 keeps
# the number of perturbed rows small; lime_predict issues one remote
# prediction call per row it is given.
# NOTE(review): the recorded output shows "Ill-conditioned matrix" warnings,
# presumably because so few samples are used to fit the local model --
# confirm, and consider raising num_samples.
exp = explainer.explain_instance(test[i], lime_predict, num_features=5, num_samples=10)

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number/precision: 1.02278981652e-16 / 1.11022302463e-16
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number/precision: 4.95982687778e-18 / 1.11022302463e-16
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number/precision: 1.11381518066e-17 / 1.11022302463e-16


Intercept 0.5003099639872012
Prediction_local [0.53224088]
Right: 0.531932532787323


In [None]:
# Render the explanation inline; show_table=True also displays the table of
# feature values for the explained row.
exp.show_in_notebook(show_table=True)