In [1]:
import numpy as np
import pandas as pd
import json
import dill

In [2]:
# Prefix of the data/<model_name>_*_set*.csv split files read in the next cell.
model_name = "SEG_retrain"

In [3]:
def _load_int64_lines(path_template):
    """Load the file for the current ``model_name`` as an int64 array.

    Parameters
    ----------
    path_template : str
        Path containing one ``{}`` placeholder, which is filled with the
        notebook-level ``model_name``.

    Returns
    -------
    numpy.ndarray
        Values parsed by ``np.genfromtxt`` using a newline delimiter and
        ``dtype=np.int64``.
    """
    return np.genfromtxt(path_template.format(model_name), delimiter="\n", dtype=np.int64)


# Splits carrying the "_original" suffix; the demo further down feeds
# train_set_original to pipeline.transform.
train_set_original = _load_int64_lines("data/{}_train_set_original.csv")
val_set_original = _load_int64_lines("data/{}_val_set_original.csv")
test_set_original = _load_int64_lines("data/{}_test_set_original.csv")

# Splits without the suffix.
# NOTE(review): presumably the already-processed counterparts — confirm.
train_set = _load_int64_lines("data/{}_train_set.csv")
val_set = _load_int64_lines("data/{}_val_set.csv")
test_set = _load_int64_lines("data/{}_test_set.csv")

In [5]:
# Restore the serialized pipeline object from disk.
# NOTE(review): dill.load can execute arbitrary code while unpickling —
# only load pipeline files from a trusted source.
with open("static/pipeline.pkl", mode="rb") as pipeline_file:
    pipeline = dill.load(pipeline_file)

## Load the vocabulary and convert it to a dictionary

In [8]:
# Read the vocabulary file (newline-delimited) as an int64 array and display it.
vocabulary_path = "static/vocabulary.csv"
vocabulary = np.genfromtxt(vocabulary_path, dtype=np.int64, delimiter="\n")
vocabulary

array([       -1,         0,     -4096, ..., 166841048,   -393216,
       -40550688], dtype=int64)

In [9]:
# Turn the array into a lookup table: vocabulary entry -> its position in the array.
vocabulary = dict(zip(vocabulary, range(len(vocabulary))))

In [12]:
# Peek at the first ten (entry, index) pairs of the lookup table.
first_ten_items = list(vocabulary.items())[:10]
dict(first_ten_items)

{-1: 0,
 0: 1,
 -4096: 2,
 -909517620: 3,
 909517620: 4,
 -8192: 5,
 8: 6,
 4096: 7,
 -8: 8,
 -12288: 9}

## Demo

In [18]:
# Take the leading 30 raw values of the original training split for the demo.
sample_size = 30
train_original_sample = train_set_original[:sample_size]

In [15]:
# Apply the loaded pipeline to the raw sample and display the result.
# NOTE(review): the recorded output has 29 values for 30 inputs, and position 21
# holds 888 == vocabulary[6120]; presumably transform takes consecutive deltas
# and maps them to vocabulary indices — confirm against the pipeline definition.
processed_train_sample = pipeline.transform(train_original_sample)
processed_train_sample

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0, 888,   0,   0,   0,   0,
         0,   0,  48])

In [16]:
# Delta between neighbouring values, computed as previous value minus next value
# (i.e. the negative of np.diff), giving one fewer element than the sample.
earlier_values = train_original_sample[:-1]
later_values = train_original_sample[1:]
train_sample = earlier_values - later_values
train_sample

array([-90096539952,      -528712,       -73032,      1315952,
            -160160,      -549096,       817016,      -248576,
           -1115480,      1044376,     -1382144,      -146992,
            1547488,      -558368,    -54420112,     54975104,
           -1362752,       433536,       516512,      -131600,
             433668,         6120,      -389321,      -901523,
              12160,    -19974760,     21278728,      -640432,
                -48], dtype=int64)

In [19]:
# Map the processed values back through the pipeline and display the result.
# Note that irreversible predictions are returned as -1 (in the recorded output,
# every position whose processed value is 0 comes back as -1).
train_sample_inverse_transformed = pipeline.inverse_transform(processed_train_sample)
train_sample_inverse_transformed

array([  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, 6120,
         -1,   -1,   -1,   -1,   -1,   -1,  -48], dtype=int64)

In [24]:
# Compare one position across the three arrays:
# processed value / original delta / inverse-transformed (restored from prediction) delta.
demo_index = 21
(
    processed_train_sample[demo_index],
    train_sample[demo_index],
    train_sample_inverse_transformed[demo_index],
)

(888, 6120, 6120)

In [28]:
# Look up the delta 6120 in the vocabulary dictionary; the result (888) matches
# the processed value at position 21.
vocabulary[6120]

888