In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Relative path of the transactions CSV that the notebook reads and scores.
DATA_PATH = "Data/example-file.csv"

In [3]:
# Load the saved Keras model stored under "dream_recom".
model = load_model("dream_recom")

# Read the transactions and order the rows by LAST_TXN_DTE; the prediction
# cell takes the tail of each collector's rows, so this ordering matters.
# NOTE(review): the column is sorted on the values as read_csv returns them
# (no date parsing is requested) — confirm the stored format sorts
# chronologically.
transactions_raw = pd.read_csv(DATA_PATH).sort_values("LAST_TXN_DTE")

In [4]:
# One-hot encode FGC: every distinct FGC value becomes its own indicator column.
fgc_indicators = pd.get_dummies(transactions_raw["FGC"])

# Remember which original FGC value each indicator column stands for.
dummyMapper = list(fgc_indicators.columns)

# Append the indicator columns to the right of the original columns
# (concat builds a new frame, so the raw frame itself is not modified).
transactions_fgc_encoded = pd.concat([transactions_raw, fgc_indicators], axis=1)

# Release the intermediates; note this makes the cell non re-runnable without
# re-executing the loading cell first.
del fgc_indicators, transactions_raw

# Remove the raw FGC column (now encoded) together with LAST_TXN_DTE and recency.
transactions_fgc_encoded = transactions_fgc_encoded.drop(
    columns=["FGC", "LAST_TXN_DTE", "recency"]
)

In [5]:
# Rescale every column from "target" through the last column (label slice,
# both ends inclusive) to the [0, 1] range, column by column.
# NOTE(review): the scaler is fitted on this file's own values; if the model
# was trained with a differently fitted scaler the ranges will not match —
# confirm against the training notebook.
scaler = MinMaxScaler()

columns_to_scale = transactions_fgc_encoded.loc[:, "target":]
transactions_fgc_encoded.loc[:, "target":] = scaler.fit_transform(columns_to_scale)

In [6]:
# One row per collector: [COLLECTOR_KEY, score_0, score_1, ...].
output = []

# History window parameter. The slice below keeps the last (lim - 1) rows.
# NOTE(review): `-lim+1` yields 9 rows for lim = 10, not 10 — confirm this
# matches the sequence length used when the model was trained.
lim = 10

# groupby keeps the frame's row order inside each group, and the frame was
# sorted by LAST_TXN_DTE at load time, so the tail holds the latest rows.
for collector_key, collector_rows in transactions_fgc_encoded.groupby("COLLECTOR_KEY"):
    # Drop the first column and keep only the most recent rows.
    # assumes the first column is COLLECTOR_KEY — TODO confirm
    recent_rows = collector_rows.values[-lim+1:, 1:]

    # Add a leading batch axis: (rows, features) -> (1, rows, features).
    batch = np.expand_dims(recent_rows, axis=0)

    # The batch holds a single sequence, so take the first prediction row.
    scores = model.predict(batch)[0].tolist()
    output.append([collector_key] + scores)

In [7]:
# Replace the mapper derived from this file in the encoding cell with the one
# stored on disk; its entries label the model's output columns.
# NOTE(review): presumably this is the mapping saved at training time — confirm.
dummyMapper = np.load("dummyMapper.npy").tolist()

# First column is the collector id, followed by one column per mapped category.
result_columns = ["COLLECTOR_KEY"] + dummyMapper
output_df = pd.DataFrame(output, columns=result_columns)

# Preview the first five rows.
output_df.head()

Unnamed: 0,COLLECTOR_KEY,2,12,37,39,44,74,179,214,509,...,111778,111973,112430,112567,112568,112570,112631,112639,115203,115237
0,1290,5.675372e-09,0.0002366876,1.286034e-07,7.098121e-09,1.238556e-09,9.923915e-09,6.061065e-05,4.400824e-08,2.718064e-05,...,6.375544e-07,0.0002819852,9.275533e-12,0.0001142435,2.070982e-06,5.580102e-08,9.352286e-10,3.81483e-10,2.886295e-09,4.643197e-09
1,1886,3.564911e-14,4.391907e-09,1.8568320000000003e-17,3.356059e-07,5.424822e-13,2.889252e-09,7.006687e-10,1.792286e-11,1.006072e-11,...,2.152522e-05,1.232008e-09,1.028298e-09,2.009633e-10,1.080043e-08,9.252722e-11,7.670881e-14,0.01155185,4.756276e-07,4.074742e-15
2,2125,4.227199e-06,2.583905e-13,4.347049e-07,2.621433e-14,7.08085e-11,0.01749697,0.0002768464,3.766631e-09,1.266421e-06,...,6.634842e-10,1.939974e-11,5.643065e-05,0.003056712,3.610759e-07,4.0983149999999996e-19,1.582024e-13,0.0001241584,9.944844e-12,1.836874e-14
3,3213,5.542011e-13,5.066587e-11,1.045554e-24,6.09046e-09,2.513521e-14,5.498629e-14,1.911517e-12,3.118706e-12,4.5877950000000005e-22,...,1.547478e-07,6.798622e-10,1.315681e-12,1.255688e-15,7.967477e-13,3.1369e-06,7.465312e-11,0.002786255,1.040337e-05,2.22429e-09
4,5682,0.01551838,3.03536e-09,0.0001048888,9.04819e-11,0.0007701767,1.018202e-08,0.0002166602,1.159021e-12,5.586877e-07,...,1.995123e-09,1.052119e-06,2.368152e-12,3.608611e-09,0.000754874,2.330541e-11,2.328615e-09,4.401723e-07,2.489686e-07,1.288817e-08


In [8]:
# Write the per-collector scores to result.csv, leaving out the DataFrame index.
output_df.to_csv("result.csv", index=False)