# Import modules and define parameters

In [None]:
import time
import os
import gzip
import pickle
import platform
import numpy as np
import pandas as pd

# Highest pickle protocol number supported by this interpreter; its string
# form is appended to the validation pickle file names built in get_validf().
pklhp = pickle.HIGHEST_PROTOCOL

In [None]:
from wbce_fitter import RecurrentWBCE

In [None]:
# Database settings.
# Time granularity handed to ffs.gettransactions_cached() when the
# transaction table is loaded. Valid choices: day, week, month, quarter.
timeunit = 'week' 

In [None]:
# Meta settings.
# NOTE(review): workers, batch_size and epochs are not referenced anywhere
# else in the visible notebook -- presumably training-time settings kept for
# parity with the fitting notebook; confirm before removing.
workers = 4
batch_size = 10*4096
epochs = 1

# Names.
# NOTE(review): purpose is not referenced in the visible notebook.
purpose = 'gprun'
# Model-spec file name; it is embedded in the validation pickle file names
# built in get_validf().
modelspecfname = 'specD2W1.py'

In [None]:
# skudex is no longer a notebook-level setting; the helper functions below
# take it as an argument (None, or a position in uniqueskus).
#skudex = None
#skudex = 0

# Name passed to ffs.gettransactions_cached() -- presumably a text file
# listing the SKUs to load (TODO confirm). It is also the prefix of the
# validation pickle file names.
skusfname = 'sodapopskus.txt'
# Amount subtracted from every 'orddate_index' by discardfirst(); rows whose
# shifted index becomes negative are dropped.
discardfirstn = 250

## Load Transaction Data

In [None]:
import funforsql_blagdon as ffs

In [None]:
# Load the (cached) transaction table for the configured SKU list, then keep
# only the rows whose ordered quantity is strictly positive.
transactions = ffs.gettransactions_cached(skusfname, timeunit=timeunit)
transactions = transactions[transactions['ordqty'] > 0]

In [None]:
def discardfirst(transactions, n):
    """Shift every 'orddate_index' down by ``n`` and drop rows that go negative.

    The input frame is left untouched: the shift is applied to a copy, so
    passing a slice of another DataFrame neither mutates that frame nor
    triggers a chained-assignment warning.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Table containing an 'orddate_index' column.
    n : int
        Amount to subtract from 'orddate_index'.

    Returns
    -------
    pandas.DataFrame
        Rows whose shifted 'orddate_index' is >= 0, with the shifted values
        stored in 'orddate_index'. The original row index is preserved.
    """
    shifted = transactions.copy()
    shifted['orddate_index'] = shifted['orddate_index'] - n
    return shifted.loc[shifted['orddate_index'] >= 0, :]

# Drop the leading time steps and re-base the remaining date indices.
transactions = discardfirst(transactions, discardfirstn)

# Time boundaries reported by ffs.gettimes for the remaining data.
# NOTE: com is expected to be 0; the case where it is not 0 is not handled.
com, split, fin = ffs.gettimes(transactions)
print('com:%i, split:%i, fin:%i' % (com, split, fin))

# Transactions in [com, split) and [split, fin) as selected by
# ffs.getdf_intimerange (boundary inclusivity is defined there).
transtrain = ffs.getdf_intimerange(transactions, com, split)
transtest = ffs.getdf_intimerange(transactions, split, fin)

# Distinct SKUs seen in the training range; np.unique returns them sorted.
uniqueskus = np.unique(transtrain['product_sku'])

## Modify source db type, subset transaction table if necessary

In [None]:
def get_useskus(skudex):
    """Return the SKUs selected by ``skudex``.

    ``None`` selects every entry of the module-level ``uniqueskus``; any other
    value is used to index ``uniqueskus`` and the single result is wrapped in
    a one-element array.
    """
    if skudex is None:
        return uniqueskus
    return np.array([uniqueskus[skudex]])

In [None]:
def get_particularsku(skudex):
    """Return the file-name fragment identifying the SKU selected by ``skudex``.

    The result is the empty string when the selection prints identically to
    the full ``uniqueskus`` array (e.g. ``skudex`` is ``None``); otherwise it
    is ``str()`` of the first selected SKU.
    """
    selected = get_useskus(skudex)
    # "All SKUs selected" is detected by comparing the string forms.
    if str(selected) == str(uniqueskus):
        return ''
    return str(selected[0])

## Load Validation Data

In [None]:
def get_validf(skudex):
    """Load the pickled validation object for the SKU selection ``skudex``.

    The file name is built as
    ``skusfname + <sku fragment> + '_' + modelspecfname + '_eval.pkl' + str(pklhp)``
    where the SKU fragment comes from ``get_particularsku(skudex)``.

    Parameters
    ----------
    skudex : int or None
        Position in ``uniqueskus``, or ``None`` for the file without a SKU
        fragment.

    Returns
    -------
    object
        Whatever was pickled into the gzip file (used as a pandas DataFrame
        by the rest of the notebook).

    Raises
    ------
    OSError
        If the file is missing or is not a readable gzip stream.
    """
    particularsku = get_particularsku(skudex)
    valdf_fname = skusfname+particularsku+'_'+modelspecfname+'_eval.pkl'+str(pklhp)
    # NOTE: unpickling can execute arbitrary code -- only load files produced
    # by a trusted run of the fitting pipeline.
    # The context manager closes the gzip handle even if unpickling fails.
    with gzip.open(valdf_fname, 'rb') as fh:
        validationdf = pickle.load(fh)
    return validationdf

In [None]:
# Validation frame stored under the file name without a SKU fragment.
validdf_joint = get_validf(None)

# One validation frame per SKU, keyed by the SKU value. get_validf() expects
# the SKU's position in uniqueskus, so enumerate pairs each SKU with its index
# instead of maintaining a counter by hand.
validdf_indiv = {}
for skudex, sku in enumerate(uniqueskus):
    validdf_indiv[sku] = get_validf(skudex)

In [None]:
# Quick look at the first five rows of the joint validation frame.
print(validdf_joint.head(n=5))

In [None]:
# Start from a copy of the joint frame, then overwrite each 'metric<sku>'
# column with the values from that SKU's own validation frame. Rows absent
# from the per-SKU frame are left as NaN.
validdf_synth = validdf_joint.copy()

for sku_key, sku_frame in validdf_indiv.items():
    colname = 'metric' + str(sku_key)
    print(colname)
    sku_rows = sku_frame.index.values
    # Blank the whole column first so only the per-SKU rows carry a value.
    validdf_synth.loc[:, colname] = np.nan
    validdf_synth.loc[sku_rows, colname] = sku_frame.loc[sku_rows, colname]

print(validdf_synth.head())

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

def getrocmetrics(truth, metric):
    """Compute ROC curve points and the ROC AUC for labels and scores.

    Parameters
    ----------
    truth : array-like
        Ground-truth labels, forwarded to sklearn's roc_curve / roc_auc_score.
    metric : array-like
        Predicted scores, one per label.

    Returns
    -------
    tuple
        ``(fpr, tpr, area)``: false-positive rates and true-positive rates
        from ``roc_curve`` (thresholds are discarded) and the value of
        ``roc_auc_score``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(truth, metric)
    auc_value = roc_auc_score(truth, metric)
    return false_pos_rate, true_pos_rate, auc_value

In [None]:
# Pool labels and scores over every SKU. Each list is seeded with an empty
# float array so the concatenated result has the same dtype as when the
# arrays were grown one SKU at a time.
truth_parts = [np.array([])]
joint_parts = [np.array([])]
synth_parts = [np.array([])]

for sku in uniqueskus:
    truthcolname = 'truth' + str(sku)
    metriccolname = 'metric' + str(sku)
    truth_parts.append(validdf_joint.loc[:, truthcolname])
    joint_parts.append(validdf_joint.loc[:, metriccolname])

    # Rows without a per-SKU score (NaN) are scored as 0.
    synth_scores = np.copy(validdf_synth.loc[:, metriccolname])
    synth_parts.append(np.where(np.isnan(synth_scores), 0, synth_scores))

truth = np.concatenate(truth_parts)
jointmetric = np.concatenate(joint_parts)
synthmetric = np.concatenate(synth_parts)

# Same report for both score sets: label, AUC, ROC curve with chance diagonal.
for report_label, scores in (('joint predictions', jointmetric),
                             ('synthetic predictions', synthmetric)):
    fpr, tpr, area = getrocmetrics(truth, scores)
    print(report_label)
    print(area)
    plt.plot(fpr, tpr, color='blue')
    plt.plot(fpr, fpr, color='red', ls='--')
    plt.show()

In [None]:
def _draw_roc_panel(axis, labels, scores, title):
    """Plot one ROC curve plus the chance diagonal on ``axis`` and print its AUC."""
    fpr, tpr, area = getrocmetrics(labels, scores)
    axis.plot(fpr, tpr, color='blue')
    axis.plot(fpr, fpr, color='red', ls='--')
    axis.set_title(title)
    print(area)


for sku in uniqueskus:
    truthcolname = 'truth' + str(sku)
    metriccolname = 'metric' + str(sku)

    # Rows that carry a per-SKU score, i.e. a non-NaN entry in validdf_synth
    # (presumably customers who bought this SKU in the training range).
    buyintrain = ~np.isnan(validdf_synth[metriccolname])
    print('number bought sku:%s in train:%i' % (str(sku), np.sum(buyintrain)))

    # --- restricted to rows with a per-SKU score ---
    print('\npredictions for sku:%s, for buyers of this sku' % str(sku))
    plt.close()
    fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    _draw_roc_panel(
        axarr[0],
        validdf_joint.loc[buyintrain, truthcolname],
        validdf_joint.loc[buyintrain, metriccolname],
        'joint predictions for buyers of this sku',
    )
    _draw_roc_panel(
        axarr[1],
        validdf_synth.loc[buyintrain, truthcolname],
        validdf_synth.loc[buyintrain, metriccolname],
        'indiv predictions for buyers of this sku',
    )
    plt.show()

    # --- every row; missing per-SKU scores are padded with 0 ---
    print('\npredictions for sku:%s, for all buyers' % str(sku))
    plt.close()
    fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    _draw_roc_panel(
        axarr[0],
        validdf_joint.loc[:, truthcolname],
        validdf_joint.loc[:, metriccolname],
        'joint predictions for all buyers',
    )
    padded_scores = np.copy(validdf_synth.loc[:, metriccolname])
    padded_scores[np.isnan(padded_scores)] = 0
    _draw_roc_panel(
        axarr[1],
        validdf_synth.loc[:, truthcolname],
        padded_scores,
        'indiv predictions for all buyers, padded',
    )
    plt.show()