# Example P2V-MAP

This notebook runs P2V-MAP as proposed in

> Gabel, S., Guhl, D., & Klapper, D. (2019). P2V-MAP: Mapping Market Structures for Large Retail Assortments. *Journal of Marketing Research* (forthcoming).

Please specify the author, output path, experiment name, and seed for the random number generator, and add a comment describing the experiment in the cell below. <br>
Runs for approximately four minutes on 40x Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz, 256 GB 1600 MHz DDR3, GPU Titan X.

In [None]:
# Experiment settings: edit these values before running the notebook.
INPUT = dict(
    author="SG",                             # recorded in the experiment log
    output_path="./results",                 # parent directory of the results folder
    experiment="p2v-map-example",            # name of the results sub-directory
    comment="Test run on sample data set.",  # free-text note stored in the log
    verbose=True,                            # print control files after loading
    seed=501,                                # seed for the NumPy / TensorFlow RNGs
)

## Import libraries and helper functions

In [None]:
import copy                       # deep copies of configs
import os                         # path utils
import pathlib                    # create directories
import shutil                     # remove file trees
import sys                        # add library path

import numpy as np                # random number seed
import pandas as pd               # data frames
import tensorflow as tf           # random number seed

In [None]:
# Make the project-local modules importable. The path is relative, so it is
# resolved against the current working directory when the imports below run.
sys.path.append('./libraries')
import p2vmap                     # p2v-map
import utils                      # utility methods
import evaluation                 # evaluation methods

## Experiment logger

In [None]:
# Start the experiment log with the user-supplied metadata from INPUT
# (every INPUT entry except 'verbose').
experiment_logger = dict(
    (key, INPUT[key])
    for key in ("author", "output_path", "experiment", "comment", "seed")
)

## Data and controls

In [None]:
# Basket data for the three splits, loaded in train / validation / test order.
basket_files = (
    './data/baskets_train.csv.gz',
    './data/baskets_validation.csv.gz',
    './data/baskets_test.csv.gz',
)
df_train, df_validation, df_test = [pd.read_csv(path) for path in basket_files]

# Product master table (passed to Step III and to the dashboard below).
product = pd.read_csv('./data/master.csv.gz')

In [None]:
# Load the Step I (data preparation) settings; they are later unpacked as
# keyword arguments into p2vmap.step_1.
control_step_1_data = utils.read_json('./control/step_1_data.json')
# Echo the loaded settings only when verbose output was requested in INPUT.
if INPUT['verbose']:
    utils.print_json(control_step_1_data)

In [None]:
# Load the Step II (P2V) settings and point the model's results path at
# <output_path>/<experiment> for this run.
control_step_2_p2v = utils.read_json('./control/step_2_p2v.json')
control_step_2_p2v['p2v_kwargs'].update(
    path_results='{output_path}/{experiment}'.format(**experiment_logger)
)
if INPUT['verbose']:
    utils.print_json(control_step_2_p2v)

In [None]:
# Load the Step III (mapping) settings and point the t-SNE data path at the
# same <output_path>/<experiment> directory.
control_step_3_map = utils.read_json('./control/step_3_map.json')
control_step_3_map['tsne_data_kwargs'].update(
    path_results='{output_path}/{experiment}'.format(**experiment_logger)
)
if INPUT['verbose']:
    utils.print_json(control_step_3_map)

## Log experiment

In [None]:
# Record the configuration of all three steps in the experiment log.
#
# A deep copy is stored rather than references to the control dicts:
# control_step_2_p2v is mutated later in this notebook (the data streamers are
# added to its 'p2v_kwargs' before Step II). With plain references those
# objects would also show up inside experiment_logger["configs"], so re-running
# the logging cell after Step II would no longer write the original settings.
experiment_logger["configs"] = copy.deepcopy({
    "step_1_data": control_step_1_data,
    "step_2_p2v": control_step_2_p2v,
    "step_3_map": control_step_3_map,
})

## Create output path for experiment and log configs

In [None]:
# Start every run from an empty results directory: remove what an earlier run
# left behind, then create the directory (including missing parents).
results_path = control_step_2_p2v['p2v_kwargs']['path_results']

if os.path.exists(results_path):
    shutil.rmtree(results_path)

pathlib.Path(results_path).mkdir(parents=True)

In [None]:
# Persist the experiment log next to the results of this run.
logger_file = '{output_path}/{experiment}/experiment_logger.json'.format(**experiment_logger)
utils.write_json(x=experiment_logger, f=logger_file)

## MAIN

In [None]:
# Seed both random number generators with the logged seed.
rng_seed = experiment_logger['seed']
np.random.seed(rng_seed)
tf.set_random_seed(rng_seed)

### Step I: Data Preparation

In [None]:
# Step I: pass the train / validation / test basket frames to p2vmap.step_1,
# with the Step I control settings unpacked as keyword arguments. The call
# returns three objects, one per split, which are handed to Step II below.
data_streamer_train, data_streamer_validation, data_streamer_test = p2vmap.step_1(
    df_train,
    df_validation,
    df_test,
    **control_step_1_data
)

### Step II: Latent Product Attributes

In [None]:
# Hand the Step I streamers to the P2V model through its keyword arguments.
control_step_2_p2v['p2v_kwargs'].update(
    train_streamer=data_streamer_train,
    validation_streamer=data_streamer_validation,
    test_streamer=data_streamer_test,
)

In [None]:
# Step II: run p2vmap.step_2 with the Step II control settings (now including
# the data streamers and the results path) unpacked as keyword arguments.
p2v_instance = p2vmap.step_2(
    **control_step_2_p2v
)

### Step III: Mapping of Products and Attribute Overlays

In [None]:
# Step III: run p2vmap.step_3 with the product master table and the Step III
# control settings unpacked as keyword arguments.
map_data = p2vmap.step_3(master=product, **control_step_3_map)

In [None]:
# Preview the Step III result (the bare expression is the cell's output).
map_data.head()

## Analysis

### P2V Dashboard (Step II)

In [None]:
# Build the evaluation dashboard from the 'out' folder inside the Step II
# results path, using the 'c' and 'j' columns of the product master table.
p2v_output_dir = '%s/out' % control_step_2_p2v['p2v_kwargs']['path_results']
product_labels = product[['c', 'j']]
dashboard = evaluation.DashboardTensorFlowSG(p2v_output_dir, n_heatmap=10, master=product_labels)

In [None]:
# Plot the loss recorded in the Step II output.
dashboard.plot_loss()

In [None]:
# Plot the product embedding stored under the 'file_wi' label.
# NOTE(review): presumably the input-side embedding matrix — confirm in evaluation.
dashboard.plot_product_embedding(label='file_wi')

In [None]:
# Plot the product embedding stored under the 'file_wo' label.
# NOTE(review): presumably the output-side embedding matrix — confirm in evaluation.
dashboard.plot_product_embedding(label='file_wo')

### Benchmarking

In [None]:
# Run the benchmark evaluation on the Step III map data and keep the result.
scores = evaluation.benchmarking(map_data)