In [1]:
### Standard Imports - Sorry PEP8 fans, do not look below
import pandas as pd, numpy as np, os, re, json
from pathlib import Path
from datetime import datetime

from joshberry.utils import *
from joshberry.feats import *

## Specific Imports
import itertools
from typing import Iterator, Mapping, List

#### I save my API token as an environmental variable.
if os.environ.get("DR_API_TOKEN") == None:
    API_TOKEN = "OR__pasteyourtokenherefromthedatarobotbyclickinginthetopright"
else:
    API_TOKEN = os.environ.get("DR_API_TOKEN")
ENDPOINT_URL = "https://app.datarobot.com/api/v2"

# Optimizer app credentials
app_url = ''
token = ''

# Deployment id
deployment_id = ''

### Display options for notebooks
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 25)

### set path directories
curr_dir = Path(os.getcwd())
print('Current Directory is: ', str(curr_dir))
data_dir = Path(curr_dir.parents[0] / 'data/')
artifacts_dir = Path(curr_dir.parents[0] / 'artifacts/')

Current Directory is:  /Users/josh.berry/_company/data-science-scripts/joshberry/generic-use-cases/assortment_optimization/notebooks


In [2]:
### Common project specific variables
FILENAME = 'fake_assortment_demo_data.csv'  # original data

### Data Import

In [3]:
indata = pd.read_csv(Path(data_dir) / FILENAME)

In [5]:
# create upc data for lookups later
upcdata = indata[['new_upc','PRODUCT_DESCRIPTION','PRODUCT_MANUFACTURER','PRODUCT_CATEGORY','PRODUCT_SUB_CATEGORY','PRODUCT_SIZE']].drop_duplicates()

if len(upcdata) > len(upcdata['new_upc'].unique()):
    print("ERROR - UPC is not unique!")
else:
    print("ok")
    
upcdata.head(3)

ok


Unnamed: 0,new_upc,PRODUCT_DESCRIPTION,PRODUCT_MANUFACTURER,PRODUCT_CATEGORY,PRODUCT_SUB_CATEGORY,PRODUCT_SIZE
0,7797508006,SNYDR FF MINI PRETZELS,SNYDER S,BAG SNACKS,PRETZELS,16 OZ
32,3000006610,QKER CAP N CRUNCH,QUAKER,COLD CEREAL,KIDS CEREAL,14 OZ
77,7192100337,DIGRN SUPREME PIZZA,TOMBSTONE,FROZEN PIZZA,PIZZA/PREMIUM,32.7 OZ


In [6]:
# create store data for lookups later
storedata = indata[['STORE_NAME','ADDRESS_STATE_PROV_CODE','SEG_VALUE_NAME','PARKING_SPACE_QTY','SALES_AREA_SIZE_NUM','STORE_MSA']].drop_duplicates()

if len(storedata) > len(storedata['STORE_NAME'].unique()):
    print("ERROR - Store Name is not unique!")
else:
    print("ok")
    
storedata.head(3)

ok


Unnamed: 0,STORE_NAME,ADDRESS_STATE_PROV_CODE,SEG_VALUE_NAME,PARKING_SPACE_QTY,SALES_AREA_SIZE_NUM,STORE_MSA
0,15TH & MADISON,KY,VALUE,196.0,24721,MSA ID:17140
32,TOWN & COUNTRY,OH,UPSCALE,270.0,44547,MSA ID:19380
53,SILVERLAKE,KY,MAINSTREAM,408.0,46073,MSA ID:17140


In [7]:
# First, a list of all possible UPC indicators
all_choices = indata.new_upc.unique()
all_choices_names = ['UPC_AVAIL _ ' + str(c) for c in all_choices]

## RE-RUN BEGINNING HERE

In [26]:
#### FOR TROUBLE SHOOTING CHANGE THIS BETWEEN 2 and 4 WHICH WILL YIELD DIFFERENT DATASET SIZES
N_ITEMS = 2

# calculate possible combinations of specific size
scenarios = list(itertools.combinations(set(all_choices_names), N_ITEMS))


def create_simulations(all_choices_names: List[str], scenarios: List[List[str]]) -> pd.DataFrame:
    return pd.DataFrame.from_records(iter_records(all_choices_names, scenarios))

def iter_records(all_choices_names: List[str], scenarios: List[List[str]]) -> Iterator[Mapping[str, int]]:
    for row in scenarios:
        counter_map = {c: 0 for c in all_choices_names}
        for element in row:
            counter_map[element] = 1
        for element in row:
            row_record = {"new_upc_txt": element}
            row_record.update(counter_map)
            yield row_record
            
dfSims = create_simulations(all_choices_names, scenarios)

In [27]:
dfSims.head(3)

Unnamed: 0,new_upc_txt,UPC_AVAIL _ 7797508006,UPC_AVAIL _ 3000006610,UPC_AVAIL _ 7192100337,UPC_AVAIL _ 7797502248,UPC_AVAIL _ 1111009477,UPC_AVAIL _ 1111085350,UPC_AVAIL _ 2840002333,UPC_AVAIL _ 2840004768,UPC_AVAIL _ 1111009497,UPC_AVAIL _ 1111085319,UPC_AVAIL _ 7110410470,UPC_AVAIL _ 1111009507,UPC_AVAIL _ 3700019521,UPC_AVAIL _ 7797508004,UPC_AVAIL _ 31254742725,UPC_AVAIL _ 1111035398,UPC_AVAIL _ 88491212971,UPC_AVAIL _ 3800031829,UPC_AVAIL _ 3800039118,UPC_AVAIL _ 3800031838,UPC_AVAIL _ 31254742835,UPC_AVAIL _ 7218063052,UPC_AVAIL _ 7218063979,UPC_AVAIL _ 1111038078,UPC_AVAIL _ 7218063983,UPC_AVAIL _ 2066200530,UPC_AVAIL _ 1111038080,UPC_AVAIL _ 1600027564,UPC_AVAIL _ 2840004770,UPC_AVAIL _ 3000006340,UPC_AVAIL _ 1111085345,UPC_AVAIL _ 1111087395,UPC_AVAIL _ 7110410471,UPC_AVAIL _ 1111087396,UPC_AVAIL _ 1111087398,UPC_AVAIL _ 1600027527,UPC_AVAIL _ 2066200532,UPC_AVAIL _ 1600027528,UPC_AVAIL _ 2066200531,UPC_AVAIL _ 3000006560,UPC_AVAIL _ 3500068914,UPC_AVAIL _ 3700031613,UPC_AVAIL _ 3700044982,UPC_AVAIL _ 4116709428,UPC_AVAIL _ 4116709448,UPC_AVAIL _ 4116709565,UPC_AVAIL _ 7027312504,UPC_AVAIL _ 7027316204,UPC_AVAIL _ 7027316404,UPC_AVAIL _ 7110410455,UPC_AVAIL _ 7192100336,UPC_AVAIL _ 7192100339,UPC_AVAIL _ 31254742735,UPC_AVAIL _ 88491201426,UPC_AVAIL _ 88491201427
0,UPC_AVAIL _ 3800031838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,UPC_AVAIL _ 1600027528,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,UPC_AVAIL _ 3800031838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [28]:
# extract the real upc value so that we can join it to upc data
dfSims['new_upc'] = dfSims['new_upc_txt'].str.split(' _ ').str[-1].astype('int')
dfSims.drop(columns=['new_upc_txt'], inplace=True)

In [29]:
# select the date and store that we want to optimize for
CHOSEN_STORE = 'SILVERLAKE'
CHOSEN_DATE = '2021-02-27 00:00'

### static choices
dfSims['WEEK_END_DATE'] = CHOSEN_DATE
dfSims['STORE_NAME'] = CHOSEN_STORE

### lookup attributes from store and upc
dfSims = dfSims.set_index('new_upc').join(upcdata.set_index('new_upc')).reset_index(drop=False)
dfSims = dfSims.set_index('STORE_NAME').join(storedata.set_index('STORE_NAME')).reset_index(drop=False)
dfSims

Unnamed: 0,STORE_NAME,new_upc,UPC_AVAIL _ 7797508006,UPC_AVAIL _ 3000006610,UPC_AVAIL _ 7192100337,UPC_AVAIL _ 7797502248,UPC_AVAIL _ 1111009477,UPC_AVAIL _ 1111085350,UPC_AVAIL _ 2840002333,UPC_AVAIL _ 2840004768,UPC_AVAIL _ 1111009497,UPC_AVAIL _ 1111085319,UPC_AVAIL _ 7110410470,UPC_AVAIL _ 1111009507,UPC_AVAIL _ 3700019521,UPC_AVAIL _ 7797508004,UPC_AVAIL _ 31254742725,UPC_AVAIL _ 1111035398,UPC_AVAIL _ 88491212971,UPC_AVAIL _ 3800031829,UPC_AVAIL _ 3800039118,UPC_AVAIL _ 3800031838,UPC_AVAIL _ 31254742835,UPC_AVAIL _ 7218063052,UPC_AVAIL _ 7218063979,UPC_AVAIL _ 1111038078,UPC_AVAIL _ 7218063983,UPC_AVAIL _ 2066200530,UPC_AVAIL _ 1111038080,UPC_AVAIL _ 1600027564,UPC_AVAIL _ 2840004770,UPC_AVAIL _ 3000006340,UPC_AVAIL _ 1111085345,UPC_AVAIL _ 1111087395,UPC_AVAIL _ 7110410471,UPC_AVAIL _ 1111087396,UPC_AVAIL _ 1111087398,UPC_AVAIL _ 1600027527,UPC_AVAIL _ 2066200532,UPC_AVAIL _ 1600027528,UPC_AVAIL _ 2066200531,UPC_AVAIL _ 3000006560,UPC_AVAIL _ 3500068914,UPC_AVAIL _ 3700031613,UPC_AVAIL _ 3700044982,UPC_AVAIL _ 4116709428,UPC_AVAIL _ 4116709448,UPC_AVAIL _ 4116709565,UPC_AVAIL _ 7027312504,UPC_AVAIL _ 7027316204,UPC_AVAIL _ 7027316404,UPC_AVAIL _ 7110410455,UPC_AVAIL _ 7192100336,UPC_AVAIL _ 7192100339,UPC_AVAIL _ 31254742735,UPC_AVAIL _ 88491201426,UPC_AVAIL _ 88491201427,WEEK_END_DATE,PRODUCT_DESCRIPTION,PRODUCT_MANUFACTURER,PRODUCT_CATEGORY,PRODUCT_SUB_CATEGORY,PRODUCT_SIZE,ADDRESS_STATE_PROV_CODE,SEG_VALUE_NAME,PARKING_SPACE_QTY,SALES_AREA_SIZE_NUM,STORE_MSA
0,SILVERLAKE,1111009477,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,PL MINI TWIST PRETZELS,PRIVATE LABEL,BAG SNACKS,PRETZELS,15 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
1,SILVERLAKE,1111009477,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,PL MINI TWIST PRETZELS,PRIVATE LABEL,BAG SNACKS,PRETZELS,15 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
2,SILVERLAKE,1111009477,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,PL MINI TWIST PRETZELS,PRIVATE LABEL,BAG SNACKS,PRETZELS,15 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
3,SILVERLAKE,1111009477,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,PL MINI TWIST PRETZELS,PRIVATE LABEL,BAG SNACKS,PRETZELS,15 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
4,SILVERLAKE,1111009477,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,PL MINI TWIST PRETZELS,PRIVATE LABEL,BAG SNACKS,PRETZELS,15 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2965,SILVERLAKE,88491212971,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2021-02-27 00:00,POST FRUITY PEBBLES,POST FOODS,COLD CEREAL,KIDS CEREAL,11 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
2966,SILVERLAKE,88491212971,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,POST FRUITY PEBBLES,POST FOODS,COLD CEREAL,KIDS CEREAL,11 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
2967,SILVERLAKE,88491212971,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-27 00:00,POST FRUITY PEBBLES,POST FOODS,COLD CEREAL,KIDS CEREAL,11 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140
2968,SILVERLAKE,88491212971,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2021-02-27 00:00,POST FRUITY PEBBLES,POST FOODS,COLD CEREAL,KIDS CEREAL,11 OZ,KY,MAINSTREAM,408.0,46073,MSA ID:17140


### PLACE IN VIDEO WHERE WE DID THE TROUBLESHOOTING

In [30]:
import datarobot as dr

DEPLOYMENT_ID = '602d6f8cbcd2f76c569398df'
RESULT_PATH = str(Path(data_dir) / 'testing_score_results.csv')
print('Results will be ' + RESULT_PATH)

dr.Client(
        endpoint=ENDPOINT_URL,
        token=API_TOKEN,
        ssl_verify=True
    )

job = dr.BatchPredictionJob.score(
    deployment=DEPLOYMENT_ID,
    intake_settings={
        'type': 'localFile',
        'file': dfSims,
    },
    output_settings={
        'type': 'localFile',
        'path': RESULT_PATH,
    },
    passthrough_columns_set = 'all'

)

job.wait_for_completion()




Results will be /Users/josh.berry/_company/data-science-scripts/joshberry/generic-use-cases/assortment_optimization/data/testing_score_results.csv


RuntimeError: Could not detect download URL for job 602e8977aad7635090b90a1e

In [19]:
check = pd.read_csv(Path(data_dir) / 'testing_score_results.csv')
print('result length: ' + str(len(check)))

result length: 2970
