In [121]:
import json
import pickle as p

import numpy as np
import pandas as pd
import requests

In [122]:
# Sampling configuration. A fixed seed keeps the sampled item subset
# reproducible from one run to the next.
SUBSET_SEED = 43
SUBSET_SIZE = 50  # set to None to run against every item in data_all.csv

data_all = pd.read_csv('data_all.csv')

all_item_ids = data_all['Item_ID'].unique()

if SUBSET_SIZE is None:
    # Test with All Data
    item_subset = list(all_item_ids)
    data_subset = data_all.copy()
else:
    # Test with Subset of Data
    np.random.seed(SUBSET_SEED)
    # Sampling without replacement cannot draw more items than exist, so clamp
    # the request instead of letting np.random.choice raise ValueError.
    n_items = min(SUBSET_SIZE, len(all_item_ids))
    item_subset = list(np.random.choice(all_item_ids, size=n_items, replace=False))
    data_subset = data_all.loc[data_all['Item_ID'].isin(item_subset)].copy()

In [123]:
# Item IDs that make up the working subset.
item_subset

[202,
 16019,
 3732,
 1230,
 3766,
 2589,
 2531,
 1195,
 3726,
 3743,
 5469,
 7114,
 7008,
 7575,
 2867,
 2701,
 7157,
 7434,
 1221,
 350,
 5501,
 2731,
 16018,
 2316,
 2256,
 4272,
 7128,
 204,
 2521,
 5581,
 16017,
 7965,
 3821,
 201,
 4978,
 3731,
 3773,
 3816,
 3768,
 2581,
 3772,
 3733,
 2257,
 3727,
 4725,
 7109,
 4270,
 102,
 3728,
 3045]

### `/train`

In [None]:
# Post the working subset to the /train/ endpoint of the local service.
url = 'http://localhost:5000/train/'

# The frame is serialised to a JSON string first, so the request body carries
# it as a string-valued field.
data = data_subset.to_json()

# NOTE(review): the /optimize/ and /predict/ calls in this notebook use
# project_id 1 -- confirm that 6 is the intended project here.
payload = {
    'data': data,
    'cv_acc': True,  # presumably asks the service to return CV accuracies -- confirm against the API
    'project_id': 6,
}

headers = {
    'content-type': 'application/json',
    'Accept-Charset': 'UTF-8',
}

payload = json.dumps(payload)
r = requests.post(url, data=payload, headers=headers)
# Surface a 4xx/5xx reply here instead of as a KeyError / decode error when a
# later cell reads the body. `r` is already bound, so it can still be inspected.
r.raise_for_status()

In [None]:
# `result` field of the most recent response held in `r`.
r.json()['result']

In [None]:
# Tabulate the `cv_acc` field of the most recent response.
cv_payload = r.json()['cv_acc']
cv_accuracies = pd.DataFrame.from_dict(cv_payload)
cv_accuracies

### `/optimize`

In [None]:
# Ask the /optimize/ endpoint of the local service for prices for project 1.
url = 'http://localhost:5000/optimize/'

payload = {
    'project_id': 1,
    'constraints': [],
    # presumably search-size settings of the optimiser -- confirm against the API
    'population': 100,
    'max_epoch': 200,
}

headers = {
    'content-type': 'application/json',
    'Accept-Charset': 'UTF-8',
}

payload = json.dumps(payload)
r = requests.post(url, data=payload, headers=headers)
# Surface a 4xx/5xx reply here instead of as a KeyError / decode error when a
# later cell reads the body. `r` is already bound, so it can still be inspected.
r.raise_for_status()

In [None]:
# Keep the `result` field of the latest response; later cells send it to
# /predict/ as the `prices` input and use it to compute revenue.
best_prices = r.json()['result']
best_prices

In [None]:
# What If Prediction: ask /predict/ for quantity estimates at the prices
# currently held in `best_prices`.
url = 'http://localhost:5000/predict/'
payload = {
    'prices': best_prices,
    'project_id': 1,
}
headers = {
    'content-type': 'application/json',
    'Accept-Charset': 'UTF-8',
}
payload = json.dumps(payload)
r = requests.post(url, data=payload, headers=headers)
# Fail on an HTTP error status before the body is read, so a failed call does
# not show up as a confusing KeyError on 'qty_estimates'.
r.raise_for_status()

qty = r.json()['qty_estimates']

In [None]:
# Value read from the `qty_estimates` field of the /predict/ response.
qty

In [None]:
# Calculate Revenue
revenue = 0
for item in best_prices.keys():
    revenue += (best_prices[item]*qty['Qty_'+item])
    
print(revenue)

### `/predict/`

In [25]:
# Sample Actual Price 
from helper_functions import *

sales_data = data_subset

sales_data = optimize_memory(sales_data)

sales_data_wide = sales_data.set_index(
    ['Wk', 'Tier', 'Store', 'Item_ID']).unstack(level=-1).reset_index().copy()
sales_data_wide.columns = [
    ''.join(str(i) for i in col).strip()
    for col in sales_data_wide.columns.values
]
sales_data_wide = sales_data_wide.sort_values(
    ['Tier', 'Store', 'Wk'], ascending=True).reset_index(drop=True)

sales_data_wide_clean = sales_data_wide.dropna(axis=0).copy()
dataset = sales_data_wide_clean
price_columns = [
    col for col in sales_data_wide_clean.columns if col.startswith('Price')
]

a = dataset.sample(1)
a_dict = a[price_columns].iloc[0].to_dict()
a_input = {}
for key in a_dict.keys():
    a_input[key.split('_')[1]]=round(a_dict[key],2)

print('Input Prices Quantities:')
a_input

Input Prices Quantities:


{'31': 1.85, '102': 9.65, '1203': 3.2, '2259': 13.0, '5073': 3.7}

In [27]:
# Predict Sales Qty and Compare to Actual

import requests
import json

url = 'http://localhost:5000/predict/'

payload = {'prices': a_input, 'project_id': 1}

headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}

payload = json.dumps(payload)
r = requests.post(url, data=payload, headers=headers)
qty = r.json()['qty_estimates']
qty_outp = {}
for key in qty.keys():
    qty_outp[key] = round(qty[key], 0)
actual = a[[
    col for col in sales_data_wide_clean.columns if col.startswith('Qty_')
]].iloc[0].to_dict()
pd.DataFrame([qty_outp, actual], index=['Estimated', 'Actual']).transpose()

Unnamed: 0,Estimated,Actual
Qty_102,160.0,70.0
Qty_1203,191.0,138.0
Qty_2259,80.0,38.0
Qty_31,40.0,44.0
Qty_5073,91.0,26.0


In [None]:
	def train_all_items(self, proj_id, retrain=True):
		"""Call ``self.get_model`` once per item on a thread pool.

		Item ids are the integer value of the text after the first underscore
		of every entry in ``self.price_columns``.

		Args:
			proj_id: Passed unchanged as the second argument of ``get_model``.
			retrain: Accepted but never read by this method.

		Returns:
			None. The values returned by ``get_model`` are discarded.

		Raises:
			Whatever ``get_model`` raised for a task, re-raised when that
			task's result is collected.
		"""
		item_ids = []
		for column in self.price_columns:
			item_ids.append(int(column.split('_')[1]))

		# At most 10 get_model calls run concurrently.
		with ThreadPoolExecutor(max_workers=10) as pool:
			futures = [
				pool.submit(self.get_model, item_id, proj_id)
				for item_id in item_ids
			]

		# Leaving the ``with`` block waits for every submitted task, so all
		# futures are finished here; result() is called only to re-raise any
		# exception a task hit.
		for future in as_completed(futures):
			future.result()

		log.info(f'TRAINING COMPLETED FOR {len(item_ids)} ITEMS.')