In [5]:
import pandas as pd 
import demand
transactions = pd.read_csv("data/transactions.csv")

## Looking for complimentary products
#### products that are usually sold together with the chosen item

In [6]:
product = "Biologische brocolli"

customers = transactions[transactions["description"] == product]["customer"]
customers_basket = {}

for customer in customers:
    basket = transactions[transactions["customer"] == customer]["description"].values
    customers_basket[customer] = basket

In [8]:
purchase_frequencies = {}
for cust_id in customers_basket.keys():
    basket = customers_basket[cust_id]
    for item in basket:
        if purchase_frequencies.get(item) == None:
            purchase_frequencies[item] = 1
        else:
            purchase_frequencies[item] += 1
df_purchases = pd.DataFrame({"product":purchase_frequencies.keys(), "frequency":purchase_frequencies.values()})
df_purchases.sort_values("frequency", inplace=True, ascending=False)
df_purchases.head(10)

Unnamed: 0,product,frequency
16,Biologische brocolli,1303
71,Biologisch volkoren heel,780
20,Basis annanasstukjes,777
32,Biologisch Vloerbrood meerzaden heel,690
74,Biologisch bruin heel,654
69,Biologische pompoen,461
66,Biologische courgette,458
111,Biologische cherry tomaten,449
68,Biologische knoflook,434
114,Biologische rode kool,429


## Creating data for one complimentary product

In [22]:
compliment_product = "Basis annanasstukjes"
compl_transactions = transactions[transactions["description"] == compliment_product]
compl_transactions["day"] = pd.to_datetime(compl_transactions["day"], dayfirst=True)
grouped = compl_transactions.groupby("day").mean()["purchase_price"]
compl_sales = pd.DataFrame({"sales":grouped.values, "day": grouped.index})
compl_sales["sales"].max()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  compl_transactions["day"] = pd.to_datetime(compl_transactions["day"], dayfirst=True)


1.0100000000000007

## Testing the function and predictions

In [2]:
product = "Biologische courgette"
compl_prod = "Basis annanasstukjes"
filtered_transactions = transactions[transactions["description"] == product]
test_prepare = demand.prepare_data(filtered_transactions, complimentary_products=compl_prod, full_transactions=transactions)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_transactions["day"] = pd.to_datetime(filtered_transactions["day"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_transactions["discount"] = 100-(
compl_product is a string
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  purchases["month"] = [purchases.index[i][1].month_name() for 

In [8]:
test_prepare[test_prepare["sales"] < 1]

(56, 16)

In [7]:
test_prepare.join(demand.calc_sales(transactions, compl_prod).set_index("day"))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filt_transactions["day"] = pd.to_datetime(filt_transactions["day"], dayfirst=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,count,discount,std,min,25%,50%,75%,max,month,product,day,dayofweek,purchase_price,std_sales_price,prev_day_purchases,sales
description,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Biologische courgette,2018-01-01,8.0,14.792899,0.0,14.792899,14.792899,14.792899,14.792899,14.792899,January,Biologische courgette,1,Monday,1.44,1.69,0.0,1.01
Biologische courgette,2018-01-02,2.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,2,Tuesday,1.69,1.69,8.0,1.01
Biologische courgette,2018-01-03,4.0,30.177515,0.0,30.177515,30.177515,30.177515,30.177515,30.177515,January,Biologische courgette,3,Wednesday,1.18,1.69,2.0,1.01
Biologische courgette,2018-01-04,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,4,Thursday,1.69,1.69,4.0,1.01
Biologische courgette,2018-01-05,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,5,Friday,1.69,1.69,3.0,1.01
Biologische courgette,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Biologische courgette,2018-12-26,4.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,360,Wednesday,1.69,1.69,2.0,1.01
Biologische courgette,2018-12-27,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,361,Thursday,1.69,1.69,4.0,1.01
Biologische courgette,2018-12-28,2.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,362,Friday,1.69,1.69,3.0,1.01
Biologische courgette,2018-12-29,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,363,Saturday,1.69,1.69,2.0,1.01


In [3]:
test_prepare

Unnamed: 0_level_0,Unnamed: 1_level_0,count,discount,std,min,25%,50%,75%,max,month,product,day,dayofweek,purchase_price,std_sales_price,prev_day_purchases
description,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Biologische courgette,2018-01-01,8.0,14.792899,0.0,14.792899,14.792899,14.792899,14.792899,14.792899,January,Biologische courgette,1,Monday,1.44,1.69,0.0
Biologische courgette,2018-01-02,2.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,2,Tuesday,1.69,1.69,8.0
Biologische courgette,2018-01-03,4.0,30.177515,0.0,30.177515,30.177515,30.177515,30.177515,30.177515,January,Biologische courgette,3,Wednesday,1.18,1.69,2.0
Biologische courgette,2018-01-04,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,4,Thursday,1.69,1.69,4.0
Biologische courgette,2018-01-05,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,5,Friday,1.69,1.69,3.0
Biologische courgette,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Biologische courgette,2018-12-26,4.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,360,Wednesday,1.69,1.69,2.0
Biologische courgette,2018-12-27,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,361,Thursday,1.69,1.69,4.0
Biologische courgette,2018-12-28,2.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,362,Friday,1.69,1.69,3.0
Biologische courgette,2018-12-29,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,363,Saturday,1.69,1.69,2.0


In [28]:
test_prepare.join(compl_sales.set_index("day"))

Unnamed: 0_level_0,Unnamed: 1_level_0,count,discount,std,min,25%,50%,75%,max,month,product,day,dayofweek,purchase_price,std_sales_price,prev_day_purchases,sales
description,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Biologische courgette,2018-01-01,8.0,14.792899,0.0,14.792899,14.792899,14.792899,14.792899,14.792899,January,Biologische courgette,1,Monday,1.44,1.69,0.0,1.01
Biologische courgette,2018-01-02,2.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,2,Tuesday,1.69,1.69,8.0,1.01
Biologische courgette,2018-01-03,4.0,30.177515,0.0,30.177515,30.177515,30.177515,30.177515,30.177515,January,Biologische courgette,3,Wednesday,1.18,1.69,2.0,1.01
Biologische courgette,2018-01-04,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,4,Thursday,1.69,1.69,4.0,1.01
Biologische courgette,2018-01-05,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,January,Biologische courgette,5,Friday,1.69,1.69,3.0,1.01
Biologische courgette,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Biologische courgette,2018-12-26,4.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,360,Wednesday,1.69,1.69,2.0,1.01
Biologische courgette,2018-12-27,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,361,Thursday,1.69,1.69,4.0,1.01
Biologische courgette,2018-12-28,2.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,362,Friday,1.69,1.69,3.0,1.01
Biologische courgette,2018-12-29,3.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,December,Biologische courgette,363,Saturday,1.69,1.69,2.0,1.01
