In [1]:
import pandas as pd
import numpy as np 
import datetime
inventory = pd.read_csv("data/inventory.csv")
products = pd.read_csv("data/products.csv")
promotions = pd.read_csv("data/promotions.csv")
transactions = pd.read_csv("data/transactions.csv")

#create a reference dataframe
dayofyear = list(range(365))
date = [datetime.datetime(2018, 1, 1) + datetime.timedelta(day) for day in dayofyear]
df_date = pd.DataFrame({"day":dayofyear, "date":date})

import ast
for colname in inventory.columns[2:]:
    inventory[colname] = [ast.literal_eval(i) for i in inventory[colname]]
    #converting inventory rows to list of tuples from string

In [115]:
product = "Biologische prei" #select the product to analyze
transactions[transactions["description"] == product] #transactions for that product

Unnamed: 0,day,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
201,2018-01-01,12:20:00,14.0,67267391.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
867,2018-01-01,18:41:40,48.0,,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
1935,2018-02-01,20:30:05,108.0,,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
3753,2018-05-01,12:52:06,227.0,22598330.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
4446,2018-06-01,11:57:40,269.0,,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
352880,2018-12-28,18:27:00,20585.0,,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
353165,2018-12-29,11:20:57,20599.0,13185251.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
353393,2018-12-29,13:54:53,20612.0,62257065.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
353903,2018-12-30,9:09:23,20639.0,6301858.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0


In [116]:
def date_converter(row):
    converted_date = datetime.datetime(2018, 1, 1) + datetime.timedelta(row["day"])
    return converted_date

inventory["date"] = inventory.apply(date_converter, axis = 1) #convert day of year to actual timestamp

In [117]:
def week_of_year(row):
    week_nr = datetime.date(row["date"].year, row["date"].month, row["date"].day).isocalendar()[1]
    return week_nr

inventory["week"] = inventory.apply(week_of_year, axis = 1)
#add nr week of year as an extra column to inventory

In [118]:
#only keep relevant columns in inventory
filter_inventory = inventory[["day", "before or after delivery", "date", "week", product]]
#filters promotion table to only contain discounts for chosen product
promotions_filter = promotions[promotions["description"] == product]

def check_discount(row):
    if row["week"] in promotions_filter["week"].values:
        discount_amount = promotions_filter[promotions_filter["week"] == row["week"]]["discount"]
        return discount_amount.values[0]
    else:
        return 0
#adds extra column to filter inventory containing discount amount
filter_inventory["discount"] = filter_inventory.apply(check_discount, axis = 1)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [119]:
def total_inv(row):
    total = 0
    if len(row[product]) > 0:
        for batch in row[product]:
            total += batch[1]
    return total
#adds total nr items in inventory for the filtered dataframe
filter_inventory["total inventory"] = filter_inventory.apply(total_inv, axis = 1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [120]:
best_before = []
for element_list in filter_inventory[product]:
    for batch in element_list:
        best_before.append(batch[0])
bb_dates = np.unique(best_before)
#list of each unique best before dates
bb_dates

array([ 14,  17,  21,  24,  28,  31,  35,  38,  42,  45,  49,  52,  56,
        59,  63,  66,  70,  73,  77,  80,  84,  87,  91,  94,  98, 101,
       105, 108, 112, 115, 119, 122, 126, 129, 133, 136, 140, 143, 147,
       150, 154, 157, 161, 164, 168, 171, 175, 178, 182, 185, 189, 192,
       196, 199, 203, 206, 210, 213, 217, 220, 224, 227, 231, 234, 238,
       241, 245, 248, 252, 255, 259, 262, 266, 269, 273, 276, 280, 283,
       287, 290, 294, 297, 301, 304, 308, 311, 315, 318, 322, 325, 329,
       332, 336, 339, 343, 346, 350, 353, 357, 360, 364, 367, 371, 374])

In [121]:
df_waste = pd.DataFrame({"best before":bb_dates})
df_waste #build initial df with points where waste can occur

Unnamed: 0,best before
0,14
1,17
2,21
3,24
4,28
...,...
99,360
100,364
101,367
102,371


In [122]:
def input_inv(row):
    for element_list in filter_inventory[product]:
        if len(element_list) > 0:
            for batch in element_list:
                if batch[0] == row["best before"]:
                    return batch[1]

df_waste["amount"] = df_waste.apply(input_inv, axis = 1)
df_waste #amount that was received with particular expiry date

Unnamed: 0,best before,amount
0,14,26
1,17,26
2,21,26
3,24,26
4,28,26
...,...,...
99,360,26
100,364,26
101,367,26
102,371,26


In [123]:
df_waste = df_waste[df_waste["best before"] < 365] #only keep rows where expiry date is in same year
df_waste

Unnamed: 0,best before,amount
0,14,26
1,17,26
2,21,26
3,24,26
4,28,26
...,...,...
96,350,26
97,353,26
98,357,26
99,360,26


In [124]:
transactions["day"] = pd.to_datetime(transactions["day"])
filter_transactions = transactions[transactions["description"] == product]
#converts date then filters transactions to contain selected product purchases

first_restock = df_date[df_date["day"] == 3]["date"].values[0]
#first_restock check purchases mde before the first restock
filter_transactions[filter_transactions["day"] < first_restock]
#PROBLEM: IF A PRODUCT HASN'T BEEN BOUGHT IN ONE DAY IT HAS A ZERO, CAUSES AN ERROR FEW CELLS 

Unnamed: 0,day,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
201,2018-01-01,12:20:00,14.0,67267391.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
867,2018-01-01,18:41:40,48.0,,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
28992,2018-01-02,11:53:17,1708.0,,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
56097,2018-01-03,11:30:50,3333.0,73458689.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
56293,2018-01-03,14:02:46,3348.0,33780213.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0
56629,2018-01-03,18:38:07,3371.0,33780213.0,vegetable,v_17,Biologische prei,1st,0.85,0.85,1.0,0.0


In [125]:
#cumulative sum of purchases from that product
purchases_per_day = filter_transactions.groupby("day").count()["product_id"].cumsum()
#purchases made per day from that product
purchases_per_day_plotting = filter_transactions.groupby("day").count()["product_id"]
#idk know what is this
purchases_per_day_plotting.index = purchases_per_day_plotting.index.dayofyear/7
purchases_per_day_plotting

day
0.142857     2
0.285714     1
0.428571     3
0.571429     9
0.714286     4
            ..
51.428571    4
51.571429    4
51.714286    2
51.857143    2
52.000000    2
Name: product_id, Length: 336, dtype: int64

In [126]:
purchases_per_day.index[0].year
weeknr = []
for ind in purchases_per_day.index:
    week = datetime.date(ind.year, ind.month, ind.day).isocalendar()[1]
    weeknr.append(week)
#week number for each purchase day


In [127]:
df_waste.drop(index=df_waste.tail(9).index, inplace=True)
#drop the last rows not to cause index issues
#STILL NEED TO BE FIXED
df_waste



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,best before,amount
0,14,26
1,17,26
2,21,26
3,24,26
4,28,26
...,...,...
87,318,26
88,322,26
89,325,26
90,329,26


In [128]:
df_waste["purchases"] = [purchases_per_day[i-1] for i in df_waste["best before"]]
#add cumulative sum of purchases up until that day (based on expiry dates)
df_waste



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,best before,amount,purchases
0,14,26,57
1,17,26,62
2,21,26,81
3,24,26,98
4,28,26,115
...,...,...,...
87,318,26,1214
88,322,26,1227
89,325,26,1233
90,329,26,1249


In [129]:
df_waste["prev purchases"] = df_waste["purchases"].diff()
df_waste #difference in purchases since a previous batch of product expired



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,best before,amount,purchases,prev purchases
0,14,26,57,
1,17,26,62,5.0
2,21,26,81,19.0
3,24,26,98,17.0
4,28,26,115,17.0
...,...,...,...,...
87,318,26,1214,16.0
88,322,26,1227,13.0
89,325,26,1233,6.0
90,329,26,1249,16.0


In [130]:
length = df_waste.shape[0]
lst_waste = [] #nr rows in waste dataframe
#amount left from the item
df_waste["remaining"] = df_waste["amount"] - df_waste["prev purchases"] 
#the first row is different as there is no prev pruchases available
df_waste["remaining"].iloc[0] = df_waste["amount"].iloc[0] - df_waste["purchases"].iloc[0]

for i in range(length):
    #iterates through the rows of the waste dataframe
    current_row = df_waste.iloc[i]
    if i == 0:
        #
        lst_waste.append(current_row["remaining"])
        continue     
    else:
        prev_row = df_waste.iloc[i-1] #previous row in dataframe
        if lst_waste[-1] < 0:
            #no idea whats happening here
            waste = lst_waste[-1] + current_row["remaining"]
            lst_waste.append(lst_waste[-1] + current_row["remaining"])
        else:
            lst_waste.append(current_row["remaining"])
    
df_waste["waste"] = lst_waste
def date_converter(row):
    converted_date = datetime.datetime(2018, 1, 1) + datetime.timedelta(row["best before"])
    return converted_date

df_waste["date"] = df_waste.apply(date_converter, axis = 1)
df_waste["week"] = df_waste.apply(week_of_year, axis = 1)
df_waste[df_waste["waste"] < 0]["waste"] = 0
df_waste



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice

Unnamed: 0,best before,amount,purchases,prev purchases,remaining,waste,date,week
0,14,26,57,,-31.0,-31.0,2018-01-15,3
1,17,26,62,5.0,21.0,-10.0,2018-01-18,3
2,21,26,81,19.0,7.0,-3.0,2018-01-22,4
3,24,26,98,17.0,9.0,6.0,2018-01-25,4
4,28,26,115,17.0,9.0,9.0,2018-01-29,5
...,...,...,...,...,...,...,...,...
87,318,26,1214,16.0,10.0,10.0,2018-11-15,46
88,322,26,1227,13.0,13.0,13.0,2018-11-19,47
89,325,26,1233,6.0,20.0,20.0,2018-11-22,47
90,329,26,1249,16.0,10.0,10.0,2018-11-26,48


In [131]:
df_waste["waste nn"] = [i if i > 0 else 0 for i in df_waste["waste"]]
#takes nonnegative values which is counted as waste
waste_group = df_waste.groupby("week").sum()
waste_group["weekly purchases"] = waste_group["purchases"].diff()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## `df_waste` is prepared for plotting

In [132]:
waste_group

Unnamed: 0_level_0,best before,amount,purchases,prev purchases,remaining,waste,waste nn,weekly purchases
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,31,52,119,5.0,-10.0,-41.0,0.0,
4,45,52,179,36.0,16.0,3.0,6.0,60.0
5,59,52,238,25.0,27.0,27.0,27.0,59.0
6,73,52,283,23.0,29.0,29.0,29.0,45.0
7,87,52,327,24.0,28.0,28.0,28.0,44.0
8,101,52,383,24.0,28.0,28.0,28.0,56.0
9,115,52,430,26.0,26.0,26.0,26.0,47.0
10,129,52,492,31.0,21.0,21.0,21.0,62.0
11,143,52,537,21.0,31.0,31.0,31.0,45.0
12,157,52,589,33.0,19.0,19.0,19.0,52.0


In [133]:
waste_group["week"] = waste_group.index

In [134]:
import plotly.express as px
fig = px.line(waste_group, x = "week", y = "waste nn", title="Absolute waste of {} per week".format(product))
fig.add_scatter(x=waste_group["week"], y=waste_group["amount"], mode="lines")
fig.add_scatter(x=waste_group["week"], y=waste_group["weekly purchases"], mode="lines")
fig.update_yaxes(title="Waste amount")
fig.show()

In [112]:
purchases_per_day

day
2018-01-01       8
2018-01-02      13
2018-01-03      18
2018-01-04      30
2018-01-05      40
              ... 
2018-12-26    2913
2018-12-27    2918
2018-12-28    2921
2018-12-29    2934
2018-12-30    2943
Name: product_id, Length: 364, dtype: int64

In [71]:
transactions["day"] = pd.to_datetime(transactions["day"])
filter_transactions = transactions[transactions["description"] == product]

In [86]:
daydate = df_date[df_date["day"] == 2]["date"].values[0]

filter_transactions[filter_transactions["day"] <= daydate]


Unnamed: 0,day,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
89,2018-01-01,10:40:13,6.0,54211938.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
467,2018-01-01,14:21:35,26.0,51433968.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
573,2018-01-01,15:36:46,33.0,30794082.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
700,2018-01-01,16:56:20,39.0,15832817.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
808,2018-01-01,18:15:29,46.0,90399261.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
821,2018-01-01,18:15:29,46.0,90399261.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
824,2018-01-01,18:15:29,46.0,90399261.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
879,2018-01-01,18:46:48,49.0,15382238.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
29093,2018-01-02,12:52:06,1714.0,28719528.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
29134,2018-01-02,13:44:14,1718.0,39309224.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0


In [111]:
purchases_per_day = filter_transactions.groupby("day").count()["product_id"].cumsum()
purchases_per_day

day
2018-01-01       8
2018-01-02      13
2018-01-03      18
2018-01-04      30
2018-01-05      40
              ... 
2018-12-26    2913
2018-12-27    2918
2018-12-28    2921
2018-12-29    2934
2018-12-30    2943
Name: product_id, Length: 364, dtype: int64

In [52]:
daydate

numpy.datetime64('2018-01-15T00:00:00.000000000')

In [15]:
dayofyear = list(range(365))
date = [datetime.datetime(2018, 1, 1) + datetime.timedelta(day) for day in dayofyear]
df_date = pd.DataFrame({"day":dayofyear, "date":date})