In [13]:
#Importing necessary libraries (These are all the required libraries to run the full notebook)
import pandas as pd
import numpy as np 
import datetime
import ast
import plotly.express as px #only needed for plots
from sklearn.linear_model import Ridge
# Silence pandas' chained-assignment (SettingWithCopy) warning only;
# other pandas warnings are unaffected.
pd.options.mode.chained_assignment = None
# Importing our custom functions, please make sure that the files are in the same directory as this notebook
from discounts import disc_per_day
from waste_functions import waste_analysis
# Load the four raw datasets from CSV files located next to this notebook.
inventory, products, promotions, transactions = map(
    pd.read_csv,
    ("inventory.csv", "products.csv", "promotions.csv", "transactions.csv"),
)
# ----- Products that are sold in more than one size -----
# Each (description, size) pair below gets '.1' appended to its description in
# both `transactions` and `products`, so that size variant is treated as a
# separate product from the other size(s) sharing the same description.
# NOTE(review): '300g ' for 'Rundergehakt' carries a trailing space --
# presumably it mirrors the raw data; confirm before normalising it.
tup_list2 = [
    ('Blauwe bessen', '150g'),
    ('Rundergehakt', '300g '),
    ('Unox Gelderse rookworst', '285g'),
    ('Biologisch rundergehakt', '300g'),
]
for base_name, variant_size in tup_list2:
    variant_name = base_name + '.1'
    for frame in (transactions, products):
        # Rows of this frame that hold exactly this description/size combination.
        is_variant = (frame['description'] == base_name) & (frame['size'] == variant_size)
        frame.loc[is_variant, 'description'] = variant_name


In [3]:
# Build the per-day discount table from the transaction log. Per the original
# author's note, disc_per_day returns a table with the number of products on
# discount for each day; the result is later passed to waste_analysis.
# NOTE: this call has a long running time -- avoid re-running the cell needlessly.
discounts_per_day = disc_per_day(transactions)

## Start the waste optimization
First pick a product in the next cell, then run the cells that follow. At the end, they produce the table and plots we used in our presentation and report.

In [4]:
# Product we want to analyze. MODIFY this string to get results for another product.
# NOTE(review): the value presumably has to match a `description` entry in the
# datasets exactly, spelling included -- confirm before "correcting" it.
product = "Biologische brocolli"

In [14]:
# Run the waste analysis for the selected product. The helper prints its
# progress messages and the returned table is used by all cells below.
full_output_table = waste_analysis(
    inventory=inventory,
    transactions=transactions,
    df_product=products,
    product=product,
    discount_per_day=discounts_per_day,
)


(101, 7)
Extended waste dataframe created
Data prepared for prediction
Model fitted; test set metrics: MAE: 1.8015788739042502 MSE: 5.127727203165515, R^2 score: -0.18497453537465103
Demand function fitted
Average of actual/estimated purchase ratios: 0.9990608214906719
Waste change predicted, calculating revenue loss...


In [15]:
# Preview the first rows of the output table used for our main waste analysis
# and optimization.
full_output_table.head()

Unnamed: 0_level_0,amount,total_inventory,remaining_stock,DOY,cumulative purchases,purchases,remaining,waste,week,waste nn,...,avg_price,std_price,expected purchases,expected waste,expected loss revenue,expected loss profit,expected waste cost,loss revenue,loss profit,waste cost
best before day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
14,26,67.0,41.0,14.0,42.0,,-16.0,-16.0,3,0.0,...,1.79,1.79,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,26,77.0,51.0,17.0,52.0,10.0,16.0,0.0,3,0.0,...,1.599067,1.79,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21,26,86.0,60.0,21.0,71.0,19.0,7.0,7.0,4,7.0,...,1.696238,1.79,25.0,1.0,1.7,1.286238,0.41,12.53,9.66,2.87
24,26,78.0,52.0,24.0,77.0,6.0,20.0,20.0,4,20.0,...,1.6468,1.79,8.0,18.0,29.64,22.2624,7.38,35.8,27.6,8.2
28,26,85.0,59.0,28.0,95.0,18.0,8.0,8.0,5,8.0,...,1.582737,1.79,24.0,2.0,3.17,2.345474,0.82,14.32,11.04,3.28


In [9]:
# >>>> Cells only needed for the plots
# NOTE: the plots are created for the single product chosen at the beginning
# of the notebook.
# Columns of the output table holding monetary amounts: the three actual
# figures first, followed by their three "expected" counterparts.
money_cols = [
    "loss revenue", "loss profit", "waste cost",
    "expected loss revenue", "expected loss profit", "expected waste cost",
]
metric_labels = ["Loss revenue", "Loss profit", "Waste cost"]
# Column totals over the whole table, rounded to one decimal, in money_cols order.
column_totals = full_output_table[money_cols].sum().values.round(1)
# Long-format frame: one row per (metric, method) combination.
results = pd.DataFrame({
    "Metric": metric_labels * 2,
    "Method": ["Actual"] * 3 + ["Predicted w/discounts"] * 3,
    "values": column_totals,
})

In [10]:
# >>>> Barchart for revenue, profit loss and waste costs with and without optimization
# One group of bars per metric; within each group one bar per method
# ("Actual" vs. "Predicted w/discounts"), labelled with its value.
fig = px.bar(results, x="Metric", y='values', color="Method", barmode='group', text="values")
fig.update_layout(
    # The legend is the only place where the bar colours are explained, so keep it visible.
    showlegend=True,
    # Axis titles describe what is actually plotted: the x axis holds the metric
    # names and the y axis the summed monetary amounts.
    xaxis_title="Metric",
    yaxis_title="Amount in €",
    width=1000,
    height=700,
    title={
        'text': f"Actual vs. predicted losses for {product}",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    paper_bgcolor='rgba(0,0,0,0)',  # transparent background around the plot area
)
fig.show()

In [12]:
# >>>> Linechart for revenue loss per week
# Sum the output table per week, then smooth both revenue-loss series with a
# rolling mean over 7 consecutive weekly rows (the first 6 rows are NaN because
# the window is not yet full).
output_copy = full_output_table.groupby("week").sum()
# NOTE: the column names are historically swapped and kept only for
# compatibility: "MA7" holds the EXPECTED loss (with discounts applied),
# "MA7_exp" holds the ACTUAL loss.
output_copy["MA7"] = output_copy['expected loss revenue'].rolling(7).mean()
output_copy["MA7_exp"] = output_copy["loss revenue"].rolling(7).mean()
# The title follows the product selected at the top of the notebook instead of
# a hard-coded product group.
fig = px.line(output_copy, x=output_copy.index, y="MA7_exp",
              title=f"Loss of revenue for {product} per week")
# Name the first (actual) trace so it gets a legend entry; this must happen
# before the second trace is added so only the first trace is renamed.
fig.update_traces(name="Actual", showlegend=True)
fig.add_scatter(x=output_copy.index, y=output_copy["MA7"], mode="lines", name="With discounts applied")
fig.update_layout(
    # Two lines are drawn, so the legend is required to tell them apart.
    showlegend=True,
    xaxis_title="Week nr",
    yaxis_title="Revenue loss in €",
    width=1000,
    height=700,
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    paper_bgcolor='rgba(0,0,0,0)',  # transparent background around the plot area
)
fig.show()