In [13]:
import ast

import numpy as np
import pandas as pd
import plotly.express as px
# Load the four raw input tables from the local data/ directory.
inventory, products, promotions, transactions = map(
    pd.read_csv,
    (
        "data/inventory.csv",
        "data/products.csv",
        "data/promotions.csv",
        "data/transactions.csv",
    ),
)


In [14]:
# Every column from index 2 onward is stored in the CSV as text that spells
# out a Python literal; convert each cell into the object it describes.
# Only string cells are parsed, so re-running this cell on already-parsed
# data is a no-op instead of raising ValueError from ast.literal_eval.
for colname in inventory.columns[2:]:
    inventory[colname] = [
        ast.literal_eval(cell) if isinstance(cell, str) else cell
        for cell in inventory[colname]
    ]

# Worked example for a single product: walk the "Komkommer" snapshots in row
# order and, whenever a snapshot holds more entries than the previous one,
# add the last value of its newest entry to the running total.
cucumber = inventory["Komkommer"]
amount = 0
previous_size = 0
for snapshot in cucumber:
    size = len(snapshot)
    if size > previous_size:
        amount += snapshot[-1][-1]
    previous_size = size
amount


3688

In [15]:
def count_inventory(colname, df_inventory):
    """Sum the newest entry's last value each time a snapshot grows.

    The column ``colname`` of ``df_inventory`` is iterated in row order. Each
    cell must be a sized, indexable sequence of sequences. Whenever a cell has
    more entries than the cell before it, the last element of its last entry
    is added to the total.

    Args:
        colname: Name of the column in ``df_inventory`` to scan.
        df_inventory: Frame (or mapping of columns) holding the snapshots.

    Returns:
        The accumulated total; ``0`` when the column is empty or never grows.

    Note:
        Only growth in length is detected, and only the last entry is read.
        A row where the length stays the same or shrinks contributes nothing.
        NOTE(review): presumably each entry is a delivered batch and its last
        value the batch quantity -- confirm against the data description.
    """
    total = 0
    previous_size = 0
    for snapshot in df_inventory[colname]:
        size = len(snapshot)
        if size > previous_size:
            total += snapshot[-1][-1]
        previous_size = size
    return total


In [16]:
# Purchases per product: number of transaction rows with a non-null
# "customer", grouped by "description". Computed once here; previously the
# full groupby().describe() was re-run for every inventory column.
# Cast to float to keep the dtype that describe()'s "count" column produced.
purchase_counts = (
    transactions.groupby("description")["customer"].count().astype(float)
)

dct_inv = {}
dct_transactions = {}
for colname in inventory.columns[2:]:
    print(colname)
    value = count_inventory(colname, inventory)
    dct_inv[colname] = value
    # Products that never appear in the transactions table fall back to the
    # inventory total, i.e. purchases are assumed equal to what was stocked.
    # Only a missing product label triggers the fallback; a missing
    # "description"/"customer" column now raises instead of being swallowed.
    bought_amount = purchase_counts.get(colname, value)
    dct_transactions[colname] = bought_amount


Komkommer
Courgette
SnoepVegetable tomaat
Paprika Mix
Paprika
Geschrapte worteltjes
Basis Sperziebonen
Basis Rode kool met appel
Basis worteltjes fijn
Basis erwten zeer fijn
Basis sperziebonen
Biologische courgette
Biologische Bloemkool
Biologische knoflook
Biologische pompoen
Biologische kikkererwten
Biologische prei
Biologische rode kool
Biologische cherry tomaten
Biologische brocolli
Boerenkool gesneden
Spruiten
Witlof
Rucola
Nasi bami Vegetable
Snijbonen
Buitenbeentjes puntpaprika
Basis Maiskorrels
Wortelen
Pompoen stukjes
Verse zuurkool
Buitenbeentjes komkommer
Buitenbeentjes paprika
SnoepVegetable worteltjes
WokVegetable thais
Mandarijenen
Bananen
Chiquita bananen
Pitloze witte druiven
Zoete kleine appeltjes
Pink lady appels
Pitloze rode druiven
Elstar
Conference
Biologische banane
Citroenen
Mango
Blauwe bessen
Blauwe bessen.1
Aardbeien
Jonagold
Mandarijnen groot
Mango eetrijp
Buitenbeetjes appels
Limoenen
Granny smith
Grapefruit rood
Kanzi appelen
Basis annanasstukjes
Biologisch

In [17]:
# Spot check: count of "customer" values recorded for the product "Bananen".
customer_stats = transactions.groupby("description").describe()["customer"]
customer_stats.loc["Bananen", "count"]



2151.0

In [18]:
# Product columns of the inventory table, in file order.
items_list = inventory.columns[2:]
# Build plain lists looked up by item name so every row is explicitly aligned
# with items_list (a missing item raises KeyError) and later changes to the
# dictionaries cannot leak into these values, as they would with live
# dict.values() views.
sum_inventory = [dct_inv[item_name] for item_name in items_list]
purchases = [dct_transactions[item_name] for item_name in items_list]


In [19]:
categories = products["category"].unique()

# Descriptions belonging to the first category returned by unique().
# NOTE(review): presumably this is the "vegetable" category -- confirm.
vegetables = products[products["category"] == categories[0]]["description"]

# category -> array of product descriptions in that category.
category_dct = {
    category: products[products["category"] == category]["description"].values
    for category in categories
}

# Inverse lookup: product description -> category. The loop variable is no
# longer called `list`, which used to rebind the builtin for every cell that
# ran afterwards.
categ_values = {}
for category, descriptions in category_dct.items():
    for description in descriptions:
        categ_values[description] = category

# Manual entries for names carrying a ".1" suffix.
# NOTE(review): presumably these are duplicate inventory column headers that
# pandas renamed on load -- confirm against the inventory CSV header.
categ_values["Blauwe bessen.1"] = "fruit"
categ_values["Rundergehakt.1"] = "meat"
categ_values["Unox Gelderse rookworst.1"] = "meat"
categ_values["Biologisch rundergehakt.1"] = "meat"


In [20]:
# One row per inventory item: stocked total, purchased total, and the
# difference between them in absolute terms and as a share of the stock.
final_df = pd.DataFrame(
    {
        "item": items_list,
        "Inventory sum": sum_inventory,
        "purchases sum": purchases,
    }
)
waste = final_df["Inventory sum"] - final_df["purchases sum"]
final_df["Waste"] = waste
final_df["Waste ratio"] = waste / final_df["Inventory sum"]

# Direct dictionary lookup: an item without a known category raises KeyError.
final_df["category"] = [categ_values[item_name] for item_name in final_df["item"]]

final_df.to_csv("all_waste.csv")
final_df

Unnamed: 0,item,Inventory sum,purchases sum,Waste,Waste ratio,category
0,Komkommer,3688,2948.0,740.0,0.200651,vegetable
1,Courgette,3506,2984.0,522.0,0.148888,vegetable
2,SnoepVegetable tomaat,3860,3055.0,805.0,0.208549,vegetable
3,Paprika Mix,3324,3037.0,287.0,0.086342,vegetable
4,Paprika,3362,3023.0,339.0,0.100833,vegetable
...,...,...,...,...,...,...
150,Bakkersspeculaas,4568,3798.0,770.0,0.168564,snack
151,Eierkoeken,13300,11498.0,1802.0,0.135489,snack
152,Ribbelchips naturel,4624,3694.0,930.0,0.201125,snack
153,Ribbelchips paprika,4420,3637.0,783.0,0.177149,snack


In [21]:
# Order items from highest to lowest waste ratio. Rebinding the name instead
# of using inplace=True leaves any previously displayed frame object untouched.
final_df = final_df.sort_values("Waste ratio", ascending=False)

In [22]:
# Waste ratio of every item, coloured by product category. The x axis is
# hidden because there are too many item labels to render legibly.
# (plotly.express is imported as `px` in the import cell at the top.)
fig = px.bar(
    final_df,
    x="item",
    y="Waste ratio",
    color="category",
    title="Waste ratio per item, by category",
)
fig.update_xaxes(title='x', visible=False, showticklabels=False)
fig.show()

In [23]:
# Waste ratio for the vegetable items only, with readable, angled item labels.
# (plotly.express is imported as `px` in the import cell at the top.)
vegetable_waste = final_df[final_df["category"] == "vegetable"]
fig = px.bar(
    vegetable_waste,
    x="item",
    y="Waste ratio",
    color="category",
    color_discrete_sequence=["purple"],
    title="Waste ratio per vegetable item",
)
fig.update_xaxes(
    visible=True,
    showticklabels=True,
    tickangle=45,
    tickfont=dict(size=11),
)
fig.show()

In [26]:
# Summary statistics of the waste ratio across items in the "bread" category.
final_df.loc[final_df["category"] == "bread", "Waste ratio"].describe()

count    41.000000
mean      0.200616
std       0.079066
min       0.105400
25%       0.145770
50%       0.192421
75%       0.228196
max       0.461429
Name: Waste ratio, dtype: float64