In [1]:
%matplotlib inline  

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Use the retailer loss percentage data taken from USDA:   
  https://www.ers.usda.gov/data-products/food-availability-per-capita-data-system/loss-adjusted-food-availability-documentation/  
https://www.ers.usda.gov/webdocs/publications/47570/8043_tb1927.pdf?v=41056

In [26]:
%%writefile retailer_loss.tsv
Commodity	Food supply (billion lbs)	Losses - Retail (billion lbs)	Losses - Retail (percent)	Losses - Consumer (billion lbs)	Losses - Consumer (percent)	Losses - Total (billion lbs)	Losses - Total (percent)
Grain products	60.4	7.2	12	11.3	19	18.5	31
Fresh fruit	37.6	4.4	12	9.5	25	13.9	37
Processed fruit	26.7	1.6	6	2.9	11	4.5	17
Fresh vegetables	53.5	5.2	10	12.8	24	18.0	34
Processed vegetables	30.4	1.8	6	5.3	18	7.1	24
Fluid milk	53.8	6.5	12	10.5	20	17.0	32
Other dairy products	29.1	2.8	10	5.7	19	8.5	29
Meat	31.6	1.4	4	7.2	23	8.6	27
Poultry	22.0	0.9	4	3.9	18	4.8	22
Fish and seafood	4.8	0.4	8	1.5	31	1.9	39
Eggs	9.8	0.7	7	2.1	21	2.8	28
Tree nuts and peanuts	3.5	0.2	6	0.3	9	0.5	15
Added sugar and sweeteners	40.8	4.5	11	12.3	30	16.7	41
Added fats and oils	26.0	5.4	21	4.5	17	9.9	38

Overwriting retailer_loss.tsv


In [27]:
raw_df = pd.read_csv('./retailer_loss.tsv', sep='\t')

In [28]:
df = raw_df.copy()
# drop last column, the total column
# df.drop(df.tail(1).index,inplace=True)
df

Unnamed: 0,Commodity,Food supply (billion lbs),Losses - Retail (billion lbs),Losses - Retail (percent),Losses - Consumer (billion lbs),Losses - Consumer (percent),Losses - Total (billion lbs),Losses - Total (percent)
0,Grain products,60.4,7.2,12,11.3,19,18.5,31
1,Fresh fruit,37.6,4.4,12,9.5,25,13.9,37
2,Processed fruit,26.7,1.6,6,2.9,11,4.5,17
3,Fresh vegetables,53.5,5.2,10,12.8,24,18.0,34
4,Processed vegetables,30.4,1.8,6,5.3,18,7.1,24
5,Fluid milk,53.8,6.5,12,10.5,20,17.0,32
6,Other dairy products,29.1,2.8,10,5.7,19,8.5,29
7,Meat,31.6,1.4,4,7.2,23,8.6,27
8,Poultry,22.0,0.9,4,3.9,18,4.8,22
9,Fish and seafood,4.8,0.4,8,1.5,31,1.9,39


In [7]:
df.columns.values

array(['Commodity', 'Food supply (billion lbs)',
       'Losses - Retail (billion lbs)', 'Losses - Retail (percent)',
       'Losses - Consumer (billion lbs)', 'Losses - Consumer (percent)',
       'Losses - Total (billion lbs)', 'Losses - Total (percent)'], dtype=object)

In [29]:
df_retailer = df[df.columns.values[:4]]
df_retailer.head()

Unnamed: 0,Commodity,Food supply (billion lbs),Losses - Retail (billion lbs),Losses - Retail (percent)
0,Grain products,60.4,7.2,12
1,Fresh fruit,37.6,4.4,12
2,Processed fruit,26.7,1.6,6
3,Fresh vegetables,53.5,5.2,10
4,Processed vegetables,30.4,1.8,6


In [30]:
total_supply = df_retailer['Food supply (billion lbs)'].sum()
print(total_supply)
df_retailer['supply_normalized_percentage'] = df_retailer['Food supply (billion lbs)'].apply(lambda x: float(x) / total_supply)
df_retailer

430.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


Unnamed: 0,Commodity,Food supply (billion lbs),Losses - Retail (billion lbs),Losses - Retail (percent),supply_normalized_percentage
0,Grain products,60.4,7.2,12,0.140465
1,Fresh fruit,37.6,4.4,12,0.087442
2,Processed fruit,26.7,1.6,6,0.062093
3,Fresh vegetables,53.5,5.2,10,0.124419
4,Processed vegetables,30.4,1.8,6,0.070698
5,Fluid milk,53.8,6.5,12,0.125116
6,Other dairy products,29.1,2.8,10,0.067674
7,Meat,31.6,1.4,4,0.073488
8,Poultry,22.0,0.9,4,0.051163
9,Fish and seafood,4.8,0.4,8,0.011163


In [31]:
def get_normalized_loss(r):
    return r['supply_normalized_percentage'] * r['Losses - Retail (percent)'] / 100
df_retailer['retail_normalized_loss'] = df_retailer.apply(get_normalized_loss, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [32]:
df_retailer[['Commodity', 'supply_normalized_percentage', 'retail_normalized_loss']].head(5)

Unnamed: 0,Commodity,supply_normalized_percentage,retail_normalized_loss
0,Grain products,0.140465,0.016856
1,Fresh fruit,0.087442,0.010493
2,Processed fruit,0.062093,0.003726
3,Fresh vegetables,0.124419,0.012442
4,Processed vegetables,0.070698,0.004242


In [39]:
169. / 1370

0.12335766423357664

In [33]:
df_retailer_normalized = df_retailer[['Commodity', 'supply_normalized_percentage', 'retail_normalized_loss']]

In [34]:
commodity_idx = np.random.choice(len(df_retailer_normalized),p = df_retailer_normalized['supply_normalized_percentage'].values)
df_retailer_normalized.ix[commodity_idx]

Commodity                             Eggs
supply_normalized_percentage     0.0227907
retail_normalized_loss          0.00159535
Name: 10, dtype: object

In [116]:
df_retailer_normalized.to_dict(orient='records')

[{'Commodity': 'Grain products',
  'retail_normalized_loss': 0.010072262367982213,
  'supply_normalized_percentage': 0.08393551973318511},
 {'Commodity': 'Fruit',
  'retail_normalized_loss': 0.008041967759866592,
  'supply_normalized_percentage': 0.08935519733185103},
 {'Commodity': 'Fresh fruit',
  'retail_normalized_loss': 0.006270150083379656,
  'supply_normalized_percentage': 0.05225125069483047},
 {'Commodity': 'Processed fruit',
  'retail_normalized_loss': 0.0022262367982212343,
  'supply_normalized_percentage': 0.037103946637020574},
 {'Commodity': 'Vegetables',
  'retail_normalized_loss': 0.009327404113396333,
  'supply_normalized_percentage': 0.11659255141745416},
 {'Commodity': 'Fresh vegetables',
  'retail_normalized_loss': 0.007434685936631463,
  'supply_normalized_percentage': 0.07434685936631463},
 {'Commodity': 'Processed vegetables',
  'retail_normalized_loss': 0.0025347415230683712,
  'supply_normalized_percentage': 0.042245692051139525},
 {'Commodity': 'Dairy products

## Now we can finally start simulate some data, for the datasets I am simulating a Gaussian Distribution over the normalized loss

In [35]:
num_retailer_stores = 2
num_days = 30
restock_period = 7 # change?
sigma_supply = 0.005
sigma_loss = 0.0001

In [36]:
# personalities? more wasteful, less wasteful, medium wasteful
def getRetailerPersonality(id):
    wasteful_match = {
        0: 0.0001,
        1: 0.0,
        2: -0.0001
    }
    t = id % len(wasteful_match)
    return wasteful_match[t]
getRetailerPersonality(2)

-0.0001

In [37]:
def generateData():
    data = []
    def generateSupplyGauss(s):
        return np.random.normal(s, sigma_supply, 1)[0]
    def generateLossGauss(s):
        return np.random.normal(s, sigma_loss, 1)[0]
    
    for d in range(num_days):
        for i in range(num_retailer_stores):
            retailer_supply = df_retailer_normalized.copy()
            retailer_supply['retailer_id'] = i
            retailer_supply['day'] = d
            
            # Simulated Supply
            retailer_supply['generated_supply'] = retailer_supply['supply_normalized_percentage'].apply(generateSupplyGauss)
            # Simulated Loss
            retailer_supply['generated_loss'] = retailer_supply['retail_normalized_loss'].apply(generateLossGauss)
            retailer_supply['generated_loss'] = retailer_supply['generated_loss'] + getRetailerPersonality(i)
            
            data += retailer_supply.to_dict(orient='records')
    return pd.DataFrame(data)[['retailer_id','day','Commodity','generated_loss', 'generated_supply']]
data = generateData()
data.groupby('retailer_id').head(20)

Unnamed: 0,retailer_id,day,Commodity,generated_loss,generated_supply
0,0,0,Grain products,0.016945,0.137111
1,0,0,Fresh fruit,0.010683,0.089904
2,0,0,Processed fruit,0.004018,0.059931
3,0,0,Fresh vegetables,0.012558,0.123725
4,0,0,Processed vegetables,0.004275,0.076965
5,0,0,Fluid milk,0.01507,0.125252
6,0,0,Other dairy products,0.007032,0.068632
7,0,0,Meat,0.002936,0.073081
8,0,0,Poultry,0.001997,0.055707
9,0,0,Fish and seafood,0.000933,0.014694


## Integrate stocking patterns