In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
from collections import Counter

In [4]:
# Number of items drawn when the random sample (`subset`) is generated.
number_of_samples = 100

In [5]:
# Relative (un-normalized) sampling weight of each of the 10 classes; these are
# divided by their sum to obtain probabilities.
class_weights_not_normalized = np.array([10, 20, 30, 40, 50, 60, 50, 40, 30, 20])

In [6]:
# Normalize the raw weights so they sum to 1 and can be passed as the `p`
# (probability) argument when sampling.
class_weights = class_weights_not_normalized/class_weights_not_normalized.sum()

In [7]:
# Inspect the normalized sampling probabilities.
class_weights

array([0.02857143, 0.05714286, 0.08571429, 0.11428571, 0.14285714,
       0.17142857, 0.14285714, 0.11428571, 0.08571429, 0.05714286])

In [8]:
# List price of one item of each class. The entries are index-aligned with
# `classes`: the per-class bundle vectors are multiplied element-wise by these
# prices further down.
class_prices = [10, 20, 10, 5, 10, 30, 2, 23, 14, 31]

In [9]:
# Item classes, in the canonical order used by every per-class vector.
classes = tuple("abcdefghij")

In [10]:
# Draw the random sample of purchased items, one class label per draw.
# The original call was unseeded, so every kernel restart produced a different
# sample and none of the recorded outputs could be reproduced. A seeded
# Generator makes the sample -- and every count derived from it -- repeatable
# under Restart & Run All.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)
subset = rng.choice(classes, size=number_of_samples, p=class_weights)

In [11]:
# Bundle id -> the three items that make up the bundle. An item listed more
# than once is needed that many times.
bundles = {
    'B1': ["a", "b", "c"],
    'B2': ["f", "h", "h"],
    'B3': ["j", "j", "j"],
    'B4': ["d", "e", "i"],
    'B5': ["f", "f", "g"],
    'B6': ["a", "e", "i"],
    'B7': ["b", "c", "d"],
    'B8': ["g", "i", "j"],
}

In [12]:
# Inspect the bundle definitions.
bundles

{'B1': ['a', 'b', 'c'],
 'B2': ['f', 'h', 'h'],
 'B3': ['j', 'j', 'j'],
 'B4': ['d', 'e', 'i'],
 'B5': ['f', 'f', 'g'],
 'B6': ['a', 'e', 'i'],
 'B7': ['b', 'c', 'd'],
 'B8': ['g', 'i', 'j']}

In [13]:
# Bundle id -> discount rate (fraction of the list price taken off).
bundles_discounts = {
    'B1': 0.1,
    'B2': 0.2,
    'B3': 0.25,
    'B4': 0.05,
    'B5': 0.15,
    'B6': 0.35,
    'B7': 0.1,
    'B8': 0.33,
}

In [14]:
# Inspect the per-bundle discount rates.
bundles_discounts

{'B1': 0.1,
 'B2': 0.2,
 'B3': 0.25,
 'B4': 0.05,
 'B5': 0.15,
 'B6': 0.35,
 'B7': 0.1,
 'B8': 0.33}

In [15]:
# Make sure `subset` is an ndarray before counting.
# NOTE(review): the sampling call above already yields an ndarray, so this
# looks redundant (it only makes a copy) -- confirm before removing.
subset = np.array(subset)

In [16]:
# Count how often each class occurs in the sample, one slot per entry of
# `classes` and in that order.
# np.unique(..., return_counts=True) only reports values that actually occur
# (in sorted order), so a class missing from the sample would shorten the
# array and shift every later count out of alignment with `classes`.
# Counting each class explicitly keeps a (possibly zero) slot for every class.
# dtype=np.intp matches the dtype of the counts np.unique returns.
class_value_counts = np.array(
    [np.count_nonzero(np.asarray(subset) == class_) for class_ in classes],
    dtype=np.intp,
)

In [17]:
# Show the class order for reading the counts displayed next.
classes

('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j')

In [18]:
# Inspect the per-class occurrence counts of the sample.
class_value_counts

array([ 3,  5, 12,  9, 10, 17, 19, 11,  8,  6], dtype=int64)

In [19]:
def get_bundle_vector(bundle, class_order=None):
    """Return how many items of each class a bundle contains.

    Parameters
    ----------
    bundle : iterable
        The items that make up the bundle; repeated items count multiple times.
    class_order : sequence, optional
        Classes that define the positions of the returned vector. Defaults to
        the notebook-level ``classes`` tuple, so existing one-argument calls
        behave exactly as before.

    Returns
    -------
    list of int
        One entry per class in ``class_order``; 0 for classes not in the bundle.
    """
    if class_order is None:
        class_order = classes
    # Tally the bundle once instead of rebuilding a Counter for every class.
    # A Counter yields 0 for missing keys, which covers the "not in bundle" case.
    item_counts = Counter(bundle)
    return [item_counts[class_] for class_ in class_order]

In [20]:
# Bundle id -> per-class item-count vector (positions follow `classes`).
bundle_vectors = {
    bundle_id: get_bundle_vector(bundle_items)
    for bundle_id, bundle_items in bundles.items()
}

In [21]:
# Inspect the per-class count vector of every bundle.
bundle_vectors

{'B1': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
 'B2': [0, 0, 0, 0, 0, 1, 0, 2, 0, 0],
 'B3': [0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
 'B4': [0, 0, 0, 1, 1, 0, 0, 0, 1, 0],
 'B5': [0, 0, 0, 0, 0, 2, 1, 0, 0, 0],
 'B6': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
 'B7': [0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
 'B8': [0, 0, 0, 0, 0, 0, 1, 0, 1, 1]}

In [22]:
# Money saved by buying one unit of each bundle:
# (list price of the bundle's contents) * (bundle discount rate).
saved_money = {
    bundle_id: (vector * np.array(class_prices)).sum() * bundles_discounts[bundle_id]
    for bundle_id, vector in bundle_vectors.items()
}
# Re-key the dict from the largest saving to the smallest; this ordering is the
# priority order used when bundles are allocated.
saved_money = dict(
    sorted(saved_money.items(), key=lambda entry: entry[1], reverse=True)
)
saved_money

{'B3': 23.25,
 'B8': 15.510000000000002,
 'B2': 15.200000000000001,
 'B6': 11.899999999999999,
 'B5': 9.299999999999999,
 'B1': 4.0,
 'B7': 3.5,
 'B4': 1.4500000000000002}

In [23]:
# Greedy bundle allocation: walk the bundles from the largest to the smallest
# absolute saving (the order of `saved_money`) and form as many copies of each
# bundle as the remaining items allow.

# Fresh per-class counts of the sample, one slot per entry of `classes`.
# np.unique(..., return_counts=True) omits classes that never occur in the
# sample, which would misalign the counts with `classes` and the bundle
# vectors, so each class is counted explicitly instead.
class_value_counts = np.array(
    [np.count_nonzero(np.asarray(subset) == class_) for class_ in classes],
    dtype=np.intp,
)
print(class_value_counts)
for item in saved_money:
    needed = np.asarray(bundle_vectors[item])
    required = needed > 0
    if required.any():
        # The scarcest required class limits how many copies can be formed;
        # this equals the number of times the bundle could be subtracted
        # without any count going negative.
        bundle_count = int((class_value_counts[required] // needed[required]).min())
    else:
        # A bundle that needs nothing would otherwise "fit" forever.
        bundle_count = 0
    # Remove the allocated items from the pool (in place).
    class_value_counts -= bundle_count * needed
    print(item + '  ' + str(bundle_count))
# Items left over that could not be placed into any bundle.
print(class_value_counts)

[ 3  5 12  9 10 17 19 11  8  6]
B3  2
B8  0
B2  5
B6  3
B5  6
B1  0
B7  5
B4  4
[ 0  0  7  0  3  0 13  1  1  0]


In [24]:
# Show the class order again for reading the leftover counts displayed next.
classes

('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j')

In [25]:
# Per-class counts remaining after the allocation loop above, which subtracts
# the allocated bundles from this array in place.
class_value_counts

array([ 0,  0,  7,  0,  3,  0, 13,  1,  1,  0], dtype=int64)

In [26]:
# Re-display the bundle definitions for reference.
bundles

{'B1': ['a', 'b', 'c'],
 'B2': ['f', 'h', 'h'],
 'B3': ['j', 'j', 'j'],
 'B4': ['d', 'e', 'i'],
 'B5': ['f', 'f', 'g'],
 'B6': ['a', 'e', 'i'],
 'B7': ['b', 'c', 'd'],
 'B8': ['g', 'i', 'j']}

In [27]:
# Re-display the class order for reference.
classes

('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j')

In [28]:
# Recompute the per-class counts from the sample, because the allocation loop
# consumed the previous array in place.
# Each class in `classes` is counted explicitly: np.unique(...,
# return_counts=True) would drop classes that are absent from the sample and
# silently misalign the counts with `classes`.
class_value_counts = np.array(
    [np.count_nonzero(np.asarray(subset) == class_) for class_ in classes],
    dtype=np.intp,
)
class_value_counts

array([ 3,  5, 12,  9, 10, 17, 19, 11,  8,  6], dtype=int64)

In [29]:
# Re-display the per-class count vector of every bundle for reference.
bundle_vectors

{'B1': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
 'B2': [0, 0, 0, 0, 0, 1, 0, 2, 0, 0],
 'B3': [0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
 'B4': [0, 0, 0, 1, 1, 0, 0, 0, 1, 0],
 'B5': [0, 0, 0, 0, 0, 2, 1, 0, 0, 0],
 'B6': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
 'B7': [0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
 'B8': [0, 0, 0, 0, 0, 0, 1, 0, 1, 1]}

In [67]:
# Re-display the per-bundle discount rates for reference.
bundles_discounts

{'B1': 0.1,
 'B2': 0.2,
 'B3': 0.25,
 'B4': 0.05,
 'B5': 0.15,
 'B6': 0.35,
 'B7': 0.1,
 'B8': 0.33}

In [46]:
# Discount rate of every bundle as a float vector.
# The vector is later combined column-by-column with the matrix built from
# `bundle_vectors`, so it is ordered by the keys of `bundle_vectors` rather
# than relying on `bundles_discounts` happening to share the same insertion
# order.
bun = np.array(
    [bundles_discounts[bundle_id] for bundle_id in bundle_vectors],
    dtype=float,
)

In [47]:
# Inspect the discount-rate vector.
bun

array([0.1 , 0.2 , 0.25, 0.05, 0.15, 0.35, 0.1 , 0.33])

In [63]:
# Rebind `class_prices` (defined as a Python list) to an ndarray so it can be
# reshaped and broadcast in the cells below.
class_prices = np.array(class_prices)

In [64]:
# Inspect the price array.
# NOTE(review): the recorded output is 2-D, which suggests the reshape cell
# (lower execution count) was run before this one; on a fresh top-to-bottom
# run the array is presumably still 1-D here -- confirm.
class_prices

array([[10, 20, 10,  5, 10, 30,  2, 23, 14, 31]])

In [55]:
# Turn the discount rates into a column vector (n_bundles, 1) so they broadcast
# against a (1, n_classes) price row. -1 lets numpy infer the number of bundles
# instead of hard-coding 8, so adding or removing a bundle does not break this.
bun = bun.reshape((-1, 1))

In [56]:
# Turn the prices into a row vector (1, n_classes) so they broadcast against
# the (n_bundles, 1) discount column.
# np.asarray makes the cell work even if `class_prices` is still the plain
# list (a list has no .reshape), and -1 infers the number of classes instead
# of hard-coding 10.
class_prices = np.asarray(class_prices).reshape((1, -1))

In [81]:
# Price of every class under every bundle's discount, laid out as
# (n_classes, n_bundles): entry [i, j] = price of class i * (1 - discount of bundle j).
# Relies on `class_prices` being a (1, n_classes) row and `bun` a (n_bundles, 1) column.
discounted_prices = class_prices.transpose() * (1 - bun).transpose()

In [82]:
# Inspect the discounted price matrix (rows: classes, columns: bundles).
discounted_prices

array([[ 9.  ,  8.  ,  7.5 ,  9.5 ,  8.5 ,  6.5 ,  9.  ,  6.7 ],
       [18.  , 16.  , 15.  , 19.  , 17.  , 13.  , 18.  , 13.4 ],
       [ 9.  ,  8.  ,  7.5 ,  9.5 ,  8.5 ,  6.5 ,  9.  ,  6.7 ],
       [ 4.5 ,  4.  ,  3.75,  4.75,  4.25,  3.25,  4.5 ,  3.35],
       [ 9.  ,  8.  ,  7.5 ,  9.5 ,  8.5 ,  6.5 ,  9.  ,  6.7 ],
       [27.  , 24.  , 22.5 , 28.5 , 25.5 , 19.5 , 27.  , 20.1 ],
       [ 1.8 ,  1.6 ,  1.5 ,  1.9 ,  1.7 ,  1.3 ,  1.8 ,  1.34],
       [20.7 , 18.4 , 17.25, 21.85, 19.55, 14.95, 20.7 , 15.41],
       [12.6 , 11.2 , 10.5 , 13.3 , 11.9 ,  9.1 , 12.6 ,  9.38],
       [27.9 , 24.8 , 23.25, 29.45, 26.35, 20.15, 27.9 , 20.77]])

In [83]:
# Stack the bundle vectors into one matrix: each dict key becomes a column, so
# the result has one row per class and one column per bundle.
bundle_vectors_np = pd.DataFrame(bundle_vectors).to_numpy()
bundle_vectors_np

array([[1, 0, 0, 0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 1, 0, 0],
       [0, 1, 0, 0, 2, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 1],
       [0, 2, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 1, 0, 1],
       [0, 0, 3, 0, 0, 0, 0, 1]], dtype=int64)

In [87]:
# Row vector (1, 8) holding the values 1..8, one per bundle column.
# NOTE(review): presumably the number of times each bundle B1..B8 is bought,
# used here as example quantities -- confirm.
a = np.arange(1, 9).reshape((1, 8))
a

array([[1, 2, 3, 4, 5, 6, 7, 8]])

In [90]:
# Broadcast the (1, 8) row `a` across the class-by-bundle matrix: every column
# (bundle) is scaled by that bundle's entry in `a`. Entry [i, j] is therefore
# a[0, j] * (count of class i in bundle j).
sum_prod = a * bundle_vectors_np
sum_prod

array([[ 1,  0,  0,  0,  0,  6,  0,  0],
       [ 1,  0,  0,  0,  0,  0,  7,  0],
       [ 1,  0,  0,  0,  0,  0,  7,  0],
       [ 0,  0,  0,  4,  0,  0,  7,  0],
       [ 0,  0,  0,  4,  0,  6,  0,  0],
       [ 0,  2,  0,  0, 10,  0,  0,  0],
       [ 0,  0,  0,  0,  5,  0,  0,  8],
       [ 0,  4,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  4,  0,  6,  0,  8],
       [ 0,  0,  9,  0,  0,  0,  0,  8]], dtype=int64)

In [92]:
# Element-wise product of the discounted unit prices and the item quantities
# per (class, bundle) cell; summing all cells gives a single grand total.
mult = discounted_prices * sum_prod 
print(mult)
mult.sum()

[[  9.     0.     0.     0.     0.    39.     0.     0.  ]
 [ 18.     0.     0.     0.     0.     0.   126.     0.  ]
 [  9.     0.     0.     0.     0.     0.    63.     0.  ]
 [  0.     0.     0.    19.     0.     0.    31.5    0.  ]
 [  0.     0.     0.    38.     0.    39.     0.     0.  ]
 [  0.    48.     0.     0.   255.     0.     0.     0.  ]
 [  0.     0.     0.     0.     8.5    0.     0.    10.72]
 [  0.    73.6    0.     0.     0.     0.     0.     0.  ]
 [  0.     0.     0.    53.2    0.    54.6    0.    75.04]
 [  0.     0.   209.25   0.     0.     0.     0.   166.16]]


1345.57