In [1]:
import pandas as pd
import numpy as np

### Erstellen der Interaction Matrix
----

In [2]:
# Toy purchase log with one row per purchase: (user_id, product_name).
# NOTE: mixing ints and strings in one array makes NumPy store every entry as a string.
data = np.transpose(
    np.array([
        [1, 1, 2, 3],          # user ids
        ['A', 'A', 'B', 'B'],  # purchased product
    ])
)

In [3]:
# `data` is a single NumPy array holding both ids and product names, so every
# value (including the user ids) arrives as a string. Cast `user_id` back to
# int so that ids sort and compare numerically (e.g. 2 < 10) instead of as text.
df = pd.DataFrame(data, columns=['user_id', 'product_name']).astype({'user_id': int})
df

Unnamed: 0,user_id,product_name
0,1,A
1,1,A
2,2,B
3,3,B


In [4]:
# Interaction matrix of purchase counts: one row per user, one column per
# product; `len` counts the purchases per (user, product) pair and pairs that
# never occur are filled with 0.
interaction_matrix_count = pd.pivot_table(
    df,
    index='user_id',
    columns='product_name',
    aggfunc=len,
    fill_value=0,
)
interaction_matrix_count.head()

product_name,A,B
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2,0
2,0,1
3,0,1


In [5]:
# Starting point for the binary matrix: the same user x product purchase
# counts as above (still counts at this stage, not yet 0/1 flags).
interaction_matrix_binary = df.pivot_table(
    fill_value=0,
    aggfunc=len,
    columns='product_name',
    index='user_id',
)

In [6]:
def binary(x):
    """Map a count to a 0/1 flag.

    Returns 1 when ``x`` is strictly positive and 0 otherwise.
    """
    return 1 if x > 0 else 0

In [7]:
# Collapse the purchase counts to 0/1 flags (1 = bought at least once).
# The vectorised comparison gives the same values as applying `binary` to every
# cell, but avoids `DataFrame.applymap`, which is deprecated since pandas 2.1.
# The result is assigned so that `interaction_matrix_binary` really holds the
# binary matrix instead of the raw counts; re-running the cell is harmless
# because the operation is idempotent.
interaction_matrix_binary = (interaction_matrix_binary > 0).astype(int)
interaction_matrix_binary

product_name,A,B
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,0
2,0,1
3,0,1


## User Product Rating

In order to get a user-item rating for the interaction matrix, we had to develop a rating function.

The following considerations went into its design:
- We want to give sufficient weight to the first product purchase
    - We decided that the first purchase has a weight $\displaystyle \omega = \frac{1}{3}$
- We want to weight a reorder of a product even more heavily (this assumes that the user liked the product)
    - To create this gap, a reordered product gets a rating of at least $\displaystyle 2 \cdot \omega$
- In addition, we do not want to overweight multiple reorders: the ratings of users with e.g. 6 or 10 reorders should not differ too much
    - To achieve this, we can take the square-root of the number of reorders since $\sqrt{x}\,\, |\,\, x \geq 0\,\,$ is concave
- The rating should be a number between 0 and 1

Therefore the following formula has been developed:

$\displaystyle{ rating(o, p) =
  \begin{cases}
    0            & \quad \text{if } o \text{ is } 0\\
    \omega       & \quad \text{if } o \text{ is }1 \land max(p) > 1\\
    2 \cdot \omega + (1 - 2 \cdot \omega) \cdot \sqrt{\frac{o}{max(p)}}  & \quad \text{if } o > 1 \lor \left( o \text{ is }1 \land max(p) \text{ is } 1 \right)
  \end{cases}}$
    
- where $o$ is the number of orders of the specified product $p$ of the user and $max(p)$ is the max count of all user orders of the specified product $p$.
- $\omega$ must satisfy $0 < \omega < 0.5$, ideally somewhere in the middle of that range

This approach strongly weights the first reorder and takes neither the user's total number of orders nor the aisles into account.

In [118]:
def rating_by_product_max(o, max_p, omega):
    """Rate a user's affinity for a product from the user's order count.

    Implements the piecewise rating formula from the markdown cell above.

    Parameters
    ----------
    o : int
        Number of times the user ordered the product.
    max_p : int
        Largest order count observed for the product (``max(p)`` in the formula).
    omega : float
        Weight of a first purchase; the formula expects ``0 < omega < 0.5``.

    Returns
    -------
    0 if the product was never ordered, ``omega`` for a single order while
    ``max_p > 1``, otherwise ``2*omega + (1 - 2*omega) * sqrt(o / max_p)``.
    """
    if o == 0:
        return 0
    if o == 1 and max_p > 1:
        return omega
    # Reorders (or a single order when nobody ordered more than once) start at
    # 2*omega; the square root keeps the rating from growing linearly in `o`.
    return 2 * omega + (1 - 2 * omega) * np.sqrt(o / max_p)


# Demo: print the rating for every possible order count up to max(p).
theta = 0.35  # first-purchase weight (omega in the formula above)
max_ = 10  # max(p)
for o in range(max_ + 1):
    x = rating_by_product_max(o, max_, theta)
    print(x)

0
0.35
0.8341640786499873
0.8643167672515498
0.8897366596101027
0.9121320343559642
0.9323790007724451
0.9509980079602227
0.9683281572999747
0.9846049894151541
1.0


In [140]:
def rating_by_order_share(o, o_tot, omega, low_freq):
    """Rate a product from its order count relative to the order total.

    Parameters
    ----------
    o : int
        Order count of the product; expected to satisfy ``0 <= o <= o_tot``.
    o_tot : int
        Order total the count is compared against (presumably the customer's
        total number of orders -- TODO confirm).
    omega : float
        Rating given to a product that was ordered exactly once.
    low_freq : int
        Order total below which the reorder bonus is damped.

    Returns
    -------
    0 for ``o == 0``, ``omega`` for ``o == 1``, otherwise
    ``omega + (1 - omega) * sqrt(o / o_tot) * w_freq`` where ``w_freq`` is
    ``sqrt(o_tot / low_freq)`` when ``o_tot < low_freq`` and 1 otherwise.
    """
    if o == 0:
        return 0
    if o == 1:
        return omega
    # Damp the reorder bonus while the order total is still below `low_freq`.
    w_freq = np.sqrt(o_tot / low_freq) if o_tot < low_freq else 1
    # Concave weight of the product's share of the order total.
    w_prod = np.sqrt(o / o_tot)
    return omega + (1 - omega) * w_prod * w_freq


theta = 1/3  # rating of a product ordered exactly once
low_freq = 10  # order totals below this value are treated as low frequency

# Demo: print the rating for every (o_tot, o) combination with o <= o_tot.
for o_tot in range(low_freq + 50):
    print("o_tot =", o_tot)
    for o in range(o_tot + 1):
        x = rating_by_order_share(o, o_tot, theta, low_freq)
        print("x =", x)
        

o_tot = 0
x = 0
o_tot = 1
x = 0
x = 0.3333333333333333
o_tot = 2
x = 0
x = 0.3333333333333333
x = 0.6314757303333053
o_tot = 3
x = 0
x = 0.3333333333333333
x = 0.6314757303333054
x = 0.6984817050034441
o_tot = 4
x = 0
x = 0.3333333333333333
x = 0.6314757303333054
x = 0.6984817050034442
x = 0.7549703546891173
o_tot = 5
x = 0
x = 0.3333333333333333
x = 0.6314757303333054
x = 0.6984817050034442
x = 0.7549703546891173
x = 0.804737854124365
o_tot = 6
x = 0
x = 0.3333333333333333
x = 0.6314757303333053
x = 0.6984817050034442
x = 0.7549703546891173
x = 0.804737854124365
x = 0.8497311128276557
o_tot = 7
x = 0
x = 0.3333333333333333
x = 0.6314757303333054
x = 0.6984817050034441
x = 0.7549703546891173
x = 0.804737854124365
x = 0.8497311128276557
x = 0.8911066843560504
o_tot = 8
x = 0
x = 0.3333333333333333
x = 0.6314757303333053
x = 0.698481705003444
x = 0.7549703546891173
x = 0.804737854124365
x = 0.8497311128276557
x = 0.8911066843560504
x = 0.9296181273332773
o_tot = 9
x = 0
x = 0.33333333333

x = 0.9086326181751276
x = 0.9167932993249117
x = 0.9248414031771077
x = 0.9327814640883814
x = 0.9406177199605006
x = 0.9483541386859236
x = 0.9559944416354034
x = 0.9635421245822147
x = 0.9710004763968518
x = 0.9783725957958624
x = 0.9856614063867757
x = 0.9928696702163218
x = 1.0
o_tot = 48
x = 0
x = 0.3333333333333333
x = 0.4694160968212877
x = 0.5
x = 0.5257834230632086
x = 0.5484990747893009
x = 0.5690355937288492
x = 0.5879208719419912
x = 0.605498860309242
x = 0.6220084679281462
x = 0.6376236430584257
x = 0.6524757025854461
x = 0.6666666666666667
x = 0.6802776665776888
x = 0.6933744832448812
x = 0.7060113295832984
x = 0.7182335127930839
x = 0.7300793571412695
x = 0.7415816237971964
x = 0.7527685797372639
x = 0.7636648162452686
x = 0.7742918851774319
x = 0.7846688002575534
x = 0.794812436828782
x = 0.804737854124365
x = 0.8144585576580216
x = 0.8239867147959915
x = 0.8333333333333333
x = 0.8425084105506491
x = 0.8515210585049342
x = 0.8603796100280634
x = 0.8690917089440531
x = 