In [1]:
import numpy
import pandas
import math
from itertools import combinations

# Players taking part in the game; this order also fixes the column order of
# the membership flags below.
player_list = ["a", "b", "c"]

# One row per observed coalition: the 0/1 flags say which of a, b and c took
# part, and the last column is the profit recorded for that coalition.
df1 = pandas.DataFrame(
    [
        (0, 0, 1, 40),
        (0, 1, 0, 20),
        (0, 1, 1, 65),
        (1, 0, 0, 0),
        (1, 0, 1, 50),
        (1, 1, 0, 30),
        (1, 1, 1, 100),
    ],
    columns=["a", "b", "c", "profit"],
)

def power_set(arr):
    """Return every subset of ``arr`` as a list of sets.

    Subsets are listed by increasing size (starting with the empty set) and,
    within one size, in the order ``itertools.combinations`` produces them.
    """
    subsets = []
    for size in range(len(arr) + 1):
        for members in combinations(arr, size):
            subsets.append(set(members))
    return subsets

# Every coalition (subset) of the players, from the empty set up to all of them.
powerset = power_set(player_list)

def generate_key_value(df, player_list, value_col="profit"):
    """Yield a ``(coalition_key, value)`` pair for every row of ``df``.

    The key is a string with one character per entry of ``player_list``, in
    that order, holding the integer value of the row's flag for that player
    (``"0"``/``"1"`` for membership flags), e.g. ``"011"``.

    Args:
        df: DataFrame with one flag column per player plus ``value_col``.
        player_list: Player column names; their order defines the key layout.
        value_col: Name of the column holding the coalition's value.

    Yields:
        Tuples ``(key, value)`` in row order.

    Raises:
        ValueError: If a flag cannot be converted to an integer (e.g. NaN).
    """
    players = list(player_list)
    # The flag columns are read on their own and cast through int on purpose:
    # DataFrame.iterrows() upcasts a row mixing int flags with a float value
    # column to float, which would render the flags as "0.0"/"1.0" and produce
    # keys that never match the "010"-style keys shapley_value looks up.
    flag_rows = df[players].itertuples(index=False, name=None)
    for flags, value in zip(flag_rows, df[value_col]):
        yield "".join(str(int(flag)) for flag in flags), value

# Lookup table from coalition key (e.g. "011") to the profit recorded in df1.
profit = dict(generate_key_value(df1, player_list))
        
def shapley_value(player, player_list, powerset, profit):
    """Compute the Shapley value of ``player`` from a table of coalition profits.

    For every coalition S in ``powerset`` that does not contain ``player``,
    the marginal contribution ``profit[S + player] - profit[S]`` is weighted
    by ``|S|! * (n - |S| - 1)! / n!`` and the weighted terms are summed.

    Args:
        player: The player to evaluate.
        player_list: Ordered players; fixes the character order of the keys
            used to look coalitions up in ``profit``.
        powerset: List of coalitions, each a set of players.
        profit: Mapping from coalition key (one ``"0"``/``"1"`` character per
            entry of ``player_list``) to that coalition's profit. Coalitions
            without an entry count as 0.

    Returns:
        The weighted sum of marginal contributions; 0 if ``powerset`` is empty.
    """

    def coalition_key(members):
        # "1" where the player belongs to the coalition, "0" otherwise.
        return "".join("1" if p in members else "0" for p in player_list)

    # n is the size of the largest coalition. It is taken by length rather
    # than with max() on the sets themselves: sets compare by subset ordering,
    # which is only a partial order, and max() raises on an empty list.
    n = max((len(s) for s in powerset), default=0)
    n_factorial = math.factorial(n)  # loop-invariant, computed once

    v = 0
    for without_player in powerset:
        if player in without_player:
            continue
        with_player = without_player | {player}
        size = len(without_player)
        weight = math.factorial(size) * math.factorial(n - size - 1) / n_factorial
        marginal = profit.get(coalition_key(with_player), 0) - profit.get(
            coalition_key(without_player), 0
        )
        v += weight * marginal
    return v
    


In [2]:
# Shapley value of each player under the current profit table.
evaluation = dict(
    (name, shapley_value(name, player_list, powerset, profit))
    for name in player_list
)

In [3]:
# Show the Shapley value computed for each player.
evaluation

{'a': 15.0, 'b': 32.5, 'c': 52.5}

In [4]:
# Show the coalition/profit table behind the first example.
df1

Unnamed: 0,a,b,c,profit
0,0,0,1,40
1,0,1,0,20
2,0,1,1,65
3,1,0,0,0
4,1,0,1,50
5,1,1,0,30
6,1,1,1,100


In [5]:
from sklearn import linear_model
# Comparison with a regression-based attribution: fit a linear model without
# intercept that predicts profit from the 0/1 membership flags.
lm = linear_model.LinearRegression(fit_intercept=False)
lm.fit(df1[player_list], df1["profit"])
# Rescale the coefficients so that they add up to the largest profit in df1.
df1["profit"].max() / sum(lm.coef_) * lm.coef_

array([10.76923077, 32.30769231, 56.92307692])

In [6]:
# Second example with the same coalitions as before but a different profit
# column; each row is (a, b, c, profit).
df2 = pandas.DataFrame(
    [
        (0, 0, 1, 0),
        (0, 1, 0, 0),
        (0, 1, 1, 0),
        (1, 0, 0, 0),
        (1, 0, 1, 100),
        (1, 1, 0, 5),
        (1, 1, 1, 180),
    ],
    columns=["a", "b", "c", "profit"],
)

In [7]:
# Show the coalition/profit table behind the second example.
df2

Unnamed: 0,a,b,c,profit
0,0,0,1,0
1,0,1,0,0
2,0,1,1,0
3,1,0,0,0
4,1,0,1,100
5,1,1,0,5
6,1,1,1,180


In [8]:
# Rebuild the profit lookup from df2 and recompute every player's Shapley
# value; both names from the first example are rebound here.
profit = dict(generate_key_value(df2, player_list))
evaluation = dict(
    (name, shapley_value(name, player_list, powerset, profit))
    for name in player_list
)

In [9]:
# Show the Shapley values recomputed from df2.
evaluation

{'a': 77.5, 'b': 27.499999999999996, 'c': 75.0}

In [10]:
# Same regression-based comparison as before, now on df2: a linear model
# without intercept predicting profit from the 0/1 membership flags.
lm = linear_model.LinearRegression(fit_intercept=False)
lm.fit(df2[player_list], df2["profit"])

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=1, normalize=False)

In [11]:
# Inspect the fitted coefficients, one per player column.
lm.coef_

array([48.75, -1.25, 46.25])

In [12]:
# Rescale the coefficients so that they add up to the largest profit in df2.
df2["profit"].max() / sum(lm.coef_) * lm.coef_


array([93.6, -2.4, 88.8])

In [13]:
# Sanity check of power_set on a two-player list.
power_set(["a", "b"])

[set(), {'a'}, {'b'}, {'a', 'b'}]

In [14]:
# Scratch check: math.factorial returns an exact integer even for large
# arguments.
math.factorial(100)

93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000