# 再現実装: "主観確率の更新過程とその応用に関する研究"
author: 小川康行


### TODO:
- [ ] 3サンプルの重み計算と尤度計算を実装してみる
- [ ] 多サンプルに変更してみる

## 事前準備

In [1]:
from dstexpertsystem import PROJECT_ROOT
from dstexpertsystem.read_csv import read_csv

In [2]:
# Table 4: conversion table.
# The displayed result has one row per answer label and one column per
# match level; the column labels are looked up as strings ("1".."5") later on.
path_convert_table = PROJECT_ROOT / "data" / "convert_table.csv"
df_convert_table = read_csv(path_convert_table) 
df_convert_table

Unnamed: 0,1,2,3,4,5
はい,1.0,0.9,0.5,0.1,0.01
ほぼ当てはまる,0.7,0.6,0.5,0.2,0.05
不明,0.2,0.9,1.0,0.9,0.2
ほぼ当てはまらない,0.05,0.2,0.5,0.6,0.7
いいえ,0.01,0.1,0.5,0.9,1.0


In [3]:
# Tables 2-3: match-level table.
# The displayed result has one row per question and one column per animal,
# with integer entries from 1 to 5.
path_match_table = PROJECT_ROOT / "data" / "match_table.csv"
df_match_table = read_csv(path_match_table)
df_match_table

Unnamed: 0,カモ,カラス,シロクマ,ライオン,トラ,ヒョウ,タヌキ,ウマ,コアラ,ヒツジ
肉を主に食べる動物ですか？,5,2,1,1,1,1,2,5,5,5
木に登る事が出来ますか？,5,5,4,1,1,1,2,5,1,5
泳ぐことが出来ますか？,1,5,1,4,1,1,1,2,3,3
熱帯に生息する動物ですか？,3,3,5,1,2,2,2,3,4,5
人間より大きいですか？,5,5,1,1,1,4,5,1,5,4
トラですか？,5,5,5,5,1,5,5,5,5,5
卵を産みますか？,1,1,5,5,5,5,5,5,5,5
体に模様がありますか？,1,5,5,5,1,1,1,4,5,5
植物を主に食べる動物ですか？,2,2,5,4,4,4,2,1,1,1
夜行性ですか？,5,5,5,1,1,1,1,5,1,5


## 例題: 3サンプル

In [4]:
# To keep the problem simple, work with only three samples (columns).
extract_columns = ["ライオン", "タヌキ", "コアラ"]
df_match_3sample = df_match_table[extract_columns]
df_match_3sample

Unnamed: 0,ライオン,タヌキ,コアラ
肉を主に食べる動物ですか？,1,2,5
木に登る事が出来ますか？,1,2,1
泳ぐことが出来ますか？,4,1,3
熱帯に生息する動物ですか？,1,2,4
人間より大きいですか？,1,5,5
トラですか？,5,5,5
卵を産みますか？,5,5,5
体に模様がありますか？,5,1,5
植物を主に食べる動物ですか？,4,2,1
夜行性ですか？,1,1,1


In [5]:
# Take only the first question's row; indexing with a list ([0]) yields a
# one-row table rather than a single row, as the displayed output shows.
df_match_3sample_nikusyoku = df_match_3sample.iloc[[0]]
df_match_3sample_nikusyoku

Unnamed: 0,ライオン,タヌキ,コアラ
肉を主に食べる動物ですか？,1,2,5


In [8]:
# For now, build the whole flow end to end once.

# Question
idx = 0  # position of the question being asked
question = list(df_match_3sample.index)  # all question texts, in table order
question[idx]  # bare expression in the middle of the cell; its value is discarded

# Answer
## "Is it an animal that mainly eats meat?"
# The answer is later used as a row label of the conversion table.
answer = "はい"

# Match levels of every candidate for the selected question
match_level = df_match_3sample.iloc[idx]
match_level

ライオン    1
タヌキ     2
コアラ     5
Name: 肉を主に食べる動物ですか？, dtype: int64

In [6]:
# 重みを計算
def calc_basic_likelihood(answer, convert_table=None):
    """Build a function that maps a match level to its basic likelihood.

    Args:
        answer: Row label of the conversion table (for example "はい").
        convert_table: Table whose rows are answer labels and whose columns
            are labelled with the string form of each match level. When
            omitted, the notebook-level ``df_convert_table`` is used.

    Returns:
        A one-argument function that takes a match level and returns the
        entry of the row selected by ``answer`` in the column labelled
        ``str(match_level)``.

    Raises:
        KeyError: Propagated from the table lookup when ``answer`` (here) or
            the match level (in the returned function) is not a known label.
    """
    table = df_convert_table if convert_table is None else convert_table
    # Select the row once so the returned function only does a column lookup.
    weight_corresponding_to_answer = table.loc[answer]

    def func(match_level):
        # Column labels are strings, while match levels arrive as integers.
        return weight_corresponding_to_answer[str(match_level)]

    return func

# Assign the basic likelihood to every candidate
func = calc_basic_likelihood(answer)
likelihood = match_level.apply(func)

# Sort so that the smallest likelihood comes first
likelihood_sorted = likelihood.sort_values(ascending=True)  # ascending order (not descending), as the output shows
likelihood_sorted

コアラ     0.01
タヌキ     0.90
ライオン    1.00
Name: 肉を主に食べる動物ですか？, dtype: float64

In [7]:
# Basic probability assignment derived from the ascending-sorted likelihoods.
print("*** 基本尤度 ***")
print(likelihood_sorted)
lh = likelihood_sorted.copy()

# The full set of candidates receives the smallest likelihood as its mass.
masses = [[list(lh.index), lh.iloc[0]]]

# Every following nested subset (the candidates from position `pos` onwards)
# receives the increase in likelihood over its predecessor. Positional
# access is used so repeated labels cannot turn the lookup into a Series,
# and the loop name differs from `idx` so the question index set in an
# earlier cell is not overwritten.
for pos in range(1, len(lh)):
    mass_val = lh.iloc[pos] - lh.iloc[pos - 1]
    mass_key = list(lh.index[pos:])
    masses.append([mass_key, mass_val])

# Output for checking the result
print()
print("*** 基本確率 ***")
for mass in masses:
    print(mass)

*** 基本尤度 ***
コアラ     0.01
タヌキ     0.90
ライオン    1.00
Name: 肉を主に食べる動物ですか？, dtype: float64

*** 基本確率 ***
[['コアラ', 'タヌキ', 'ライオン'], 0.01]
[['タヌキ', 'ライオン'], 0.89]
[['ライオン'], 0.09999999999999998]


In [29]:
# Toy label groups. NOTE: ('a') and ('b') are plain strings, not one-element
# tuples (parentheses without a trailing comma do not create a tuple).
aa = (('a'), ('a', 'b'), ('a', 'b', 'c'))
bb = (('b'), ('b', 'c'), ('b', 'c', 'a'))


def to_label_set(labels):
    """Return ``labels`` as a set, treating a bare string as a single label.

    Calling ``set()`` directly on a string splits it into characters, which
    is only correct for one-character labels.
    """
    return {labels} if isinstance(labels, str) else set(labels)


# intersection_tables[i][j] is the intersection of aa[i] and bb[j].
intersection_tables = []
for a in aa:
    intersection_tables_row = []
    for b in bb:
        intersect = to_label_set(a) & to_label_set(b)
        intersection_tables_row.append(intersect)
    intersection_tables.append(intersection_tables_row)
    # Show the table as it grows, once per completed row.
    print(intersection_tables)
    print()
intersection_tables


[[set(), set(), {'a'}]]

[[set(), set(), {'a'}], [{'b'}, {'b'}, {'b', 'a'}]]

[[set(), set(), {'a'}], [{'b'}, {'b'}, {'b', 'a'}], [{'b'}, {'b', 'c'}, {'b', 'a', 'c'}]]



[[set(), set(), {'a'}],
 [{'b'}, {'b'}, {'a', 'b'}],
 [{'b'}, {'b', 'c'}, {'a', 'b', 'c'}]]

In [31]:
import pandas as pd
# Tabulate the intersections: rows are labelled by the entries of aa,
# columns by the entries of bb.
pd.DataFrame(intersection_tables, index=aa, columns=bb)

Unnamed: 0,b,"(b, c)","(b, c, a)"
a,{},{},{a}
"(a, b)",{b},{b},"{b, a}"
"(a, b, c)",{b},"{b, c}","{b, a, c}"


In [31]:
# Toy inputs; the first entry of each is a bare string, because parentheses
# without a trailing comma do not create a tuple.
aa = ('a', ('a', 'b'), ('a', 'b', 'c'))
bb = ('b', ('b', 'c'), ('b', 'c', 'a'))

# Repeat each input once per entry of the other one, giving two lists of
# equal length whose items are the whole aa / bb tuples.
aaa = [aa] * len(bb)
bbb = [bb] * len(aa)

# Start with an empty intersection table.
intersection_tables = []

In [32]:
aaa

[('a', ('a', 'b'), ('a', 'b', 'c')),
 ('a', ('a', 'b'), ('a', 'b', 'c')),
 ('a', ('a', 'b'), ('a', 'b', 'c'))]

In [33]:
bbb

[('b', ('b', 'c'), ('b', 'c', 'a')),
 ('b', ('b', 'c'), ('b', 'c', 'a')),
 ('b', ('b', 'c'), ('b', 'c', 'a'))]

In [50]:
import pandas as pd
# Transpose so that row i holds the i-th entry of aa in every column.
new_aaa = pd.DataFrame(aaa).T
# Wrap every cell in a one-element list so that adding two such frames
# concatenates the lists cell by cell.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour
# of DataFrame.map — confirm the pinned pandas version before switching.
new_aaa2 = new_aaa.applymap(lambda x: [x])
new_aaa2

Unnamed: 0,0,1,2
0,[a],[a],[a]
1,"[(a, b)]","[(a, b)]","[(a, b)]"
2,"[(a, b, c)]","[(a, b, c)]","[(a, b, c)]"


In [51]:
import pandas as pd
# No transpose here: column j holds the j-th entry of bb in every row.
new_bbb = pd.DataFrame(bbb)
# Wrap every cell in a one-element list so that adding two such frames
# concatenates the lists cell by cell.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour
# of DataFrame.map — confirm the pinned pandas version before switching.
new_bbb2 = new_bbb.applymap(lambda x: [x])
new_bbb2

Unnamed: 0,0,1,2
0,[b],"[(b, c)]","[(b, c, a)]"
1,[b],"[(b, c)]","[(b, c, a)]"
2,[b],"[(b, c)]","[(b, c, a)]"


In [52]:
# Cell-wise + on cells that hold lists concatenates them, so each cell
# becomes a two-item list that the next line reads as x[0] and x[1].
new_ccc = new_aaa2 + new_bbb2
# Intersect the two items of every cell.
# NOTE: set() on a bare string yields its characters, which matches a
# one-label set only for single-character labels such as 'a'.
new_ccc2 = new_ccc.applymap(lambda x: set(x[0]).intersection(set(x[1])))
new_ccc2

Unnamed: 0,0,1,2
0,{},{},{a}
1,{b},{b},"{a, b}"
2,{b},"{c, b}","{c, a, b}"


In [14]:
# NOTE(review): the recorded output is a table, so this cell presumably ran
# while bbb was a DataFrame; the cell that builds bbb with a list
# comprehension produces a plain list, which has no .T — confirm run order.
bbb.T

Unnamed: 0,0,1,2
0,b,b,b
1,"(b, c)","(b, c)","(b, c)"
2,"(b, c, a)","(b, c, a)","(b, c, a)"


In [15]:
# NOTE(review): the recorded run of this cell ended in the TypeError shown
# in its output (tuple + str concatenation). Presumably the cell-wise +
# paired a tuple with a str — confirm before reusing this cell.
ccc = aaa + bbb.T
ccc

TypeError: can only concatenate tuple (not "str") to tuple

In [2]:
import pandas as pd

print(aa)
# Start from an empty table so that re-running this cell does not append
# duplicate rows, which would no longer match the index passed below.
intersection_tables = []
for a in aa:
    # One row per entry of aa: its intersection with every entry of bb.
    # NOTE: set(a) splits a bare string into characters, which is harmless
    # only for one-character labels.
    intersection_tables.append([set(a).intersection(b) for b in bb])
# Rows are labelled by the entries of aa, columns by the entries of bb.
pd.DataFrame(intersection_tables, index=aa, columns=bb)

Unnamed: 0,b,"(b, c)","(b, c, a)"
a,{},{},{a}
"(a, b)",{b},{b},"{a, b}"
"(a, b, c)",{b},"{c, b}","{c, a, b}"


In [53]:
# Turn a list of [key, value] pairs into a dictionary.
aa = [
    [1, 'a'],
    [2, 'b'],
]
bb = {key: value for key, value in aa}
bb

{1: 'a', 2: 'b'}