In [72]:
# Short keys identifying each book this notebook knows about.
SHERLOCK = 'sher'
METAMORPHOSIS = 'meta'  # NOTE(review): declared, but it has no entry in the tables below
PRIDE = 'prid'
chosen_book = PRIDE

# Per-book vocabulary size; the number is embedded in the data file names.
words_count = {
    SHERLOCK: 3378,
    PRIDE: 2629
}
# Fail early with a readable message when a book without data is selected.
if chosen_book not in words_count:
    raise KeyError(f'no word-count entry for book {chosen_book!r}; known books: {sorted(words_count)}')
most_common_cnt = words_count[chosen_book]

# Each path uses its own book's count, so every entry is valid regardless of
# which book is currently chosen (previously all entries carried the chosen
# book's count).
encoded_words_files = {
    SHERLOCK: f'experiments/words_data/sherlock/encoded_words_{words_count[SHERLOCK]}_common.csv',
    PRIDE: f'experiments/words_data/pride_and_prejudice/encoded_words_{words_count[PRIDE]}_common.csv'
}

## Read data from CSV

In [73]:
import csv
# Load the encoded text: one CSV row per sequence, one integer word id per field.
# The list is reset first so a failed open never leaves data from a previous run.
tns_data = []
# newline='' is what the csv module expects from file objects it reads.
with open(encoded_words_files[chosen_book], mode='r', newline='') as csvfile:
    # Every id is wrapped in its own one-element list, so a row becomes a
    # list of single-item lists (presumably the itemset layout Data expects — confirm).
    tns_data.extend([[int(item)] for item in row] for row in csv.reader(csvfile))
tns_data[:2]


[[[196],
  [457],
  [1210],
  [370],
  [740],
  [49],
  [675],
  [28],
  [213],
  [8],
  [117],
  [288]],
 [[56],
  [29],
  [224],
  [38],
  [402],
  [49],
  [27],
  [51],
  [214],
  [412],
  [457],
  [22],
  [458],
  [198],
  [1782],
  [44],
  [164],
  [313],
  [1105],
  [13],
  [57]]]

## Run TNS algorithm

In [74]:
from TNS import TNS, Data


In [75]:
# Wrap the encoded sequences and run the TNS miner over them.
data = Data(tns_data)
algorithm = TNS()
# Mining parameters kept together so they are easy to spot and tune.
tns_options = dict(k=50, min_conf=0.5, delta=2)
rules = algorithm.run(data, **tns_options)
# The recorded output shows a 3-tuple whose first element is an RBTree.
# NOTE(review): the two trailing numbers look like runtime and memory use — confirm against TNS.run.
print(rules)

Using TNS+ETARM
(<RBTree.RBTree object at 0x7f9bfc5f5b38>, 12.130455255508423, 17195008)


In [76]:
# Drain the rule tree (first element of the TNS result), taking the maximum
# element each time, until pop_maximum() hands back a falsy value.
# Note: this empties the tree, so re-running the cell without re-running the
# TNS cell yields an empty list.
rules_list = []
rule_tree = rules[0]
item = rule_tree.pop_maximum()
while item:
    rules_list.append(item)
    item = rule_tree.pop_maximum()
print("Rules count:", len(rules_list))
rules_list

Rules count: 50


[<Rule.Rule at 0x7f9be57b14e0>,
 <Rule.Rule at 0x7f9be4d8c400>,
 <Rule.Rule at 0x7f9be717a6d8>,
 <Rule.Rule at 0x7f9be5ede860>,
 <Rule.Rule at 0x7f9be4ca7588>,
 <Rule.Rule at 0x7f9be4d43710>,
 <Rule.Rule at 0x7f9be4d1f588>,
 <Rule.Rule at 0x7f9be6353eb8>,
 <Rule.Rule at 0x7f9be4c0c5c0>,
 <Rule.Rule at 0x7f9be4c56f98>,
 <Rule.Rule at 0x7f9be7488588>,
 <Rule.Rule at 0x7f9be4745668>,
 <Rule.Rule at 0x7f9be5bb1e10>,
 <Rule.Rule at 0x7f9be4d43a90>,
 <Rule.Rule at 0x7f9be56a7cc0>,
 <Rule.Rule at 0x7f9be447aa20>,
 <Rule.Rule at 0x7f9be4bf17b8>,
 <Rule.Rule at 0x7f9be63537b8>,
 <Rule.Rule at 0x7f9be4745438>,
 <Rule.Rule at 0x7f9be4bf1da0>,
 <Rule.Rule at 0x7f9be63534a8>,
 <Rule.Rule at 0x7f9be4bf14a8>,
 <Rule.Rule at 0x7f9be4cb3eb8>,
 <Rule.Rule at 0x7f9be4bf1860>,
 <Rule.Rule at 0x7f9be4cb3f98>,
 <Rule.Rule at 0x7f9be4cb3390>,
 <Rule.Rule at 0x7f9be4bfb828>,
 <Rule.Rule at 0x7f9be5508f98>,
 <Rule.Rule at 0x7f9be710b5f8>,
 <Rule.Rule at 0x7f9be4bf1e80>,
 <Rule.Rule at 0x7f9be7696198>,
 <Rule.R

## Read most common words

In [77]:
# Location of each book's most-common-words list.
# Each path is built from that book's own entry in words_count, so the entry
# for a book that is not currently chosen still names the right file
# (previously every entry carried the chosen book's count).
most_common_words_files = {
    SHERLOCK: f'experiments/words_data/sherlock/most_common_words_{words_count[SHERLOCK]}.csv',
    PRIDE: f'experiments/words_data/pride_and_prejudice/most_common_words_{words_count[PRIDE]}.csv'
}

In [78]:
import csv
# Vocabulary list: the first field of every CSV row, kept in file order.
# Order matters because the rule-mapping cell looks words up by list position.
dictionary_arr = []
# newline='' is what the csv module expects from file objects it reads.
with open(most_common_words_files[chosen_book], mode='r', newline='') as dict_csv:
    dictionary_arr.extend(row[0] for row in csv.reader(dict_csv))


## Map rules

In [79]:
def _ids_to_words(word_ids):
    """Translate encoded word ids into their words via dictionary_arr."""
    return [dictionary_arr[word_id] for word_id in word_ids]


# One (antecedent words, consequent words, support, confidence) tuple per mined rule.
rules_list_mapped = [
    (
        _ids_to_words(mined.antecedents),
        _ids_to_words(mined.consequents),
        mined.support,
        mined.confidence,
    )
    for mined in rules_list
]

In [80]:
# Show each mapped rule as "antecedents ==> consequents" followed by its
# support and confidence.
for rule in rules_list_mapped:
    rule_line = f"{', '.join(rule[0])} ==> {', '.join(rule[1])}  sup= {rule[2]}  conf= {rule[3]}"
    print(rule_line)

sir ==> william  sup= 41  conf= 0.6507936507936508
de ==> bourgh  sup= 37  conf= 0.9736842105263158
dare ==> say  sup= 30  conf= 0.6122448979591837
chapter ==> elizabeth  sup= 28  conf= 0.56
miss, de ==> bourgh  sup= 20  conf= 0.9523809523809523
thousand ==> pound  sup= 19  conf= 0.6129032258064516
ladi, de ==> bourgh  sup= 18  conf= 0.9473684210526315
catherin, de ==> bourgh  sup= 16  conf= 0.8888888888888888
miss, hurst ==> bingley  sup= 8  conf= 0.8
gracechurch ==> street  sup= 7  conf= 1.0
georg ==> wickham  sup= 7  conf= 0.875
collin, de ==> bourgh  sup= 6  conf= 1.0
darci, colonel ==> fitzwilliam  sup= 6  conf= 0.6666666666666666
one, dare ==> say  sup= 6  conf= 0.6666666666666666
insist ==> upon  sup= 6  conf= 0.6
elder ==> sister  sup= 6  conf= 0.5
miss, ladi, de ==> bourgh  sup= 5  conf= 0.8333333333333334
elizabeth, colonel ==> fitzwilliam  sup= 5  conf= 0.8333333333333334
may, depend ==> upon  sup= 5  conf= 0.8333333333333334
elizabeth, de ==> bourgh  sup= 5  conf= 0.7142857

In [81]:
# Destination CSV for the mined rules of each book.
# Each path is built from that book's own entry in words_count, so the entry
# for a book that is not currently chosen still names the right file
# (previously every entry carried the chosen book's count).
rules_files = {
    SHERLOCK: f'experiments/words_data/sherlock/rules_{words_count[SHERLOCK]}_common_words.csv',
    PRIDE: f'experiments/words_data/pride_and_prejudice/rules_{words_count[PRIDE]}_common_words.csv'
}

In [82]:
# Persist the mapped rules, one rule per CSV row.
# newline='' stops the csv writer's own "\r\n" line endings from being
# translated again on Windows, which would insert a blank line after every row.
with open(rules_files[chosen_book], mode='w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    # NOTE(review): the antecedent/consequent fields are Python lists, so each
    # is written as its str() form (e.g. "['miss', 'de']") — confirm that is
    # the format downstream readers expect.
    csvwriter.writerows(rules_list_mapped)
