In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Reading in the Data and processing

In [3]:
df_clean = pd.read_csv("processed_data/3_months_data.csv")

In [108]:
df_clean

Unnamed: 0.1,Unnamed: 0,sensor_id,value,time_scal,event
0,0,5895,0.0,0.070203,a
1,1,5889,0.0,0.070204,k
2,2,5896,0.0,0.070204,c
3,3,6253,0.0,0.070204,d
4,4,5887,0.0,0.070204,i
...,...,...,...,...,...
44218,44218,6632,0.0,0.037144,e
44219,44219,6632,1.0,0.037148,E
44220,44220,6632,0.0,0.037150,e
44221,44221,6632,1.0,0.037156,E


# Convertion to format for the SPEED algorithm

In [6]:
def create_on_off_Multi_char():
    dic_on_off = {}
    dic_on_off[5895] = ["a","A"]
    dic_on_off[7125] = ["b", "B"]
    dic_on_off[5896] = ["c", "C"]
    dic_on_off[6253] = ["d", "D"]
    dic_on_off[6632] = ["e", "E"]
    dic_on_off[6633] = ["f", "F"]
    dic_on_off[6635] = ["g", "G"]
    dic_on_off[6896] = ["h", "H"]
    dic_on_off[5887] = ["i", "I"]
    dic_on_off[5888] = ["j", "J"]
    dic_on_off[5889] = ["k", "K"]
    dic_on_off[5893] = ["l", "L"]
    return dic_on_off

In [7]:
def create_on_off_Multi_int():
    dic_on_off = {}
    dic_on_off[5895] = [0,1]
    dic_on_off[7125] = [2, 3]
    dic_on_off[5896] = [4,5]
    dic_on_off[6253] = [6, 7]
    dic_on_off[6632] = [8, 9]
    dic_on_off[6633] = [10, 11]
    dic_on_off[6635] = [12, 13]
    dic_on_off[6896] = [14, 15]
    dic_on_off[5887] = [16, 17]
    dic_on_off[5888] = [18, 19]
    dic_on_off[5889] = [20, 21]
    dic_on_off[5893] = [22, 23]
    return dic_on_off

In [8]:
dic_on_off = create_on_off_Multi_char()
dic_on_off

{5887: ['i', 'I'],
 5888: ['j', 'J'],
 5889: ['k', 'K'],
 5893: ['l', 'L'],
 5895: ['a', 'A'],
 5896: ['c', 'C'],
 6253: ['d', 'D'],
 6632: ['e', 'E'],
 6633: ['f', 'F'],
 6635: ['g', 'G'],
 6896: ['h', 'H'],
 7125: ['b', 'B']}

In [9]:
dic_on_off_int = create_on_off_Multi_int()
dic_on_off_int

{5887: [16, 17],
 5888: [18, 19],
 5889: [20, 21],
 5893: [22, 23],
 5895: [0, 1],
 5896: [4, 5],
 6253: [6, 7],
 6632: [8, 9],
 6633: [10, 11],
 6635: [12, 13],
 6896: [14, 15],
 7125: [2, 3]}

In [10]:
def add_event_col(df, dic):

    add = []

    for index, row in df.iterrows():

        sensor = row["sensor_id"]
        value = int(row["value"])

        add.append(dic[sensor][value])

    df["event"] = np.array(add)
    return df

In [11]:
df = add_event_col(df_clean, dic_on_off)
df = df[["time_scal", "event"]]

In [12]:
df["event"].value_counts()

k    6639
K    6638
a    3815
A    3814
i    3576
I    3575
l    3058
L    3057
c    1752
C    1751
e    1344
E    1343
d    1055
D    1054
B     374
b     374
j     344
J     343
f     111
F     110
h      38
H      37
g      11
G      10
Name: event, dtype: int64

# The SPEED Algorithm

# df and sequence list

In [13]:
df

Unnamed: 0,time_scal,event
0,0.070203,a
1,0.070204,k
2,0.070204,c
3,0.070204,d
4,0.070204,i
...,...,...
44218,0.037144,e
44219,0.037148,E
44220,0.037150,e
44221,0.037156,E


In [14]:
sequence_list = df.values.tolist()    # [timestamp, single event]
print("length of total sequence:", len(sequence_list))
print("length of training sequence:", int(len(sequence_list)*0.8))
print("length of testing sequence:", int(len(sequence_list)*0.2))

length of total sequence: 44223
length of training sequence: 35378
length of testing sequence: 8844


In [15]:
training_data = sequence_list[:int(len(sequence_list)*0.8)]
testing_data = sequence_list[int(len(sequence_list)*0.8):]
print("training:", len(training_data))
print("testing:", len(testing_data))

training: 35378
testing: 8845


#Creating Episodes

In [16]:
caps = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"]   # turning on
lows = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"]   # turning off

In [17]:
def create_episodes(episode_seq, episode_list, sequence_list):
  for item_id, item in enumerate(sequence_list):
    item_time = item[0]
    item_event = item[1]
    episode_seq += item_event
    if item_event in caps:      # start of episode
      episode = item_event
      #print("start of episode: ", item_event)
      opp_event = False
      plus_id = 1
      while opp_event == False and item_id+plus_id < len(sequence_list)-1:
        next_item = sequence_list[item_id + plus_id]
        if next_item[1] == item_event.lower():        # end of episode
          episode += next_item[1]
          episode_list.append(episode)
          opp_event = True
        else:
          episode += next_item[1]
          plus_id += 1
      #print("end of episode, episode: ", episode)

  return episode_list, episode_seq

In [18]:
# tree[key] = [occurence of key, {"seq1 following key":occur of seq1, "seq2 following key":occur of seq2}]
# episode_seq = sub_episode_seq   # to be deleted later after testing

def create_dict_for_tree(episode_list):
  for ep_id, ep in enumerate(episode_list):
    #print("ep[0]: ", ep[0])
    if ep[0] not in dict_tree.keys():
      dict_tree[ep[0]] = [1, {}]
    else:
      dict_tree[ep[0]][0] += 1
      cumu_seq = ""
      for next_item in ep[1:]:
        cumu_seq += next_item
        if cumu_seq in dict_tree[ep[0]][1].keys():
          dict_tree[ep[0]][1][cumu_seq] += 1
        else:
          dict_tree[ep[0]][1][cumu_seq] = 1

  return dict_tree


In [19]:
episode_seq = ""
episode_list = list()
dict_tree = dict()     # [occurences of the alphabet, [[list of things that follows, occurences]]]
episode_list, episode_seq = create_episodes(episode_seq, episode_list, training_data)
dict_tree = create_dict_for_tree(episode_list)

In [20]:
dict_tree.keys()

dict_keys(['A', 'L', 'D', 'I', 'C', 'E', 'F', 'G', 'K', 'J', 'H', 'B'])

In [None]:
dict_tree

#Create nested list as stat tree

In [22]:
# dictionary created above, used to map to decision tree
for key, item in dict_tree.items():
  print("root: ", key, "\tnumber of times as a root: ", item[0], "\tnumber of children: ", len(item[1]))

root:  A 	number of times as a root:  3814 	number of children:  9272
root:  L 	number of times as a root:  2712 	number of children:  2476
root:  D 	number of times as a root:  951 	number of children:  59
root:  I 	number of times as a root:  2551 	number of children:  7272
root:  C 	number of times as a root:  1339 	number of children:  5127
root:  E 	number of times as a root:  1094 	number of children:  2077
root:  F 	number of times as a root:  92 	number of children:  97
root:  G 	number of times as a root:  9 	number of children:  4
root:  K 	number of times as a root:  4610 	number of children:  10269
root:  J 	number of times as a root:  273 	number of children:  38
root:  H 	number of times as a root:  32 	number of children:  45
root:  B 	number of times as a root:  205 	number of children:  10229


In [23]:
dict_tree['A']

[3814,
 {'L': 185,
  'Ll': 112,
  'Lla': 17,
  'a': 3205,
  'i': 34,
  'iI': 31,
  'iIi': 29,
  'iIiI': 29,
  'iIiIa': 1,
  'Li': 14,
  'LiD': 4,
  'LiDd': 4,
  'LiDdl': 1,
  'LiDdlI': 1,
  'LiDdlIa': 1,
  'e': 3,
  'eg': 1,
  'egf': 1,
  'egfa': 1,
  'c': 74,
  'cC': 43,
  'cCa': 18,
  'C': 63,
  'Ca': 15,
  'cCc': 21,
  'cCca': 4,
  'Cc': 46,
  'Cca': 9,
  'Lil': 4,
  'LilC': 1,
  'LilCc': 1,
  'LilCcC': 1,
  'LilCcCI': 1,
  'LilCcCIa': 1,
  'cCL': 2,
  'cCLl': 2,
  'cCLla': 1,
  'ia': 1,
  'I': 28,
  'Ii': 24,
  'IiI': 23,
  'IiIi': 23,
  'IiIiI': 20,
  'IiIiIi': 20,
  'IiIiIiI': 19,
  'IiIiIiIi': 19,
  'IiIiIiIiI': 16,
  'IiIiIiIiIi': 15,
  'IiIiIiIiIiI': 15,
  'IiIiIiIiIiIi': 15,
  'IiIiIiIiIiIia': 2,
  'ca': 14,
  'CcC': 26,
  'CcCc': 23,
  'CcCcC': 16,
  'CcCcCc': 15,
  'CcCcCcC': 8,
  'CcCcCcCa': 1,
  'cL': 14,
  'cLl': 6,
  'cLlL': 2,
  'cLlLD': 1,
  'cLlLDd': 1,
  'cLlLDdE': 1,
  'cLlLDdEe': 1,
  'cLlLDdEeE': 1,
  'cLlLDdEeED': 1,
  'cLlLDdEeEDe': 1,
  'cLlLDdEeEDeE': 1,
  'c

In [24]:
def tree_search(tree, parent_seq, node_char, node_count):
  # tree =   [str, int, list] or [str, int]
  check_layer = tree
  for char in parent_seq:
    for item in check_layer:
      #print("now layer: ", check_layer, "item: ", item, "matching char: ", char, "parent seq: ", parent_seq)
      if type(item) == list and item[0] == char:
        if type(item[-1]) == list:
          #print("append to: ", item, "check layer: ", check_layer, "TRIGGERED HERE**")   # next layer would be deeper in the tree
          check_layer = check_layer[-1]
        else:
          #print("append to: ", item, "check layer: ", check_layer, "TRIGGERED HERE")     # next layer would be the most shallow subtree layer
          item.append([node_char, node_count])
  return tree

In [25]:
def count_nodes(node_dict, node):
  if node in caps or node in lows:
    if node in node_dict:
      node_dict[node] += 1
    else:
      node_dict[node] = 1
  return node_dict

# The Tree

In [26]:
the_tree = list()
count_of_all_nodes = dict()
#root_dict = dict()

print("root nodes", "\t[subtrees]")

for key, item in dict_tree.items(): # key: root node (always caps eg. ABC, one of the 12)
  key_list = item[1].keys()
  sub_tree = list()
  checking = []
  check_now = ""
  count_nodes(count_of_all_nodes, key)
  for sub_key in key_list:  # sub_key: the first alphabet following the root (could be upper/lower case)
    if len(checking) == 0 or len(sub_key) == 1:   # adding the first node after the root
        checking.append(sub_key)    # add into the list of existing nodes (for later checking)
        check_now = sub_key
        sub_tree.append([check_now, item[1][sub_key]])    # adds to subtree
        count_nodes(count_of_all_nodes, item[1][sub_key])
    elif sub_key.startswith(checking[-1]):        # match to the last node, if True the next node should be a child of the last one
      append_node_char = sub_key[len(checking[-1]):]
      if len(append_node_char) == 1:
        #print("sub_key: ", sub_key, "checking: ", check_now, "append: ", append_node_char, "occur: ", item[1][sub_key])
        sub_tree = tree_search(sub_tree, check_now, append_node_char, item[1][sub_key])
        count_nodes(count_of_all_nodes, append_node_char)
      check_now = sub_key
      checking.append(check_now)
    elif sub_key.startswith(checking[-1]) == False:
      re_check_id = -2  #check one more index back
      check_now = checking[re_check_id]
      while check_now != checking[0]:
        if sub_key.startswith(checking[re_check_id]):
          append_node_char = sub_key[len(checking[re_check_id]):]
          sub_tree = tree_search(sub_tree, check_now, append_node_char, item[1][sub_key])
          count_nodes(count_of_all_nodes, append_node_char)
          check_now = sub_key
          checking.append(check_now)
          break
        else:
          #print("*RECHECK*", "sub_key: ", sub_key, "checking: ", check_now, "append: ", "nothing")
          re_check_id -= 1
          check_now = checking[re_check_id]
  the_tree.append([key, count_of_all_nodes[key], sub_tree])
  #root_dict[key] = sub_tree

  print(key, "\t", sub_tree)

print("count of all nodes")
count_of_all_nodes

root nodes 	[subtrees]
A 	 [['L', 185, ['l', 112, ['a', 17]]], ['a', 3205], ['i', 34, ['I', 31, ['i', 29, ['I', 29, ['a', 1]]]]], ['e', 3, ['g', 1, ['f', 1, ['a', 1]]]], ['c', 74, ['C', 43, ['a', 18]]], ['C', 63, ['a', 15]], ['I', 28, ['i', 24, ['I', 23, ['i', 23, ['I', 20, ['i', 20, ['I', 19, ['i', 19, ['I', 16, ['i', 15, ['I', 15, ['i', 15, ['a', 2]]]]]]]]]]]]], ['E', 5, ['e', 4, ['L', 1, ['l', 1, ['a', 1]]]]], ['l', 58, ['L', 38, ['l', 30, ['L', 25, ['l', 18, ['L', 17, ['l', 16, ['L', 11, ['l', 10, ['L', 8, ['l', 7, ['L', 7, ['l', 7, ['L', 6, ['l', 4, ['L', 4, ['l', 4, ['L', 4, ['l', 4, ['L', 3, ['l', 3, ['L', 3, ['l', 3, ['L', 2, ['l', 2, ['a', 1]]]]]]]]]]]]]]]]]]]]]]]]]], ['D', 28, ['d', 26, ['F', 1, ['D', 1, ['d', 1, ['D', 1, ['d', 1, ['f', 1, ['D', 1, ['d', 1, ['l', 1, ['L', 1, ['l', 1, ['L', 1, ['l', 1, ['a', 1]]]]]]]]]]]]]]]], ['F', 1, ['a', 1]], ['k', 70, ['L', 16, ['a', 1]]], ['K', 23, ['k', 20, ['K', 17, ['k', 15, ['L', 1, ['D', 1, ['d', 1, ['l', 1, ['L', 1, ['i', 1, ['D', 

{'A': 4036,
 'B': 212,
 'C': 1901,
 'D': 1332,
 'E': 1960,
 'F': 135,
 'G': 18,
 'H': 49,
 'I': 2965,
 'J': 337,
 'K': 6180,
 'L': 2860,
 'a': 4400,
 'b': 333,
 'c': 1972,
 'd': 1391,
 'e': 1998,
 'f': 173,
 'g': 21,
 'h': 63,
 'i': 3117,
 'j': 351,
 'k': 6266,
 'l': 3190}

In [27]:
the_tree[1]

['L',
 1051,
 [['l', 1999],
  ['D', 293, ['d', 282, ['D', 118, ['d', 117, ['l', 43]]]]],
  ['i', 74, ['D', 19, ['d', 19, ['l', 2]]]],
  ['a', 54, ['l', 27]],
  ['A', 42, ['l', 24]],
  ['F',
   28,
   ['D',
    11,
    ['d',
     11,
     ['D',
      3,
      ['d',
       3,
       ['a',
        1,
        ['D',
         1,
         ['d',
          1,
          ['f',
           1,
           ['A',
            1,
            ['D',
             1,
             ['d',
              1,
              ['F',
               1,
               ['D',
                1,
                ['d',
                 1,
                 ['D',
                  1,
                  ['d',
                   1,
                   ['f', 1, ['D', 1, ['d', 1, ['l', 1]]]]]]]]]]]]]]]]]]]]],
  ['I', 53, ['l', 25]],
  ['G',
   4,
   ['g',
    3,
    ['A',
     1,
     ['a',
      1,
      ['A',
       1,
       ['a',
        1,
        ['A',
         1,
         ['a',
          1,
          ['F',
           1,
       

#Visualisation of the tree

In [28]:
import sys
sys.setrecursionlimit(10000)

In [29]:
pip install treelib

Collecting treelib
  Downloading treelib-1.6.1.tar.gz (24 kB)
Building wheels for collected packages: treelib
  Building wheel for treelib (setup.py) ... [?25l[?25hdone
  Created wheel for treelib: filename=treelib-1.6.1-py3-none-any.whl size=18385 sha256=6e464436e260e0be933613b3b93b3f6b63f6898d7a6d8f7b50b2ef9e6be5e6bb
  Stored in directory: /root/.cache/pip/wheels/89/be/94/2c6d949ce599d1443426d83ba4dc93cd35c0f4638260930a53
Successfully built treelib
Installing collected packages: treelib
Successfully installed treelib-1.6.1


In [30]:
from treelib import Node, Tree

In [31]:
def nested_change(item, func):
    if isinstance(item, list):
        return [nested_change(x, func) for x in item]
    return func(item)

In [32]:
total_count_of_nodes = 0

for key, item in count_of_all_nodes.items():
  total_count_of_nodes += item

print(count_of_all_nodes)
print(total_count_of_nodes)

{'A': 4036, 'l': 3190, 'a': 4400, 'I': 2965, 'i': 3117, 'g': 21, 'f': 173, 'C': 1901, 'c': 1972, 'L': 2860, 'D': 1332, 'd': 1391, 'E': 1960, 'e': 1998, 'F': 135, 'K': 6180, 'k': 6266, 'j': 351, 'J': 337, 'h': 63, 'H': 49, 'G': 18, 'b': 333, 'B': 212}
45260


In [38]:
# we have a problem here
the_tree_for_show = nested_change(the_tree, str)
for thing in the_tree_for_show:
  print(thing[0], thing[1], thing[2])

A 1 [['L', '185', ['l', '112', ['a', '17']]], ['a', '3205'], ['i', '34', ['I', '31', ['i', '29', ['I', '29', ['a', '1']]]]], ['e', '3', ['g', '1', ['f', '1', ['a', '1']]]], ['c', '74', ['C', '43', ['a', '18']]], ['C', '63', ['a', '15']], ['I', '28', ['i', '24', ['I', '23', ['i', '23', ['I', '20', ['i', '20', ['I', '19', ['i', '19', ['I', '16', ['i', '15', ['I', '15', ['i', '15', ['a', '2']]]]]]]]]]]]], ['E', '5', ['e', '4', ['L', '1', ['l', '1', ['a', '1']]]]], ['l', '58', ['L', '38', ['l', '30', ['L', '25', ['l', '18', ['L', '17', ['l', '16', ['L', '11', ['l', '10', ['L', '8', ['l', '7', ['L', '7', ['l', '7', ['L', '6', ['l', '4', ['L', '4', ['l', '4', ['L', '4', ['l', '4', ['L', '3', ['l', '3', ['L', '3', ['l', '3', ['L', '2', ['l', '2', ['a', '1']]]]]]]]]]]]]]]]]]]]]]]]]], ['D', '28', ['d', '26', ['F', '1', ['D', '1', ['d', '1', ['D', '1', ['d', '1', ['f', '1', ['D', '1', ['d', '1', ['l', '1', ['L', '1', ['l', '1', ['L', '1', ['l', '1', ['a', '1']]]]]]]]]]]]]]]], ['F', '1', ['a', '1

In [34]:
root, *tail = the_tree_for_show
tree = Tree()
node = Node(root)
tree.add_node(node)

q = [[node, *tail]]
while q:
    parent, *children = q.pop()
    for child in children:
        if isinstance(child, list):
            head, *tail = child
            node = tree.create_node(head, parent=parent)
            q.append([node, *tail])
        else:
            tree.create_node(child, parent=parent)
tree.show()

TypeError: ignored

#Prediction with reference to tree

In [87]:
def stored_for_prob(target_count_in_term_node, check_seq, target, the_tree):
  for root in the_tree:
    if root[0] == check_seq[0]:
      #print(root[0], root)
      root_subtree = root
      #print("branches to", root[0], ":", len(root[2]))

      check_subseq = check_seq[1:]
      counter = len(check_subseq)
      check_layer = root[2]

      while counter != -1:
        #print("counter: ", counter, "check_subseq: ", check_subseq, "check layer: ", check_layer)
        for id, thing in enumerate(check_layer):
          if counter == 0:
            if thing == target:
              #print("target: ", thing, check_layer[id+1])
              target_count_in_term_node = check_layer[id+1]
              break
          
          if type(thing) == str:
            if thing == check_subseq[0]:
              #print("found match: ", thing, "in node", check_layer)
              # store numbers
              store_for_prob.append((thing, check_layer[1]))
              #print("append:", (thing, check_layer[1]))
          elif type(thing) == int:
            continue
          elif type(thing[-1]) == list:
            if thing[0] == check_subseq[0]:
              check_layer = thing[2]
              #print("found match: ", thing)
              # store nums
              store_for_prob.append((thing[0],thing[1]))
              #print("append:", (thing[0],thing[1]))
        check_subseq = check_subseq[1:]
        counter -= 1
  return(store_for_prob, target_count_in_term_node, root_subtree)

In [101]:
def prob(root, target, check_seq, stored_for_prob):
  target_in_all_nodes = count_of_all_nodes[target]/total_count_of_nodes
  prob = (target_count_in_term_node/root_subtree[1])*target_in_all_nodes
  prob_output = prob

  for item in store_for_prob:
    new_prob = (target_count_in_term_node/item[1])*prob_output
    #print(item, "update:", target_count_in_term_node, "/", item[1], "*", prob_output)
    prob_output = new_prob
    #print("prob:" , prob_output)

  return prob_output

In [104]:
def prob_of_target(check_seq, target, the_tree, total_count_of_nodes):
  root = check_seq[0]
  root_subtree = list()
  store_for_prob = list()
  target_count_in_term_node = 0

  store_for_prob, target_count_in_term_node, root_subtree = stored_for_prob(target_count_in_term_node, check_seq, target, the_tree)

  target_in_all_nodes = count_of_all_nodes[target]/total_count_of_nodes
  #print(target_in_all_nodes)
  '''
  print("Given sequence:", check_seq)
  print("Predict probability for:", target)
  print("subtree node counts stored for calculation:", store_for_prob)
  print("total number of nodes in the tree:", total_count_of_nodes)
  print("count of target node following the subtree:", target_count_in_term_node)
  print("count of target in the entire tree:", count_of_all_nodes[target])
  print(root_subtree[0], root_subtree[1], len(root_subtree[2]))
  '''

  prob_output = prob(root, target, check_seq, stored_for_prob)

  return prob_output

In [54]:
'''
check_seq = "LiDd"
target = "l"
root_subtree = list()

store_for_prob = list()
target_count_in_term_node = 0
'''

'\ncheck_seq = "LiDd"\ntarget = "l"\nroot_subtree = list()\n\nstore_for_prob = list()\ntarget_count_in_term_node = 0\n'

In [71]:
'''
store_for_prob, target_count_in_term_node, root_subtree = stored_for_prob(target_count_in_term_node, check_seq, target, the_tree)
print(store_for_prob)
print(target_count_in_term_node)
'''

'\nstore_for_prob, target_count_in_term_node, root_subtree = stored_for_prob(target_count_in_term_node, check_seq, target, the_tree)\nprint(store_for_prob)\nprint(target_count_in_term_node)\n'

In [105]:
'''
print("Given sequence:", check_seq)
print("Predict probability for:", target)
print("subtree node counts stored for calculation:", store_for_prob)
print("total number of nodes in the tree:", total_count_of_nodes)
print("count of target node following the subtree:", target_count_in_term_node)
print("count of target in the entire tree:", count_of_all_nodes[target])

print(root_subtree[0], root_subtree[1], len(root_subtree[2]))
'''

Given sequence: LiDd
Predict probability for: l
subtree node counts stored for calculation: [('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19)]
total number of nodes in the tree: 45260
count of target node following the subtree: 2
count of target in the entire tree: 3190
L 1051 23


In [107]:
'''
target_in_all_nodes = count_of_all_nodes[target]/total_count_of_nodes
#print(target_in_all_nodes)

prob_output = prob(root, target, check_seq, stored_for_prob)

print("target_count_in_term_node:", target_count_in_term_node)
print("target in all nodes:", count_of_all_nodes[target])
print("prob of target in all nodes:", count_of_all_nodes[target], "/", total_count_of_nodes, "=", target_in_all_nodes)
print("")
'''

'\ntarget_in_all_nodes = count_of_all_nodes[target]/total_count_of_nodes\n#print(target_in_all_nodes)\n\nprob_output = prob(root, target, check_seq, stored_for_prob)\n\nprint("target_count_in_term_node:", target_count_in_term_node)\nprint("target in all nodes:", count_of_all_nodes[target])\nprint("prob of target in all nodes:", count_of_all_nodes[target], "/", total_count_of_nodes, "=", target_in_all_nodes)\nprint("")\n'

In [103]:
check_seq = "LiDd"
target = "l"
the_prob = prob_of_target(check_seq, target, the_tree, total_count_of_nodes)
print(the_prob)

Given sequence: LiDd
Predict probability for: l
subtree node counts stored for calculation: [('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19), ('i', 74), ('D', 19), ('d', 19)]
total number of nodes in the tree: 45260
count of target node following the subtree: 2
count of target in the entire tree: 3190
L 1051 23
2.0896197420758704e-50


# Looping through the dataset for accuracy scores

In [None]:
print("number of episodes:", len(episode_list))
print("length of the sequence:", len(episode_seq))