-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
303 lines (235 loc) · 11.9 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#-----------------------------------------------------
# fullname : s.m.mehrabul islam
# roll : sh-86
# email : smmehrabul-2017614964@cs.du.ac.bd
# institute : university of dhaka, bangladesh
# session : 2017-2018
#-----------------------------------------------------
import os
import math
import sys
import statistics
from data_structures.double_hash import DoubleHash as DoubleHash
from data_structures.red_black_tree import RedBlackTree as RedBlackTree
# data structure path : ./data_structures/
#-----------------------------------------------------
# double hash : double_hash.py
# red black tree : red_black_tree.py
#-----------------------------------------------------
DATA_PATH = os.path.join(os.path.abspath(os.getcwd()), "data")
KEY_SET_FILE = os.path.join(DATA_PATH, "key_set.txt")
INSERT_SEARCH_SEQUENCE_FILE = os.path.join(DATA_PATH, "insert_search_sequence.txt")
PERFORMANCE_PATH = os.path.join(os.path.abspath(os.getcwd()), "performance")
DOUBLE_HASH_PERFORMANCE_DATA_FILE = os.path.join(PERFORMANCE_PATH, "double_hash_performance.txt")
RED_BLACK_TREE_PERFORMANCE_DATA_FILE = os.path.join(PERFORMANCE_PATH, "red_black_tree_performance.txt")
# data path : ./data/
#-----------------------------------------------------
# key set : key_set.txt
# insert-search sequence : insert_search_sequence.txt
#-----------------------------------------------------
INSERT_OPCODE = 0
SEARCH_OPCODE = 1
KEY_SET = []
INSERT_SEARCH_SEQUENCE = []
DOUBLE_HASH_PERFORMANCE_DATA = []
DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID = []
RED_BLACK_TREE_PERFORMANCE_DATA = []
RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID = []
NUMBER_OF_DEMO_DATA = 100
DATA_SCATTER_RANGE = 50
DOUBLE_HASH_DATA_POINT = "*"
RED_BLACK_TREE_DATA_POINT = "."
def initialize_files():
with open(DOUBLE_HASH_PERFORMANCE_DATA_FILE, "w") as file_object:
file_object.write("")
with open(RED_BLACK_TREE_PERFORMANCE_DATA_FILE, "w") as file_object:
file_object.write("")
def read_csv(file_name):
with open(file_name, "r") as file_object:
values = file_object.read().split(",")
values = values[:-1]
return values
def append_csv(file_name, value):
with open(file_name, "a") as file_object:
file_object.write(str(value) + ",")
def read_commands(file_name):
with open(file_name, "r") as file_object:
command_lines = file_object.read().split("\n")
commands = []
for command_line in command_lines:
command = command_line.split(" ")
commands.append(command)
return commands
def load_datasets():
global KEY_SET
global INSERT_SEARCH_SEQUENCE
KEY_SET = read_csv(KEY_SET_FILE)
INSERT_SEARCH_SEQUENCE = read_commands(INSERT_SEARCH_SEQUENCE_FILE)
def perform_double_hashing():
global KEY_SET
global INSERT_SEARCH_SEQUENCE
global DOUBLE_HASH_PERFORMANCE_DATA
opcode = -1
key = -1
index = -1
probe = -1
table_size = math.ceil(len(KEY_SET)*1.2)
double_hash_table = DoubleHash(table_size)
for command in INSERT_SEARCH_SEQUENCE:
opcode = int(command[0])
key = int(command[1])
if(opcode == INSERT_OPCODE):
index, probe = double_hash_table.insert(key)
elif(opcode == SEARCH_OPCODE):
index, probe = double_hash_table.search(key)
if(index != -1):
DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID.append(probe)
DOUBLE_HASH_PERFORMANCE_DATA.append(probe)
append_csv(DOUBLE_HASH_PERFORMANCE_DATA_FILE, probe)
# print(str(index) + " " + str(probe))
# print(DOUBLE_HASH_PERFORMANCE_DATA)
return
def perform_red_black_tree():
global INSERT_SEARCH_SEQUENCE
global RED_BLACK_TREE_PERFORMANCE_DATA
red_black_tree = RedBlackTree()
opcode = -1
key = -1
node_key = -1
number_of_inspection = -1
for command in INSERT_SEARCH_SEQUENCE:
opcode = int(command[0])
key = int(command[1])
if(opcode == INSERT_OPCODE):
response, number_of_inspection = red_black_tree.insert_node(key)
elif(opcode == SEARCH_OPCODE):
response, number_of_inspection = red_black_tree.search_node(key)
if(response != -1):
RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID.append(number_of_inspection)
RED_BLACK_TREE_PERFORMANCE_DATA.append(number_of_inspection)
append_csv(RED_BLACK_TREE_PERFORMANCE_DATA_FILE, number_of_inspection)
# print(str(response) + " " + str(number_of_inspection))
# print(RED_BLACK_TREE_PERFORMANCE_DATA)
return
def clear_console():
# windows
if os.name == 'nt':
os.system('cls')
# linux/mac
else:
os.system('clear')
def descriptive_comparison():
# double hashing
mean_double_hashing = round(statistics.mean(DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID), 3)
stdev_double_hashing = round(statistics.stdev(DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID), 3)
max_double_hashing = round(max(DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID), 3)
min_double_hashing = round(min(DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID), 3)
# red black tree
mean_red_black_tree = round(statistics.mean(RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID), 3)
stdev_red_black_tree = round(statistics.stdev(RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID), 3)
max_red_black_tree = round(max(RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID), 3)
min_red_black_tree = round(min(RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID), 3)
print('{:<35}'.format("Descriptive Comparison"))
print('{:<35}'.format("========================"))
print()
print("We've calculated min, max, mean & standard deviation for both of the algorithms.\nComparison between those performance measures are given below:\n")
print('{:<50}'.format("------------------------------------------------------------------------"))
print(' {:<15} {:<5} {:<5} {:<10} {:<10}'.format("", "Min", "Max", "Mean", "Standard Deviation"))
print('{:<50}'.format("------------------------------------------------------------------------"))
print(' {:<15} {:<5} {:<5} {:<10} {:<10}'.format("Double Hashing", min_double_hashing, max_double_hashing, mean_double_hashing, stdev_double_hashing))
print(' {:<15} {:<5} {:<5} {:<10} {:<10}'.format("Red Black Tree", min_red_black_tree, max_red_black_tree, mean_red_black_tree, stdev_red_black_tree))
print('{:<50}'.format("------------------------------------------------------------------------"))
print()
print("So, the above table indicates that the red-black-tree can definitely outperform\ndouble-hashing in some worst case scenario. As the maximum performance measure\nof double hashing is worse than the one of red-black-tree.\n")
print("But the mean performance measure of double-hashing is way better than that of\nred-black-tree. Thus, in general (average case scenarios), the double-hashing\nwill always outperform red-black-tree.\n\n")
def tabular_comparison():
global INSERT_SEARCH_SEQUENCE
print('{:<35}'.format("Tabular Comparison"))
print('{:<35}'.format("========================"))
print()
print(f"As we've generated the insert-search sequence randomly\nif we take the performance measures of the last {NUMBER_OF_DEMO_DATA} \ninsert-search sequence for both of the algorithms, we\nshould get a useful comparison between both algorithms\nbased on some random insert-search sequence.\n\nBecause taking last 100 insert-search will ensure that\nwe are comparing between a largely populated double\nhash table & a largely populated red black tree over a\nrandom set of operations on some random set of data.\n\nSo, the comparison is given below:")
print()
print('{:<50}'.format("---------------------------------------------------------------"))
print(' {:<5} {:<10} {:<20} {:<20}'.format("SL", "OP", "Double Hashing", "Red Black Tree"))
print('{:<50}'.format("---------------------------------------------------------------"))
offset = len(DOUBLE_HASH_PERFORMANCE_DATA)-NUMBER_OF_DEMO_DATA
index = 0
while(index<NUMBER_OF_DEMO_DATA):
sl = offset+index+1
op = "INSERT" if (INSERT_SEARCH_SEQUENCE[offset+index][0] == str(INSERT_OPCODE)) else "SEARCH"
number_of_probes = DOUBLE_HASH_PERFORMANCE_DATA[offset+index]
number_of_inspection = RED_BLACK_TREE_PERFORMANCE_DATA[offset+index]
print(' {:<5} {:<10} {:<20} {:<20}'.format(sl, op , number_of_probes, number_of_inspection))
index += 1
print('{:<50}'.format("---------------------------------------------------------------"))
print()
print()
def visual_comparison():
double_hash_demo_data = DOUBLE_HASH_PERFORMANCE_DATA_ONLY_VALID[((-1)*NUMBER_OF_DEMO_DATA)::]
red_black_tree_demo_data = RED_BLACK_TREE_PERFORMANCE_DATA_ONLY_VALID[((-1)*NUMBER_OF_DEMO_DATA)::]
max_double_hashing = round(max(double_hash_demo_data), 3)
min_double_hashing = round(min(double_hash_demo_data), 3)
max_red_black_tree = round(max(red_black_tree_demo_data), 3)
min_red_black_tree = round(min(red_black_tree_demo_data), 3)
min_data = min(min_double_hashing, min_red_black_tree)
max_data = max(max_double_hashing, max_red_black_tree)
print('{:<35}'.format("Visual Comparison"))
print('{:<35}'.format("========================"))
print()
print(f"Visualizing performance measures for the last {NUMBER_OF_DEMO_DATA} insert-search sequence.\n")
print('{:<50}'.format("------------------------------------------------------------------------"))
print(' {:<15}: {:<25} ({:})'.format("Double Hashing", f"Number of Probes", DOUBLE_HASH_DATA_POINT))
print(' {:<15}: {:<25} ({:})'.format("Red Black Tree", f"Number of Inspections", RED_BLACK_TREE_DATA_POINT))
print('{:<50}'.format("------------------------------------------------------------------------"))
print(' {:<15}| {:<30}'.format("Data Value", "Data Scatter"))
print('{:<50}'.format("------------------------------------------------------------------------"))
data_value = min_data
while(data_value<max_data):
print(' {:<15}| '.format(data_value, ""), end="")
indices = [index for (index, item) in enumerate(double_hash_demo_data) if item == data_value]
data_count = 0
for index in indices:
if(data_count==DATA_SCATTER_RANGE):
data_count = 0
print()
print(' {:<15} '.format("", ""), end="")
print(DOUBLE_HASH_DATA_POINT, end="")
data_count += 1
indices = [index for (index, item) in enumerate(red_black_tree_demo_data) if item == data_value]
for index in indices:
if(data_count==DATA_SCATTER_RANGE):
data_count = 0
print()
print(' {:<15} '.format("", ""), end="")
print(RED_BLACK_TREE_DATA_POINT, end="")
data_count += 1
print()
data_value += 1
print('{:<50}'.format("------------------------------------------------------------------------"))
print()
def performance_comparison():
clear_console()
print()
print('{:<35}'.format("=================================="))
print('{:<35}'.format("Double Hashing VS Red Black Tree"))
print('{:<35}'.format("=================================="))
print()
print("Here, the units of performance measurements are:")
print()
print('{:<15}: {:<15}'.format("Double Hashing", "Number of Probes"))
print('{:<15}: {:<15}'.format("Red Black Tree", "Number of Inspections"))
print()
print()
descriptive_comparison()
tabular_comparison()
visual_comparison()
print("Please scroll up to read the entire performance evaluation.")
print("Thank you\n")
def main():
initialize_files()
load_datasets()
perform_double_hashing()
perform_red_black_tree()
performance_comparison()
if __name__ =="__main__":
main()