/
UAI.py
441 lines (389 loc) · 14.5 KB
/
UAI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
from itertools import combinations
import numpy as np
from pyparsing import alphas, Combine, Literal, Optional, nums, Word
from pgmpy.models import BayesianModel, MarkovModel
from pgmpy.factors.discrete import TabularCPD, DiscreteFactor
class UAIReader(object):
    """
    Class for reading UAI file format from files or strings.

    The whole network is parsed in the constructor and the results are
    exposed as the attributes ``network_type``, ``variables``, ``domain``,
    ``edges`` and ``tables``.
    """

    def __init__(self, path=None, string=None):
        """
        Initialize an instance of UAI reader class

        Parameters
        ----------
        path : str
            Path of the file containing UAI information.

        string : str
            String containing UAI information. Only used when `path` is
            not given.

        Raises
        ------
        ValueError
            If neither `path` nor `string` is given (or both are empty).

        Example
        -------
        >>> reader = UAIReader('TestUai.uai')

        Reference
        ---------
        http://graphmod.ics.uci.edu/uai08/FileFormat
        """
        if path:
            # Context manager so the file handle is closed once it is read.
            with open(path) as data:
                self.network = data.read()
        elif string:
            self.network = string
        else:
            raise ValueError("Must specify either path or string.")
        self.grammar = self.get_grammar()
        self.network_type = self.get_network_type()
        self.variables = self.get_variables()
        self.domain = self.get_domain()
        self.edges = self.get_edges()
        self.tables = self.get_tables()

    @staticmethod
    def _as_list(tokens):
        """
        Normalize a named parse result to a list.

        When a named group matched exactly one token the parse result is
        the bare token (an ``int`` for function scopes, a ``str`` for
        domains and values) instead of a sequence; iterating such a string
        would split it into characters.
        """
        if isinstance(tokens, (int, str)):
            return [tokens]
        return list(tokens)

    def get_grammar(self):
        """
        Returns the grammar of the UAI file.

        The grammar is built incrementally: the partial grammar is used to
        parse the counts (number of variables, number of functions, size of
        every scope and table) that determine how many tokens the next part
        of the grammar has to match.  As a side effect ``self.no_variables``
        and ``self.no_functions`` are set.
        """
        network_name = Word(alphas).setResultsName("network_name")
        no_variables = Word(nums).setResultsName("no_variables")
        grammar = network_name + no_variables
        self.no_variables = int(grammar.parseString(self.network)["no_variables"])

        domain_variables = (Word(nums) * self.no_variables).setResultsName(
            "domain_variables"
        )
        grammar += domain_variables

        no_functions = Word(nums).setResultsName("no_functions")
        grammar += no_functions
        self.no_functions = int(grammar.parseString(self.network)["no_functions"])

        # Scope section: "<scope size> <variable index> ..." per function.
        integer = Word(nums).setParseAction(lambda t: int(t[0]))
        for function in range(0, self.no_functions):
            scope_name = "fun_scope_" + str(function)
            grammar += Word(nums).setResultsName(scope_name)
            function_scope = grammar.parseString(self.network)[scope_name]
            grammar += (integer * int(function_scope)).setResultsName(
                "fun_" + str(function)
            )

        # Table section: "<number of values> <value> ..." per function.
        # The optional exponent lets values such as "1e-05" (which str() of
        # a small float produces) be read back.
        exponent = (
            (Literal("e") | Literal("E"))
            + Optional(Literal("+") | Literal("-"))
            + Word(nums)
        )
        floatnumber = Combine(
            Word(nums)
            + Optional(Literal(".") + Optional(Word(nums)))
            + Optional(exponent)
        )
        for function in range(0, self.no_functions):
            count_name = "fun_no_values_" + str(function)
            grammar += Word(nums).setResultsName(count_name)
            no_values = grammar.parseString(self.network)[count_name]
            grammar += (floatnumber * int(no_values)).setResultsName(
                "fun_values_" + str(function)
            )
        return grammar

    def get_network_type(self):
        """
        Returns the type of network defined by the file.

        Returns
        -------
        string : str
            String containing network type.

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_network_type()
        'MARKOV'
        """
        parsed = self.grammar.parseString(self.network)
        return parsed["network_name"]

    def get_variables(self):
        """
        Returns a list of variables.

        Each variable is represented by an index of list.
        For example if the no of variables are 4 then the list will be
        [var_0, var_1, var_2, var_3]

        Returns
        -------
        list: list of variables

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_variables()
        ['var_0', 'var_1', 'var_2']
        """
        return ["var_" + str(var) for var in range(0, self.no_variables)]

    def get_domain(self):
        """
        Returns the dictionary of variables with keys as variable name
        and values as domain of the variables.

        The domain sizes are kept as the strings read from the file.

        Returns
        -------
        dict: dictionary containing variables and their domains

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_domain()
        {'var_0': '2', 'var_1': '2', 'var_2': '3'}
        """
        parsed = self.grammar.parseString(self.network)
        # _as_list keeps a single multi-digit domain (e.g. "10") in one piece.
        var_domain = self._as_list(parsed["domain_variables"])
        return {
            "var_" + str(index): size for index, size in enumerate(var_domain)
        }

    def get_edges(self):
        """
        Returns the edges of the network.

        For a Bayesian network the last variable of every function scope is
        the child and the edges are directed ``(parent, child)``.  For a
        Markov network every pair of variables sharing a function scope is
        connected.  Any other network type yields an empty set.

        Returns
        -------
        set: set containing the edges of the network

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_edges()
        {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        """
        # The parse result does not change between functions: parse once.
        parsed = self.grammar.parseString(self.network)
        edges = []
        for function in range(0, self.no_functions):
            scope = self._as_list(parsed["fun_" + str(function)])
            if self.network_type == "BAYES":
                child_var = "var_" + str(scope[-1])
                for var in scope[:-1]:
                    edges.append(("var_" + str(var), child_var))
            elif self.network_type == "MARKOV":
                names = ["var_" + str(var) for var in scope]
                edges.extend(combinations(names, 2))
        return set(edges)

    def get_tables(self):
        """
        Returns list of tuple of child variable and CPD in case of Bayesian
        and list of tuple of scope of variables and values in case of Markov.

        The values are kept as the strings read from the file, in file order.

        Returns
        -------
        list : list of tuples of child variable and values in Bayesian
            list of tuples of scope of variables and values in case of Markov.

        Example
        -------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_tables()
        [(['var_0', 'var_1'], ['4.000', '2.400', '1.000', '0.000']),
         (['var_0', 'var_1', 'var_2'],
          ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
           '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        """
        parsed = self.grammar.parseString(self.network)
        tables = []
        for function in range(0, self.no_functions):
            scope = self._as_list(parsed["fun_" + str(function)])
            if self.network_type == "BAYES":
                key = "var_" + str(scope[-1])
            elif self.network_type == "MARKOV":
                key = ["var_" + str(var) for var in scope]
            else:
                continue
            # _as_list keeps a one-value table from being split into characters.
            values = self._as_list(parsed["fun_values_" + str(function)])
            tables.append((key, values))
        return tables

    def get_model(self):
        """
        Returns an instance of Bayesian Model or Markov Model.
        Variables are in the pattern var_0, var_1, var_2 where var_0 is
        0th index variable, var_1 is 1st index variable.

        Return
        ------
        model: an instance of Bayesian or Markov Model, or None when the
            network type is neither 'BAYES' nor 'MARKOV'.

        Examples
        --------
        >>> reader = UAIReader('TestUAI.uai')
        >>> reader.get_model()
        """
        if self.network_type == "BAYES":
            model = BayesianModel()
            model.add_nodes_from(self.variables)
            model.add_edges_from(self.edges)

            # self.tables only stores the child of each function, so the
            # parents are recovered from the function scopes (same order).
            parsed = self.grammar.parseString(self.network)
            tabular_cpds = []
            for index, (child_var, raw_values) in enumerate(self.tables):
                scope = self._as_list(parsed["fun_" + str(index)])
                parents = ["var_" + str(var) for var in scope[:-1]]
                states = int(self.domain[child_var])
                # The UAI format lists table entries with the last scope
                # variable (the child) varying fastest, so every run of
                # `states` values is one column of the CPD.
                # NOTE(review): assumes TabularCPD orders its columns with
                # the first evidence variable varying slowest - confirm
                # against the installed pgmpy version.
                values = np.array(list(map(float, raw_values)))
                values = values.reshape(-1, states).T
                if parents:
                    parent_card = [int(self.domain[var]) for var in parents]
                    cpd = TabularCPD(
                        child_var,
                        states,
                        values,
                        evidence=parents,
                        evidence_card=parent_card,
                    )
                else:
                    cpd = TabularCPD(child_var, states, values)
                tabular_cpds.append(cpd)

            model.add_cpds(*tabular_cpds)
            return model

        elif self.network_type == "MARKOV":
            model = MarkovModel()
            # Add every variable explicitly so that variables appearing only
            # in single-variable factors are part of the model as well.
            model.add_nodes_from(self.variables)
            model.add_edges_from(self.edges)

            factors = []
            for variables, raw_values in self.tables:
                cardinality = [int(self.domain[var]) for var in variables]
                value = list(map(float, raw_values))
                factor = DiscreteFactor(
                    variables=variables, cardinality=cardinality, values=value
                )
                factors.append(factor)

            model.add_factors(*factors)
            return model

        return None
class UAIWriter(object):
    """
    Class for writing models in UAI.

    Variables are numbered by their position after sorting the
    ``(variable, cardinality)`` pairs by cardinality string and then by
    variable name; the domain line and the function scopes both use this
    numbering.
    """

    def __init__(self, model):
        """
        Initialize an instance of UAI writer class

        Parameters
        ----------
        model: A Bayesian or Markov model
            The model to write

        Raises
        ------
        TypeError
            If `model` is neither a Bayesian nor a Markov model.
        """
        if isinstance(model, BayesianModel):
            self.network = "BAYES\n"
        elif isinstance(model, MarkovModel):
            self.network = "MARKOV\n"
        else:
            raise TypeError("Model must be an instance of Bayesian or Markov model.")

        self.model = model
        self.no_nodes = self.get_nodes()
        self.domain = self.get_domain()
        self.functions = self.get_functions()
        self.tables = self.get_tables()

    def __str__(self):
        """
        Returns the UAI file as a string.

        The text is assembled in a local buffer, so ``self.network`` keeps
        holding only the header line and repeated calls return the same
        string.
        """
        ordered = self._ordered_domain()
        pieces = [
            self.network,
            self.no_nodes + "\n",
            " ".join(card for _, card in ordered) + "\n",
            str(len(self.functions)) + "\n",
        ]
        for fun in self.functions:
            pieces.append(str(len(fun)) + " " + " ".join(fun) + "\n")
        # Blank line between the scope section and the table section.
        pieces.append("\n")
        for table in self.tables:
            pieces.append(str(len(table)) + "\n" + " ".join(table) + "\n")
        # Drop the final newline.
        return "".join(pieces)[:-1]

    def _ordered_domain(self):
        """Return the (variable, cardinality) pairs in output order."""
        return sorted(self.domain.items(), key=lambda item: (item[1], item[0]))

    def _variable_positions(self):
        """Map every variable name to its index (as str) in output order."""
        return {
            var: str(position)
            for position, (var, _) in enumerate(self._ordered_domain())
        }

    def _sorted_cpds(self):
        """
        Return the model's CPDs ordered by variable name.

        A new list is built so that the list returned by the model is not
        reordered in place.
        """
        return sorted(self.model.get_cpds(), key=lambda cpd: cpd.variable)

    def get_nodes(self):
        """
        Returns the number of nodes of the model as a string.

        Example
        -------
        >>> writer = UAIWriter(model)
        >>> writer.get_nodes()
        """
        return str(len(self.model.nodes()))

    def get_domain(self):
        """
        Returns a dict mapping each variable to its cardinality (as str).

        For a Bayesian model the variables are those that own a CPD; for a
        Markov model they are the variables appearing in the factors.

        Example
        -------
        >>> writer = UAIWriter(model)
        >>> writer.get_domain()
        """
        if isinstance(self.model, BayesianModel):
            return {
                cpd.variable: str(cpd.variable_card) for cpd in self._sorted_cpds()
            }
        elif isinstance(self.model, MarkovModel):
            domain = {}
            for factor in self.model.get_factors():
                for var in factor.variables:
                    if var not in domain:
                        domain[var] = str(factor.get_cardinality([var])[var])
            return domain
        else:
            raise TypeError("Model must be an instance of Markov or Bayesian model.")

    def get_functions(self):
        """
        Returns the scope of every function as a list of variable indices
        (as str).

        For a Bayesian model there is one function per CPD (ordered by
        variable name) listing ``cpd.variables[:0:-1]`` followed by the
        CPD's own variable; for a Markov model there is one function per
        factor listing ``factor.scope()``.

        Example
        -------
        >>> writer = UAIWriter(model)
        >>> writer.get_functions()
        """
        if isinstance(self.model, BayesianModel):
            positions = self._variable_positions()
            functions = []
            for cpd in self._sorted_cpds():
                # All variables after the first, in reverse order; the
                # CPD's own variable goes last.
                evidence = cpd.variables[:0:-1]
                function = [positions[var] for var in evidence]
                function.append(positions[cpd.variable])
                functions.append(function)
            return functions
        elif isinstance(self.model, MarkovModel):
            positions = self._variable_positions()
            return [
                [positions[var] for var in factor.scope()]
                for factor in self.model.get_factors()
            ]
        else:
            raise TypeError("Model must be an instance of Markov or Bayesian model.")

    def get_tables(self):
        """
        Returns the values of every function as a list of strings, in the
        same function order as `get_functions`.

        Example
        -------
        >>> writer = UAIWriter(model)
        >>> writer.get_tables()
        """
        # NOTE(review): values are emitted in the array's own order
        # (`values.ravel()`); confirm that this matches the scope order
        # written by get_functions for CPDs that have evidence.
        if isinstance(self.model, BayesianModel):
            return [
                list(map(str, cpd.values.ravel())) for cpd in self._sorted_cpds()
            ]
        elif isinstance(self.model, MarkovModel):
            return [
                list(map(str, factor.values.ravel()))
                for factor in self.model.get_factors()
            ]
        else:
            raise TypeError("Model must be an instance of Markov or Bayesian model.")

    def write_uai(self, filename):
        """
        Write the UAI data into the file.

        Parameters
        ----------
        filename: Name of the file.

        Examples
        -------
        >>> writer = UAIWriter(model)
        >>> writer.write_uai(test_file)
        """
        content = str(self)
        with open(filename, "w") as fout:
            fout.write(content)