-
Notifications
You must be signed in to change notification settings - Fork 87
/
basic.py
2020 lines (1742 loc) · 70.5 KB
/
basic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Defines Linkers that deal with C implementations.
"""
import logging
import os
import sys
from collections import defaultdict
from copy import copy
from io import StringIO
import numpy as np
from aesara.compile.compilelock import lock_ctx
from aesara.configdefaults import config
from aesara.graph.basic import Constant, NoParams, io_toposort, vars_between
from aesara.graph.callcache import CallCache
from aesara.link.basic import Container, Linker, LocalLinker, PerformLinker
from aesara.link.c.cmodule import (
METH_VARARGS,
DynamicModule,
ExtFunction,
GCC_compiler,
dlimport_workdir,
)
from aesara.link.c.cmodule import get_module_cache as _get_module_cache
from aesara.link.c.interface import CLinkerObject, CLinkerOp, CLinkerType
from aesara.link.utils import gc_helper, map_storage, raise_with_op, streamline
from aesara.utils import difference, uniq
_logger = logging.getLogger("aesara.link.c.basic")
run_cthunk = None # Will be imported only when needed.
def get_module_cache(init_args=None):
    """
    Return the result of `_get_module_cache` for ``config.compiledir``.

    Parameters
    ----------
    init_args
        If not None, the (k, v) pairs in this dictionary will be forwarded to
        the ModuleCache constructor as keyword arguments.

    """
    compile_dir = config.compiledir
    return _get_module_cache(compile_dir, init_args=init_args)
_persistent_module_cache = None


def get_persistent_module_cache():
    """
    Return the module-level `CallCache`, creating it on first use.

    The cache object is constructed with the path
    ``<config.compiledir>/persistent_cache`` and is then kept in the
    module-level ``_persistent_module_cache`` variable, so later calls
    return the same object.
    """
    global _persistent_module_cache
    if _persistent_module_cache is not None:
        return _persistent_module_cache
    cache_path = os.path.join(config.compiledir, "persistent_cache")
    _persistent_module_cache = CallCache(cache_path)
    return _persistent_module_cache
class CodeBlock:
    """
    A unit of generated C code made of a declaration, a behavior and a cleanup.

    ``declare`` and ``behavior`` are stored unchanged.  ``cleanup`` is wrapped:
    a ``__label_<id>:`` label is placed in front of it and a dummy ``double``
    declaration after it, where ``<id>`` is ``sub["id"]``.

    Parameters
    ----------
    declare
        C code that declares variables for use by the computation.
    behavior
        C code that performs the computation.
    cleanup
        C code that cleans up things allocated or incref-ed in behavior.
    sub
        Mapping used for templating.  Only its ``"id"`` entry is read here; it
        names the label and the dummy variable.

    """

    def __init__(self, declare, behavior, cleanup, sub):
        self.declare = declare
        self.behavior = behavior

        # The label is emitted even when `cleanup` is empty because the
        # behavior block jumps there on failure.
        label = "__label_%(id)i:\n" % sub
        # The dummy declaration is there because gcc throws an error when a
        # label is right next to a closing brace.
        dummy = "\ndouble __DUMMY_%(id)i;\n" % sub
        self.cleanup = label + cleanup + dummy
def failure_code(sub, use_goto=True):
    """
    Build the C snippet usually substituted for ``%(fail)s`` in Op code.

    The snippet stores ``sub['id']`` in the variable named by
    ``sub['failure_var']``, sets a ``RuntimeError`` if no Python exception is
    pending and, optionally, jumps to the ``__label_<id>`` label.

    Parameters
    ----------
    sub: dict
        Contains other code snippets that can be substituted,
        in particular 'failure_var' and 'id'.
    use_goto: bool, True by default
        Include a "goto" statement to the failure label.
        Passing False is sometimes required, in which cases we have to
        be careful to avoid executing incorrect code.

    """
    jump = "goto __label_%(id)i;" % sub if use_goto else ""
    template = """{
%(failure_var)s = %(id)i;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
%(goto_statement)s}"""
    return template % dict(sub, goto_statement=jump)
def failure_code_init(sub):
    """
    Build the C snippet used when a step of the struct init fails.

    The snippet sets a ``RuntimeError`` if no Python exception is pending and
    then returns ``sub['id']`` from the enclosing C function.

    Parameters
    ----------
    sub
        Dictionary used to template the snippet.
        * id -> the integer returned by the generated code.

    """
    template = """{
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
return %(id)d;
}"""
    return template % sub
def code_gen(blocks):
    """
    Assemble the C code that runs a list of `CodeBlock` instances in sequence.

    All declarations come first.  Each behavior then opens a new brace scope
    nested inside the previous one, and the cleanups close those scopes in
    reverse order.  For two blocks ``(decl1, task1, cleanup1)`` and
    ``(decl2, task2, cleanup2)`` the result has the form:

        decl1
        decl2
        {
          task1
          {
            task2
            cleanup2
          }
          cleanup1
        }

    Parameters
    ----------
    blocks
        List of CodeBlock instances such that
        * declarations, behavior and cleanup are in the run()
        method of the struct

    """
    ordered = list(blocks)
    declarations = "".join(blk.declare for blk in ordered)
    # Scopes are opened in block order ...
    opening = "".join(f"\n{{\n{blk.behavior}" for blk in ordered)
    # ... and closed innermost-first.
    closing = "".join(f"{blk.cleanup}\n}}\n" for blk in reversed(ordered))
    return declarations + opening + closing
def struct_gen(args, struct_builders, blocks, sub):
    """
    Generate the C++ source of a struct that wraps a stateful computation.

    The generated struct has an ``init`` method that stores the storage
    arguments and runs the struct builders' behavior code, a ``cleanup``
    method (called by the destructor) that runs the struct builders' cleanup
    code and releases the storage, and a ``run`` method that executes
    ``blocks`` and returns the failure code.

    Parameters
    ----------
    args
        Names of the storage arguments.  Each becomes a ``PyObject*`` member of
        the struct and a parameter of ``init``; they represent the storage and
        must be length 1 python lists.
    struct_builders
        List of `CodeBlock` instances such that
        * declarations are in the struct
        * behavior is in the ``init`` method
        * cleanup is in the ``cleanup`` method, which the destructor calls
    blocks
        List of CodeBlock instances such that
        * declarations, behavior and cleanup are in the run()
        method of the struct
    sub
        Dictionary used to template the struct.
        * failure_var -> must contain a variable name to use for
        the failure code.
        * name -> the name of the generated struct.

    Returns
    -------
    str
        In a nutshell, this returns code for a struct that represents
        a function with state. The state's initialization and destruction
        are handled by struct_builders and the actual behavior of the
        function is handled by blocks.

    """
    # NOTE: the final template is filled from `locals()` (see below), so the
    # names of the local variables in this function are part of its behavior.
    struct_decl = ""
    struct_init_head = ""
    struct_init_tail = ""
    struct_cleanup = ""

    for block in struct_builders:
        # decl are declarations that go in the struct
        # init_head are in the constructor
        # init_tail and cleanup do the same thing, but the former will
        # be executed if any step in the constructor fails and the
        # latter only at destruction time.
        struct_decl += block.declare
        struct_init_head = struct_init_head + f"\n{block.behavior}"
        struct_cleanup += block.cleanup

    # Body of run(): nested scopes produced from the per-run blocks.
    behavior = code_gen(blocks)

    # declares the storage
    storage_decl = "\n".join([f"PyObject* {arg};" for arg in args])
    # in the constructor, sets the storage to the arguments
    storage_set = "\n".join([f"this->{arg} = {arg};" for arg in args])
    # increments the storage's refcount in the constructor
    storage_incref = "\n".join([f"Py_XINCREF({arg});" for arg in args])
    # decrements the storage's refcount in the destructor
    storage_decref = "\n".join([f"Py_XDECREF(this->{arg});" for arg in args])

    args_names = ", ".join(args)
    args_decl = ", ".join([f"PyObject* {arg}" for arg in args])

    # The following code stores the exception data in __ERROR, which
    # is a special field of the struct. __ERROR is a list of length 3
    # that holds the type, the value and the traceback. After storing
    # the error, we return the failure code so we know which code
    # block failed.
    do_return = (
        """
if (%(failure_var)s) {
// When there is a failure, this code puts the exception
// in __ERROR.
PyObject* err_type = NULL;
PyObject* err_msg = NULL;
PyObject* err_traceback = NULL;
PyErr_Fetch(&err_type, &err_msg, &err_traceback);
if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
PyList_SET_ITEM(__ERROR, 0, err_type);
PyList_SET_ITEM(__ERROR, 1, err_msg);
PyList_SET_ITEM(__ERROR, 2, err_traceback);
{Py_XDECREF(old_err_type);}
{Py_XDECREF(old_err_msg);}
{Py_XDECREF(old_err_traceback);}
}
// The failure code is returned to index what code block failed.
return %(failure_var)s;
"""
        % sub
    )

    # Work on a copy so the caller's dictionary is not modified, then expose
    # every local computed above (storage_decl, struct_decl, behavior,
    # do_return, ...) to the template below.
    sub = dict(sub)
    sub.update(locals())

    # TODO: add some error checking to make sure storage_<x> are
    # 1-element lists and __ERROR is a 3-elements list.

    struct_code = (
        """
namespace {
struct %(name)s {
PyObject* __ERROR;
%(storage_decl)s
%(struct_decl)s
%(name)s() {
// This is only somewhat safe because we:
// 1) Are not a virtual class
// 2) Do not use any virtual classes in the members
// 3) Deal with mostly POD and pointers
// If this changes, we would have to revise this, but for
// now I am tired of chasing segfaults because
// initialization code had an error and some pointer has
// a junk value.
#ifndef AESARA_DONT_MEMSET_STRUCT
memset(this, 0, sizeof(*this));
#endif
}
~%(name)s(void) {
cleanup();
}
int init(PyObject* __ERROR, %(args_decl)s) {
%(storage_incref)s
%(storage_set)s
%(struct_init_head)s
this->__ERROR = __ERROR;
return 0;
}
void cleanup(void) {
%(struct_cleanup)s
%(storage_decref)s
}
int run(void) {
int %(failure_var)s = 0;
%(behavior)s
%(do_return)s
}
};
}
"""
        % sub
    )

    return struct_code
# The get_<x> functions complete the return value of r.get_<x>()
# with handling of the py_<name> variable.
def get_nothing(fgraph, r, name, sub):
    """
    Policy that generates no code: always returns the empty string.

    It accepts the same arguments as the other ``get_<x>`` policies but
    ignores all of them.
    """
    no_code = ""
    return no_code
def get_c_declare(fgraph, r, name, sub):
    """
    Wrap ``r.type.c_declare`` with a declaration of ``py_<name>``.

    ``c_declare`` is asked for a checked declaration when any client of `r`,
    or the op of the node that owns `r`, has a true ``check_input`` attribute
    (``config.check_input`` is the fallback when the attribute is missing).
    """

    def wants_check(op):
        return getattr(op, "check_input", config.check_input)

    # The declaration will be used by the Apply node that is computing the
    # variable (`r.owner`), and by each of the clients.  If some of these
    # check their inputs, they need `r`'s dtype to be declared.
    client_needs_check = any(
        wants_check(client.op)
        for client, _ in fgraph.clients[r]
        if not isinstance(client, str)
    )
    needs_check = bool(client_needs_check or (r.owner and wants_check(r.owner.op)))

    py_object_decl = f"""
PyObject* py_{name};
"""
    return py_object_decl + r.type.c_declare(name, sub, needs_check)
def get_c_init(fgraph, r, name, sub):
    """
    Wrap ``r.type.c_init`` with code that sets ``py_<name>`` to ``Py_None``.

    The generated prefix also takes a new reference on ``py_<name>``.
    """
    template = """
py_%(name)s = Py_None;
{Py_XINCREF(py_%(name)s);}
"""
    prefix = template % {"name": name}
    return prefix + r.type.c_init(name, sub)
def get_c_extract(fgraph, r, name, sub):
    """
    Wrap ``r.type.c_extract`` with code that loads ``py_<name>`` from storage.

    Input checks are requested from ``c_extract`` only when at least one
    client of `r` has a true ``check_input`` attribute
    (``config.check_input`` is the fallback when the attribute is missing).
    """
    # `c_extract` is called when getting the value of an apply node's
    # input from the compute map, before being used by its clients.
    # That code is not used by the C code of the apply node creating this
    # variable, so `r.owner.op.check_input` is deliberately not consulted.
    client_nodes = [
        client for client, _ in fgraph.clients[r] if not isinstance(client, str)
    ]

    if not any(
        getattr(node.op, "check_input", config.check_input) for node in client_nodes
    ):
        extract_code = r.type.c_extract(name, sub, False)
    elif any(getattr(node.op, "check_broadcast", True) for node in client_nodes):
        extract_code = r.type.c_extract(name, sub, True)
    else:
        # check_broadcast is just an hack to easily remove just the
        # broadcast check on the old GPU back-end. This check isn't
        # done in the new GPU back-end or on the CPU.
        extract_code = r.type.c_extract(name, sub, True, check_broadcast=False)

    template = """
py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
{Py_XINCREF(py_%(name)s);}
"""
    return template % {"name": name} + extract_code
def get_c_extract_out(fgraph, r, name, sub):
    """
    Wrap ``r.type.c_extract_out`` with code that loads ``py_<name>`` from
    storage.

    Only the op of the node that owns `r` is consulted for the
    ``check_input`` and ``check_broadcast`` attributes; the clients of `r`
    are not.
    """
    # `c_extract_out` is used to extract an output variable from
    # the compute map, to be used as pre-allocated memory for `r`
    # before its value gets computed.
    # If the node producing `r` checks its inputs, it may also perform type
    # checks on the initial value of the output.
    producer = r.owner.op
    check_input = getattr(producer, "check_input", config.check_input)

    # check_broadcast is just an hack to easily remove just the
    # broadcast check on the old GPU back-end. This check isn't
    # done in the new GPU back-end or on the CPU.
    if not getattr(producer, "check_broadcast", True):
        extract_code = r.type.c_extract_out(
            name, sub, check_input, check_broadcast=False
        )
    else:
        extract_code = r.type.c_extract_out(name, sub, check_input)

    template = """
py_%(name)s = PyList_GET_ITEM(storage_%(name)s, 0);
{Py_XINCREF(py_%(name)s);}
"""
    return template % {"name": name} + extract_code
def get_c_cleanup(fgraph, r, name, sub):
    """
    Wrap ``r.type.c_cleanup`` with code that decrefs ``py_<name>``.

    The decref is appended after the type's own cleanup code.
    """
    template = """
{Py_XDECREF(py_%(name)s);}
"""
    decref = template % {"name": name}
    return r.type.c_cleanup(name, sub) + decref
def get_c_sync(fgraph, r, name, sub):
    """
    Wrap ``r.type.c_sync`` with code that writes ``py_<name>`` back to storage.

    The generated code only runs when the failure variable is zero.  It puts
    ``py_<name>`` in slot 0 of ``storage_<name>`` and releases the object that
    was there before.
    """
    template = """
if (!%(failure_var)s) {
%(sync)s
PyObject* old = PyList_GET_ITEM(storage_%(name)s, 0);
{Py_XINCREF(py_%(name)s);}
PyList_SET_ITEM(storage_%(name)s, 0, py_%(name)s);
{Py_XDECREF(old);}
}
"""
    fields = dict(sync=r.type.c_sync(name, sub), name=name, **sub)
    return template % fields
def apply_policy(fgraph, policy, r, name, sub):
    """
    Run one policy, or a sequence of policies, and return the generated code.

    Parameters
    ----------
    policy
        Either a single callable ``policy(fgraph, r, name, sub) -> str`` or a
        list/tuple of such callables.
    r: `Variable`
        The variable the code is generated for.

    Returns
    -------
    str
        The result of the single policy, or the concatenation
        ``policy[0](...) + policy[1](...) + ...`` for a list or tuple.

    """
    if not isinstance(policy, (list, tuple)):
        return policy(fgraph, r, name, sub)
    pieces = [step(fgraph, r, name, sub) for step in policy]
    return "".join(pieces)
def struct_variable_codeblocks(fgraph, variable, policies, id, symbol_table, sub):
    """
    Create the two code blocks (struct-level and run-level) for one variable.

    A copy of `sub` is updated with the entries the policies need; the struct
    block uses `id` with the init-style failure code, the run block uses
    ``id + 1`` with the goto-style failure code.

    Parameters
    ----------
    variable : a Variable
    policies : a pair of tuples
        (declare_policy, behavior_policy, cleanup_policy) -- at construction.
        (declare_policy, behavior_policy, cleanup_policy)) -- at execution.
        The first list will produce an element of the 'struct_builders' argument
        in struct_gen. The second list will produce an element of the 'blocks'
        argument in struct_gen.
    id
        The id assigned to this variable's task in the computation.
    symbol_table
        A dict that maps variables to variable names.  If `variable` is not
        yet a key, the name ``V<id>`` is recorded for it.
    sub
        Dictionary for use by `CodeBlock`.  It is copied, not modified.

    Returns
    -------
    tuple
        ``(struct_builder, block)``, both `CodeBlock` instances.

    """
    name = f"V{id}"
    symbol_table.setdefault(variable, name)

    local_sub = dict(sub)

    def fill_and_build(task_id, make_fail, policy_triple):
        # The same dictionary is reused for both blocks; each CodeBlock reads
        # the id it needs at construction time.
        local_sub["id"] = task_id
        local_sub["fail"] = make_fail(local_sub)
        local_sub["py_ptr"] = f"py_{name}"
        local_sub["stor_ptr"] = f"storage_{name}"
        pieces = [
            apply_policy(fgraph, policy, variable, name, local_sub)
            for policy in policy_triple
        ]
        return CodeBlock(*pieces, local_sub)

    # struct_declare, struct_behavior, struct_cleanup
    struct_builder = fill_and_build(id, failure_code_init, policies[0])
    # run_declare, run_behavior, run_cleanup
    block = fill_and_build(id + 1, failure_code, policies[1])

    return struct_builder, block
class CLinker(Linker):
"""
Creates C code for an fgraph, compiles it and returns callables
through make_thunk and make_function that make use of the compiled
code.
no_recycling can contain a list of Variables that belong to the fgraph.
If a Variable is in no_recycling, CLinker will clear the output storage
associated to it during the computation (to avoid reusing it).
"""
def __init__(self, schedule=None):
    """
    Create a linker that is not yet associated with any fgraph.

    Parameters
    ----------
    schedule
        Forwarded to the parent class constructor as its ``scheduler``
        argument.

    """
    # No fgraph is attached until `accept` is called.
    self.fgraph = None
    super().__init__(scheduler=schedule)
def accept(self, fgraph, no_recycling=None, profile=None):
    """
    Associate this linker with `fgraph` and return the associated linker.

    If this linker is already tied to a different fgraph, a new linker of the
    same type is created with the same schedule and that one accepts
    `fgraph` instead; otherwise `self` is updated and returned.

    Parameters
    ----------
    fgraph
        The graph to link.
    no_recycling
        Variables stored as ``self.no_recycling``; defaults to an empty list.
    profile
        Only forwarded when a new linker has to be created.

    """
    recycling_exclusions = [] if no_recycling is None else no_recycling

    # A linker can be tied to only one FunctionGraph.
    bound_to_other_graph = self.fgraph is not None and self.fgraph is not fgraph
    if bound_to_other_graph:
        fresh_linker = type(self)(self.schedule)
        return fresh_linker.accept(fgraph, recycling_exclusions, profile)

    self.fgraph = fgraph
    self.fetch_variables()
    self.no_recycling = recycling_exclusions
    return self
def fetch_variables(self):
    """
    Fill the inputs, outputs, node_order, variables, node_params, orphans,
    consts and temps fields from ``self.fgraph``.

    * ``variables``: the inputs without clients, followed by the variables
      between inputs and outputs, followed by one hidden `Constant` per
      distinct set of node params.
    * ``orphans``: the `Constant` variables that are not inputs and that are
      not inlined as C literals.
    * ``consts``: the `Constant` variables whose type provides a C literal
      for their value.
    * ``temps``: the variables that are not inputs, outputs or orphans.

    As a side effect, ``self.fgraph.clients`` gains an entry
    ``(node, "params")`` for every node that has params.
    """
    fgraph = self.fgraph
    self.inputs = fgraph.inputs
    self.outputs = fgraph.outputs

    self.node_order = self.schedule(fgraph)

    # We need to include the unused inputs in our variables,
    # otherwise we can't pass them to the module.
    self.variables = [var for var in self.inputs if not len(fgraph.clients[var])]
    self.variables += list(vars_between(self.inputs, self.outputs))

    # This adds a hidden input which is the params for each node
    # that needs it
    self.node_params = dict()
    for node in self.node_order:
        params = node.run_params()
        if params is not NoParams:
            # try to avoid creating more than one variable for the
            # same params.
            if params in self.node_params:
                var = self.node_params[params]
                assert var.type == node.params_type
                fgraph.clients[var].append((node, "params"))
            else:
                var = Constant(node.params_type, params)
                fgraph.clients[var] = [(node, "params")]
                self.node_params[params] = var
                self.variables.append(var)

    # The orphans field is listified to ensure a consistent order.
    constant_orphans = [
        r
        for r in self.variables
        if isinstance(r, Constant) and r not in self.inputs
    ]

    # Split the orphans into C literals (`consts`, which do not need a
    # Python object to be passed to the module) and real orphans.
    # This is done by building two new lists rather than by removing items
    # from the list being iterated: removing during iteration makes the
    # loop skip the element that follows each removed one, so that element
    # was never considered for inlining.
    self.consts = []
    self.orphans = []
    for variable in constant_orphans:
        if (
            isinstance(variable, Constant)
            and isinstance(variable.type, CLinkerType)
            # This apparently checks--in a somewhat "dynamic" way--whether
            # or not a literal value is available (in C).
            and variable.type.c_literal(variable.data)
        ):
            self.consts.append(variable)
        else:
            self.orphans.append(variable)

    self.temps = list(
        set(self.variables)
        .difference(self.inputs)
        .difference(self.outputs)
        .difference(self.orphans)
    )
def code_gen(self):
    """
    Generate the code of the struct that performs the computation of the
    fgraph, store it in the ``struct_code`` field and return it.

    The result is cached: if ``self.struct_code`` is already set, it is
    returned without regenerating anything.

    Besides ``struct_code``, this sets ``struct_name``, ``args``,
    ``r2symbol``, ``init_blocks``, ``init_tasks``, ``blocks``, ``tasks``,
    ``c_support_code_apply``, ``c_init_code_apply`` and ``dupidx``.

    NOTE(review): earlier documentation of this method referred to a
    ``reuse_storage`` flag; this method takes no such parameter.  What is
    visible here is that temporaries that are not "simple" and not in
    ``no_recycling`` are declared in the struct and only cleaned up in
    the destructor, so their storage is kept between runs.

    Raises
    ------
    NotImplementedError
        If a variable's type is not a `CLinkerType` or a node's op is not a
        `CLinkerOp`.
    TypeError
        If an orphan is not a `Constant`.

    """

    if getattr(self, "struct_code", False):
        return self.struct_code

    no_recycling = self.no_recycling

    c_support_code_apply = []
    c_init_code_apply = []

    # Maps each variable to the C symbol (or C literal) that represents it.
    symbol = {}

    # (init_)tasks contains a list of pairs (Op/Variable, task_name)
    # e.g. (x, 'get') or (x+y, 'code')
    init_tasks = []
    tasks = []

    # (init_)blocks contain CodeBlock instances. There is a direct
    # correspondance with (init_)tasks.
    init_blocks = []
    blocks = []

    failure_var = "__failure"
    # Running task id; every variable and every node consumes two ids.
    id = 1

    for variable in self.variables:
        if not isinstance(variable.type, CLinkerType):
            raise NotImplementedError(f"Type of {variable} cannot produce C code")

        sub = dict(failure_var=failure_var)

        # it might be possible to inline constant variables as C literals
        # policy = [[what to declare in the struct,
        #            what to do at construction,
        #            what to do at destruction],
        #           [what to declare in each run,
        #            what to do at the beginning of each run,
        #            what to do at the end of each run]]
        if variable in self.consts:
            # Inlined as a C literal: no code block and no id are used.
            symbol[variable] = "(" + variable.type.c_literal(variable.data) + ")"
            continue
        elif variable in self.inputs:
            # We need to extract the new inputs at each run
            # they do not need to be relayed to Python, so we don't sync.
            # If the variable is both an input and an output, there is
            # no need to synchronize either, it is already up-to-date.
            policy = [
                [get_nothing, get_nothing, get_nothing],
                [get_c_declare, get_c_extract, get_c_cleanup],
            ]
        elif variable in self.orphans:
            if not isinstance(variable, Constant):
                raise TypeError(
                    "All orphans to CLinker must be Constant instances. "
                    f"Got {variable}"
                )
            # orphans are not inputs so we'll just get fetch them
            # when we initialize the struct and assume they stay
            # the same
            policy = [
                [get_c_declare, get_c_extract, get_c_cleanup],
                [get_nothing, get_nothing, get_nothing],
            ]
        elif variable in self.temps:
            # temps don't need to be extracted from Python, so we
            # call c_init rather than c_extract they do not need
            # to be relayed to Python, so we don't sync
            if variable.type.c_is_simple() or variable in no_recycling:
                policy = [
                    [get_nothing, get_nothing, get_nothing],
                    [get_c_declare, get_c_init, get_c_cleanup],
                ]
            else:
                # it is useful for complex temps to reuse storage
                # at each run, so we only clean up in the
                # destructor
                policy = [
                    [get_c_declare, get_c_init, get_c_cleanup],
                    [get_nothing, get_nothing, get_nothing],
                ]
        elif variable in self.outputs:
            if variable.type.c_is_simple() or variable in no_recycling:
                # Do not extract output from Python
                policy = [
                    [get_nothing, get_nothing, get_nothing],
                    [get_c_declare, get_c_init, (get_c_sync, get_c_cleanup)],
                ]
            else:
                # We try to use the output that is pre-allocated.
                # The linker will usually just reuse the storage
                # from last run, but in the first execution,
                # it will be None.
                # We clean-up at each run to enable garbage collection
                # in the Linker.
                policy = [
                    [get_nothing, get_nothing, get_nothing],
                    [get_c_declare, get_c_extract_out, (get_c_sync, get_c_cleanup)],
                ]
        else:
            raise Exception(
                "this shouldn't be possible, please report this exception"
            )

        builder, block = struct_variable_codeblocks(
            self.fgraph, variable, policy, id, symbol, sub
        )

        # each Variable generates two CodeBlocks, one to
        # declare/initialize/destroy struct variables and the
        # other to declare/extract/cleanup each time the function
        # is run.
        # Typically, only one of the two actually does anything
        # (see all the possible combinations above)

        init_tasks.append((variable, "init", id))
        init_blocks.append(builder)

        tasks.append((variable, "get", id + 1))
        blocks.append(block)

        id += 2

    for node_num, node in enumerate(self.node_order):

        op = node.op

        if not isinstance(op, CLinkerOp):
            raise NotImplementedError(f"{op} cannot produce C code")

        sub = dict(failure_var=failure_var)

        params = node.run_params()
        if params is not NoParams:
            params_var = symbol[self.node_params[params]]

        # The placeholder will be replaced by a hash of the entire
        # code (module + support code) in DynamicModule.code.
        # This ensures that, when defining functions in support code,
        # we cannot have two different functions, in different modules,
        # that have the same name.
        name = f"node_<<<<HASH_PLACEHOLDER>>>>_{node_num}"
        isyms = [symbol[r] for r in node.inputs]
        osyms = [symbol[r] for r in node.outputs]

        # Make the CodeBlock for c_code
        sub["id"] = id
        sub["fail"] = failure_code(sub)
        if params is not NoParams:
            sub["params"] = params_var

        sub_struct = dict()
        sub_struct["id"] = id + 1
        # NOTE(review): the failure snippet is built from `sub` (id), not
        # from `sub_struct` (id + 1), so a failing struct init returns the
        # id of this node's "code" task rather than of its "init" task.
        # Both ids belong to the same node -- confirm whether this is
        # intended before changing it.
        sub_struct["fail"] = failure_code_init(sub)
        if params is not NoParams:
            # Since params inputs are always constants they are
            # guaranteed to be available in the struct init code.
            sub_struct["params"] = params_var

        c_support_code_apply.append(op.c_support_code_apply(node, name))
        c_init_code_apply.append(op.c_init_code_apply(node, name))
        struct_init = op.c_init_code_struct(node, name, sub_struct)

        struct_support = op.c_support_code_struct(node, name)
        struct_cleanup = op.c_cleanup_code_struct(node, name)

        behavior = op.c_code(node, name, isyms, osyms, sub)

        assert isinstance(
            behavior, str
        ), f"{node.op} didn't return a string for c_code"
        # To help understand what is following. It help read the c code.
        # This prevent different op that generate the same c code
        # to be merged, I suppose this won't happen...
        behavior = "// Op class " + node.op.__class__.__name__ + "\n" + behavior

        cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)

        _logger.info(f"compiling un-versioned Apply {node}")

        # Run-time block for the node: uses the current id.
        blocks.append(CodeBlock("", behavior, cleanup, sub))
        tasks.append((node, "code", id))
        id += 1

        # Struct-level block for the node: uses the next id.
        init_blocks.append(
            CodeBlock(struct_support, struct_init, struct_cleanup, {"id": id})
        )
        init_tasks.append((node, "init", id))
        id += 1

    # List of arg names for use in struct_gen. Note the call to
    # uniq: duplicate inputs must only be passed once because they
    # are mapped to the same name.  Duplicates are defined by (a
    # is b), rather than (a==b) since Constant instances can
    # compare equal to equivalent Constant instances.
    args = []
    args += [
        f"storage_{symbol[variable]}"
        for variable in uniq(self.inputs + self.outputs + self.orphans)
    ]

    # <<<<HASH_PLACEHOLDER>>>> will be replaced by a hash of the whole
    # code in the file, including support code, in DynamicModule.code.
    struct_name = f"__struct_compiled_op_{'<<<<HASH_PLACEHOLDER>>>>'}"
    struct_code = struct_gen(
        args, init_blocks, blocks, dict(failure_var=failure_var, name=struct_name)
    )

    self.struct_code = struct_code
    self.struct_name = struct_name
    self.args = args
    self.r2symbol = symbol
    self.init_blocks = init_blocks
    self.init_tasks = init_tasks
    self.blocks = blocks
    self.tasks = tasks
    all_info = self.inputs + self.outputs + self.orphans
    self.c_support_code_apply = c_support_code_apply
    self.c_init_code_apply = c_init_code_apply

    # Sanity check: the tasks built here must match what
    # `get_init_tasks` computes; on mismatch both versions are printed to
    # stderr before the assertion fails.
    if (self.init_tasks, self.tasks) != self.get_init_tasks():
        print("init_tasks\n", self.init_tasks, file=sys.stderr)
        print(self.get_init_tasks()[0], file=sys.stderr)
        print("tasks\n", self.tasks, file=sys.stderr)
        print(self.get_init_tasks()[1], file=sys.stderr)
        assert (self.init_tasks, self.tasks) == self.get_init_tasks()

    # List of indices that should be ignored when passing the arguments
    # (basically, everything that the previous call to uniq eliminated)
    self.dupidx = [
        i
        for i, x in enumerate(all_info)
        if all_info.count(x) > 1 and all_info.index(x) != i
    ]
    return self.struct_code
def support_code(self):
    """
    Collect the support code strings needed by the Variables and Ops.

    When ``config.cmodule__debug`` is set, a snippet defining the ``DEBUG``
    macro comes first.  The support code of the variables' types is added
    before the support code of the ops.  A ``c_support_code`` result that is
    a list contributes each of its items; any other result is added as is.

    The returned list might contain duplicates.
    """
    collected = []
    if config.cmodule__debug:
        collected.append(
            """
#ifndef DEBUG
#define DEBUG
#endif
"""
        )

    # generic support code: types first, then ops
    providers = [var.type for var in self.variables]
    providers += [node.op for node in self.node_order]
    for provider in providers:
        code = provider.c_support_code()
        if not isinstance(code, list):
            collected.append(code)
        else:
            collected.extend(code)
    return collected
def compile_args(self):
    """
    Collect the compile args needed by the Variables and Ops.

    The list starts with fixed default flags, followed by the flags requested
    by every variable type and op that is a `CLinkerObject`; this part is
    de-duplicated with `uniq`.  The compiler's own flags are appended
    afterwards, and finally each flag listed by a ``c_no_compile_args``
    method has one occurrence removed if present.

    The result might therefore still contain duplicates.
    """
    # "-fno-math-errno" is one of the flags that -ffast-math activates.  It
    # is spelled out explicitly because FillMissing must disable some of
    # the others; the remaining -ffast-math components are deliberately not
    # enabled here.
    flags = [
        "-O3",
        "-fno-math-errno",
        # the current code generate label event if they are not used.
        # Could use gcc attribute for those label only
        "-Wno-unused-label",
        "-Wno-unused-variable",  # idem as the precedent
        "-Wno-write-strings",  # generated by our code generator...
    ]

    compiler = self.c_compiler()

    candidates = [var.type for var in self.variables]
    candidates += [node.op for node in self.node_order]
    providers = [obj for obj in candidates if isinstance(obj, CLinkerObject)]

    for provider in providers:
        flags += provider.c_compile_args(c_compiler=compiler)

    flags = uniq(flags)  # to remove duplicate
    # The args set by the compiler include the user flags. We do not want
    # to reorder them
    flags += compiler.compile_args()

    for provider in providers:
        for unwanted in provider.c_no_compile_args(c_compiler=compiler):
            try:
                flags.remove(unwanted)
            except ValueError:
                pass  # in case the value is not there
    return flags
def headers(self):
"""