From b114b8d41fd5410e07d94337be2a6e224a16d13d Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Sun, 12 Feb 2023 21:09:29 +0100 Subject: [PATCH] cgen: make `.global.`s work more according to spec For the C target, globals defined inside procedures that are resource- like (i.e. have a user-defined or lifted destructor) are now initialized in a module's pre-init procedure, instead of each time control-flow reaches the definition. This mirrors what already happened for non-resource-like globals. In addition, globals defined in `inline` procedures no longer cause linking errors because of duplicated definitions in the generated C code. The globals are extracted before translating the AST to MIR code, making the workaround in `mirgen` (that didn't work anyway) obsolete. In order to work towards unifying the architecture of the back-ends, `vmbackend` now also makes use of the pre-extraction, no longer requiring extra logic in the code-generator. - the destructor injection pass is not run for the initializer expression of globals defined inside procedures - `.global.`s on the JS target still don't work according to specification --- compiler/backend/cgen.nim | 35 ++++++++++++ compiler/mir/mirgen.nim | 15 +---- compiler/sem/injectdestructors.nim | 8 +++ compiler/sem/transf.nim | 57 +++++++++++++++++++ compiler/vm/vmbackend.nim | 36 ++++++++---- compiler/vm/vmdef.nim | 6 -- compiler/vm/vmgen.nim | 23 ++++---- tests/global/globalaux.nim | 9 +++ tests/global/tglobal.nim | 13 ++++- .../destructor/tdestructor_in_initializer.nim | 29 ++++++++++ 10 files changed, 188 insertions(+), 43 deletions(-) create mode 100644 tests/lang_objects/destructor/tdestructor_in_initializer.nim diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index f5e29bd31f1..fa512b570e7 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -75,6 +75,10 @@ from compiler/ast/reports_sem import SemReport, reportTyp from compiler/ast/report_enums import ReportKind +# XXX: the code-generator should not need to know about the existance of +# destructor injections (or destructors, for that matter) +from compiler/sem/injectdestructors import deferGlobalDestructor + import std/strutils except `%` # collides with ropes.`%` from compiler/ic/ic import ModuleBackendFlag @@ -1028,6 +1032,37 @@ proc genProcAux(m: BModule, prc: PSym) = assert(prc.ast != nil) var procBody = transformBody(m.g.graph, m.idgen, prc, cache = false) + block: + # process globals defined by the procedure's body + var globals: seq[PNode] + extractGlobals(procBody, globals, isNimVm = false) + # note: we're modifying the procedure's cached transformed body above, + # meaning that globals defiend inside ``inline`` procedures are also only + # extracted once + + let m2 = findPendingModule(m, prc) + + # first pass: register the destructors + for it in globals.items: + deferGlobalDestructor(m2.g.graph, m2.idgen, prc, it[0]) + + # second pass: generate the initialization code. This is done here already, + # as it might depend on other procedures. Deferring this to ``genInitCode`` + # is not possible, because then it's too late to raise further dependencies. + # Also, generate the code in the pre-init procedure of the module where the + # procedure is *defined*, not where it's first *used* (this is only relevant + # for ``inline`` procedures, as they're generated multiple times) + for it in globals.items: + # since the identdefs are extracted from the transformed AST, the + # initializer expression isn't canonicalized yet + let value = + if it[2].kind != nkEmpty: + canonicalizeSingle(m2.g.graph, m2.idgen, prc, it[2], {}) + else: + it[2] + + genSingleVar(m2.preInitProc, it[0].sym, it[0], value) + procBody = canonicalizeWithInject(m.g.graph, m.idgen, prc, procBody, {}) if sfPure notin prc.flags and prc.typ[0] != nil: diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index 9d76279e7f8..093926eacba 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -1353,28 +1353,19 @@ proc genLocInit(c: var TCtx, symNode: PNode, initExpr: PNode) = hasInitExpr = initExpr.kind != nkEmpty wantsOwnership = sfCursor notin sym.flags and hasDestructor(sym.typ) - isProcGlobal = sfGlobal in sym.flags and - c.context in routineKinds assert sym.kind in {skVar, skLet, skTemp, skForVar} # if there's an initial value and the destination is non-owning, we pass the # value directly to the def - # HACK: we rely on an implementation detail of ``astgen`` (namely that a - # 'def' with an initializer is translated to a single ``nkVarSection`` - # node) for globals defined at procedure scope. This is not the way - # to go. It makes sense for these globals to be part of a procedure's - # AST during the sem phase, but past this point, it's actually harmful. - # They should be lifted into a separate list that is stored with the - # module the procedure is attached to, which the back-end then queries - if hasInitExpr and (not wantsOwnership or isProcGlobal): + if hasInitExpr and not wantsOwnership: # TODO: add a test for using a constructor expression for initializing a # cursor - forward: genx(c, initExpr, consume = isProcGlobal) + forward: genx(c, initExpr, consume = false) genLocDef(c, symNode) - if hasInitExpr and wantsOwnership and not isProcGlobal: + if hasInitExpr and wantsOwnership: # a copy or sink can't be expressed via a pass-to-def -- we need to use # the assignment operator genAsgn(c, true, symNode, initExpr) diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim index 69e8e204f74..16cc0c279f9 100644 --- a/compiler/sem/injectdestructors.nim +++ b/compiler/sem/injectdestructors.nim @@ -1103,6 +1103,14 @@ func shouldInjectDestructorCalls*(owner: PSym): bool = {sfInjectDestructors, sfGeneratedOp} * owner.flags == {sfInjectDestructors} and (owner.kind != skIterator or not isInlineIterator(owner.typ)) +proc deferGlobalDestructor*(g: ModuleGraph, idgen: IdGenerator, owner: PSym, + global: PNode) = + ## If the global has a destructor, emits a call to it at the end of the + ## section of global destructors. + if sfThread notin global.sym.flags and hasDestructor(global.typ): + var c = Con(owner: owner, graph: g, idgen: idgen) + g.globalDestructors.add genDestroy(c, global) + proc injectDestructorCalls*(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n: PNode): PNode = when toDebug.len > 0: shouldDebug = toDebug == owner.name.s or toDebug == "always" diff --git a/compiler/sem/transf.nim b/compiler/sem/transf.nim index 89cd9cf47bd..ab141db344e 100644 --- a/compiler/sem/transf.nim +++ b/compiler/sem/transf.nim @@ -1154,3 +1154,60 @@ proc transformExpr*(g: ModuleGraph; idgen: IdGenerator; module: PSym, n: PNode): # expressions are not to be injected with destructor calls as that # the list of top level statements needs to be collected before. incl(result.flags, nfTransf) + +proc extractGlobals*(body: PNode, output: var seq[PNode], isNimVm: bool) = + ## Searches for all ``nkIdentDefs`` defining a global, appends them to + ## `output` in the order they appear in the input AST, and removes the nodes + ## from `body`. `isNimVm` signals which branch to select for ``when nimvm`` + ## statements/expressions. + ## + ## XXX: this can't happen as part of ``transformBody``, as ``transformBody`` + ## is reentrant because of ``lambdalifting`` and it's thus not easily + ## possible to collect something from the body of a single procedure + ## only. There's also the problem that extracting the globals is not + ## wanted when transformation happens for a procedure that's invoked + ## during CTFE and used in normal code. Eventually, ``transformBody`` + ## will no longer use the current caching mechanism and only produce the + ## transformed version of the input AST, but until then, + ## ``collectGlobals`` works good enough + case body.kind + of nkTypeSection, nkTypeOfExpr, nkCommentStmt, nkIncludeStmt, nkImportStmt, + nkImportExceptStmt, nkExportStmt, nkExportExceptStmt, nkFromStmt, + nkStaticStmt, nkMixinStmt, nkBindStmt, nkLambdaKinds, routineDefs, + nkNimNodeLit: + discard "ignore declarative contexts" + of nkWithoutSons: + discard "not relevant" + of nkConv, nkHiddenStdConv, nkHiddenSubConv: + # only analyse the imperative part: + extractGlobals(body[1], output, isNimVm) + of nkWhen: + # a ``when nimvm`` construct + # XXX: this logic duplicates what ``mirgen`` already does. Maybe + # collecting should happen there? Or should procedure-level globals + # be lifted from procedures during semantic analysis already? + let branch = + if isNimVm: body[0][1] + else: body[1][0] + + extractGlobals(branch, output, isNimVm) + of nkVarSection, nkLetSection: + # iterate over all children and extract identdefs of globals: + var i = 0 + while i < body.len: + let it = body[i] + if it.kind == nkIdentDefs and + it[0].kind == nkSym and sfGlobal in it[0].sym.flags: + # found one; append it to the output: + output.add(it) + # there's no need to process the initializer expression of the global, + # as we know that further globals defined inside them are not visible + # to the outside + body.sons.delete(i) + else: + inc i + + else: + # search all child nodes: + for it in body.items: + extractGlobals(it, output, isNimVm) \ No newline at end of file diff --git a/compiler/vm/vmbackend.nim b/compiler/vm/vmbackend.nim index e47853f92c6..3bfc9a9a85c 100644 --- a/compiler/vm/vmbackend.nim +++ b/compiler/vm/vmbackend.nim @@ -177,8 +177,13 @@ proc generateTopLevelStmts*(module: var Module, c: var TCtx, module.initProc = (start: start, regCount: c.prc.regInfo.len) -proc generateCodeForProc(c: var TCtx, s: PSym): VmGenResult = +proc generateCodeForProc(c: var TCtx, s: PSym, + globals: var seq[PNode]): VmGenResult = + ## Generates and emits the bytecode for the procedure `s`. The globals + ## defined in it are extracted from the body and their identdefs appended + ## to `globals`. var body = transformBody(c.graph, c.idgen, s, cache = false) + extractGlobals(body, globals, isNimVm = true) body = canonicalize(c.graph, c.idgen, s, body, {goIsNimvm}) result = genProc(c, s, body) @@ -197,6 +202,10 @@ proc generateGlobalInit(c: var TCtx, f: var CodeFragment, defs: openArray[PNode] for def in defs.items: assert def.kind == nkIdentDefs for i in 0.. 0: + # generate and emit the code for `{.global.}` initialization here, as the + # initializer expression might depend on otherwise unused procedures (which + # might define further globals...) + if globals.len > 0: let mI = mlist.moduleMap[c.module.id] generateGlobalInit(c, mlist.modules[mI].initGlobalsCode, - c.codegenInOut.globalDefs) + globals) - c.codegenInOut.globalDefs.setLen(0) + # prepare for reuse: + globals.setLen(0) # code-gen might've found new functions, so adjust the function table: c.functions.setLen(c.codegenInOut.nextProc) @@ -387,7 +402,6 @@ proc generateCode*(g: ModuleGraph) = mode: emStandalone) c.typeInfoCache.init() - c.codegenInOut.flags = {cgfCollectGlobals} # register the extra ops so that code generation isn't performed for the # corresponding procs: diff --git a/compiler/vm/vmdef.nim b/compiler/vm/vmdef.nim index 527f4e677ab..3b31a34d848 100644 --- a/compiler/vm/vmdef.nim +++ b/compiler/vm/vmdef.nim @@ -493,8 +493,6 @@ type cgfAllowMeta ## If not present, type or other meta expressions are ## disallowed in imperative contexts and code-gen for meta ## function arguments (e.g. `typedesc`) is suppressed - cgfCollectGlobals ## If present, the ident defs of `{.global.}` variables - ## are collected instead of code-gen'ed LinkIndex* = uint32 ## Depending on the context: `FunctionIndex`; index ## into `TCtx.globals`; index into `TCtx.complexConsts` @@ -519,10 +517,6 @@ type nextGlobal*: LinkIndex nextConst*: LinkIndex - globalDefs*: seq[PNode] ## output; collected `{.global.}` definitions. - ## They're only collected if `cgfCollectGlobals` - ## is active - flags*: set[CodeGenFlag] ## input VmGenDiagKind* = enum diff --git a/compiler/vm/vmgen.nim b/compiler/vm/vmgen.nim index 97635e079a7..6dd6003f0c2 100644 --- a/compiler/vm/vmgen.nim +++ b/compiler/vm/vmgen.nim @@ -2327,22 +2327,19 @@ proc genVarSection(c: var TCtx; n: PNode) = discard c.registerGlobal(s) discard c.getOrCreate(s.typ) - # no need to generate or collect if the global has no initializer + # no need to generate an assignment if the global has no initializer if a[2].kind == nkEmpty: continue - if cgfCollectGlobals in c.codegenInOut.flags and - s.owner != nil and - s.owner.kind in routineKinds: - # we encountered a function-level global and code generation for - # them is defered - c.codegenInOut.globalDefs.add a - else: - let tmp = genSymAddr(c, a[0]) - let val = c.genx(a[2]) - c.gABC(a, opcWrDeref, tmp, 0, val) - c.freeTemp(val) - c.freeTemp(tmp) + # for globals, ``vmgen`` trusts the callsite (i.e. the place where + # ``genProcBody`` is invoked) to make sure that globals defined + # inside procedures are extracted / otherwise taken care of. Thus, we + # emit the initialization logic here without further checks + let tmp = genSymAddr(c, a[0]) + let val = c.genx(a[2]) + c.gABC(a, opcWrDeref, tmp, 0, val) + c.freeTemp(val) + c.freeTemp(tmp) else: let reg = setSlot(c, s) if a[2].kind == nkEmpty: diff --git a/tests/global/globalaux.nim b/tests/global/globalaux.nim index 951472695e5..421207869f6 100644 --- a/tests/global/globalaux.nim +++ b/tests/global/globalaux.nim @@ -13,3 +13,12 @@ proc globalInstance*[T]: var TObj[T] = var g {.global.} = when T is int: makeObj(10) else: makeObj("hello") result = g +proc testInline*(cmp: int) {.inline.} = + # initialization of the global needs to happen at the start of ``globalaux`` + var v {.global.} = 1 + doAssert v == cmp + # modify the global. The other module from which ``testInline`` is called + # must be able to observe this modification: + v = 2 + +testInline(1) # change the `v` to '2' \ No newline at end of file diff --git a/tests/global/tglobal.nim b/tests/global/tglobal.nim index 5ce078215f9..ddcafa4c9e5 100644 --- a/tests/global/tglobal.nim +++ b/tests/global/tglobal.nim @@ -1,8 +1,15 @@ discard """ + description: ''' + Tests for globals defined inside procedures via the `.global.` pragma + ''' + targets: "c !vm" output: "in globalaux2: 10\ntotal globals: 2\nint value: 100\nstring value: second" - """ +## knownIssue: the VM backend initializes the globals in the pre-init procedure +## of the module where the generic is defined, not where it's +## instantiated + import globalaux, globalaux2 echo "total globals: ", totalGlobals @@ -14,3 +21,7 @@ globalInstance[string]().val = "first" globalInstance[string]().val = "second" echo "string value: ", globalInstance[string]().val +block inline_procedure_with_global: + # call the inline procedure and make sure that the global was not + # re-initialized at the start of the current module + testInline(2) \ No newline at end of file diff --git a/tests/lang_objects/destructor/tdestructor_in_initializer.nim b/tests/lang_objects/destructor/tdestructor_in_initializer.nim new file mode 100644 index 00000000000..a50aa2f8be4 --- /dev/null +++ b/tests/lang_objects/destructor/tdestructor_in_initializer.nim @@ -0,0 +1,29 @@ +discard """ + description: ''' + Regression test to make sure that locals inside the initializer of a global + defined inside a procedure's body are properly destroyed + ''' + targets: "c" + knownIssue: "the `injectdestructors` pass is not for the initializer" +""" + +var wasDestroyed: bool +# don't initialize the global; doing so might override the value assgined +# during pre-initialization + +# initialization of ``.global.``s currently happens *before* the code part +# of the module is executed, so if the destructor was called, ``wasDestroyed`` +# has to be true here +doAssert wasDestroyed + +type Resource = object + +proc `=destroy`(x: var Resource) = + wasDestroyed = true + +proc prc() = + var v {.global.} = block: + var r = Resource() # `r` needs to be destroyed at the end of the block + 1 + +prc() # use the procedure so that the global is part of the alive code \ No newline at end of file