Skip to content

Commit

Permalink
Merge #536
Browse files Browse the repository at this point in the history
536: cgen: make `.global.`s work more according to spec r=saem a=zerbina

## Summary
For the C target, globals defined inside procedures that are resource- like (i.e. have a user-defined or lifted destructor) are now initialized in a module's pre-init procedure, instead of each time control-flow reaches the definition. This mirrors what already happened for non-resource-like globals.

In addition, globals defined in `inline` procedures no longer cause linking errors because of duplicated definitions in the generated C code.

## Details
The globals are extracted before translating the AST to MIR code, making the workaround in `mirgen` (that didn't work anyway) obsolete. In order to work towards unifying the architecture of the back-ends, `vmbackend` now also makes use of the pre-extraction, no longer requiring extra logic in the code-generator.

### Known Issues
- the destructor-injection pass is not run for the initializer expression of globals defined inside procedures
- `.global.`s on the JS target still don't work according to specification



Co-authored-by: zerbina <100542850+zerbina@users.noreply.github.com>
  • Loading branch information
bors[bot] and zerbina committed Feb 14, 2023
2 parents b93ddd8 + b114b8d commit 5d1c185
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 43 deletions.
35 changes: 35 additions & 0 deletions compiler/backend/cgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ from compiler/ast/reports_sem import SemReport,
reportTyp
from compiler/ast/report_enums import ReportKind

# XXX: the code-generator should not need to know about the existance of
# destructor injections (or destructors, for that matter)
from compiler/sem/injectdestructors import deferGlobalDestructor

import std/strutils except `%` # collides with ropes.`%`

from compiler/ic/ic import ModuleBackendFlag
Expand Down Expand Up @@ -1028,6 +1032,37 @@ proc genProcAux(m: BModule, prc: PSym) =
assert(prc.ast != nil)

var procBody = transformBody(m.g.graph, m.idgen, prc, cache = false)
block:
# process globals defined by the procedure's body
var globals: seq[PNode]
extractGlobals(procBody, globals, isNimVm = false)
# note: we're modifying the procedure's cached transformed body above,
# meaning that globals defiend inside ``inline`` procedures are also only
# extracted once

let m2 = findPendingModule(m, prc)

# first pass: register the destructors
for it in globals.items:
deferGlobalDestructor(m2.g.graph, m2.idgen, prc, it[0])

# second pass: generate the initialization code. This is done here already,
# as it might depend on other procedures. Deferring this to ``genInitCode``
# is not possible, because then it's too late to raise further dependencies.
# Also, generate the code in the pre-init procedure of the module where the
# procedure is *defined*, not where it's first *used* (this is only relevant
# for ``inline`` procedures, as they're generated multiple times)
for it in globals.items:
# since the identdefs are extracted from the transformed AST, the
# initializer expression isn't canonicalized yet
let value =
if it[2].kind != nkEmpty:
canonicalizeSingle(m2.g.graph, m2.idgen, prc, it[2], {})
else:
it[2]

genSingleVar(m2.preInitProc, it[0].sym, it[0], value)

procBody = canonicalizeWithInject(m.g.graph, m.idgen, prc, procBody, {})

if sfPure notin prc.flags and prc.typ[0] != nil:
Expand Down
15 changes: 3 additions & 12 deletions compiler/mir/mirgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1353,28 +1353,19 @@ proc genLocInit(c: var TCtx, symNode: PNode, initExpr: PNode) =
hasInitExpr = initExpr.kind != nkEmpty
wantsOwnership = sfCursor notin sym.flags and
hasDestructor(sym.typ)
isProcGlobal = sfGlobal in sym.flags and
c.context in routineKinds

assert sym.kind in {skVar, skLet, skTemp, skForVar}

# if there's an initial value and the destination is non-owning, we pass the
# value directly to the def
# HACK: we rely on an implementation detail of ``astgen`` (namely that a
# 'def' with an initializer is translated to a single ``nkVarSection``
# node) for globals defined at procedure scope. This is not the way
# to go. It makes sense for these globals to be part of a procedure's
# AST during the sem phase, but past this point, it's actually harmful.
# They should be lifted into a separate list that is stored with the
# module the procedure is attached to, which the back-end then queries
if hasInitExpr and (not wantsOwnership or isProcGlobal):
if hasInitExpr and not wantsOwnership:
# TODO: add a test for using a constructor expression for initializing a
# cursor
forward: genx(c, initExpr, consume = isProcGlobal)
forward: genx(c, initExpr, consume = false)

genLocDef(c, symNode)

if hasInitExpr and wantsOwnership and not isProcGlobal:
if hasInitExpr and wantsOwnership:
# a copy or sink can't be expressed via a pass-to-def -- we need to use
# the assignment operator
genAsgn(c, true, symNode, initExpr)
Expand Down
8 changes: 8 additions & 0 deletions compiler/sem/injectdestructors.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,14 @@ func shouldInjectDestructorCalls*(owner: PSym): bool =
{sfInjectDestructors, sfGeneratedOp} * owner.flags == {sfInjectDestructors} and
(owner.kind != skIterator or not isInlineIterator(owner.typ))

proc deferGlobalDestructor*(g: ModuleGraph, idgen: IdGenerator, owner: PSym,
global: PNode) =
## If the global has a destructor, emits a call to it at the end of the
## section of global destructors.
if sfThread notin global.sym.flags and hasDestructor(global.typ):
var c = Con(owner: owner, graph: g, idgen: idgen)
g.globalDestructors.add genDestroy(c, global)

proc injectDestructorCalls*(g: ModuleGraph; idgen: IdGenerator; owner: PSym; n: PNode): PNode =
when toDebug.len > 0:
shouldDebug = toDebug == owner.name.s or toDebug == "always"
Expand Down
57 changes: 57 additions & 0 deletions compiler/sem/transf.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1154,3 +1154,60 @@ proc transformExpr*(g: ModuleGraph; idgen: IdGenerator; module: PSym, n: PNode):
# expressions are not to be injected with destructor calls as that
# the list of top level statements needs to be collected before.
incl(result.flags, nfTransf)

proc extractGlobals*(body: PNode, output: var seq[PNode], isNimVm: bool) =
## Searches for all ``nkIdentDefs`` defining a global, appends them to
## `output` in the order they appear in the input AST, and removes the nodes
## from `body`. `isNimVm` signals which branch to select for ``when nimvm``
## statements/expressions.
##
## XXX: this can't happen as part of ``transformBody``, as ``transformBody``
## is reentrant because of ``lambdalifting`` and it's thus not easily
## possible to collect something from the body of a single procedure
## only. There's also the problem that extracting the globals is not
## wanted when transformation happens for a procedure that's invoked
## during CTFE and used in normal code. Eventually, ``transformBody``
## will no longer use the current caching mechanism and only produce the
## transformed version of the input AST, but until then,
## ``collectGlobals`` works good enough
case body.kind
of nkTypeSection, nkTypeOfExpr, nkCommentStmt, nkIncludeStmt, nkImportStmt,
nkImportExceptStmt, nkExportStmt, nkExportExceptStmt, nkFromStmt,
nkStaticStmt, nkMixinStmt, nkBindStmt, nkLambdaKinds, routineDefs,
nkNimNodeLit:
discard "ignore declarative contexts"
of nkWithoutSons:
discard "not relevant"
of nkConv, nkHiddenStdConv, nkHiddenSubConv:
# only analyse the imperative part:
extractGlobals(body[1], output, isNimVm)
of nkWhen:
# a ``when nimvm`` construct
# XXX: this logic duplicates what ``mirgen`` already does. Maybe
# collecting should happen there? Or should procedure-level globals
# be lifted from procedures during semantic analysis already?
let branch =
if isNimVm: body[0][1]
else: body[1][0]

extractGlobals(branch, output, isNimVm)
of nkVarSection, nkLetSection:
# iterate over all children and extract identdefs of globals:
var i = 0
while i < body.len:
let it = body[i]
if it.kind == nkIdentDefs and
it[0].kind == nkSym and sfGlobal in it[0].sym.flags:
# found one; append it to the output:
output.add(it)
# there's no need to process the initializer expression of the global,
# as we know that further globals defined inside them are not visible
# to the outside
body.sons.delete(i)
else:
inc i

else:
# search all child nodes:
for it in body.items:
extractGlobals(it, output, isNimVm)
36 changes: 25 additions & 11 deletions compiler/vm/vmbackend.nim
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,13 @@ proc generateTopLevelStmts*(module: var Module, c: var TCtx,

module.initProc = (start: start, regCount: c.prc.regInfo.len)

proc generateCodeForProc(c: var TCtx, s: PSym): VmGenResult =
proc generateCodeForProc(c: var TCtx, s: PSym,
globals: var seq[PNode]): VmGenResult =
## Generates and emits the bytecode for the procedure `s`. The globals
## defined in it are extracted from the body and their identdefs appended
## to `globals`.
var body = transformBody(c.graph, c.idgen, s, cache = false)
extractGlobals(body, globals, isNimVm = true)
body = canonicalize(c.graph, c.idgen, s, body, {goIsNimvm})
result = genProc(c, s, body)

Expand All @@ -197,6 +202,10 @@ proc generateGlobalInit(c: var TCtx, f: var CodeFragment, defs: openArray[PNode]
for def in defs.items:
assert def.kind == nkIdentDefs
for i in 0..<def.len-2:
if def[^1].kind == nkEmpty:
# do nothing for globals without initializer expressions
continue

# note: don't transform the expressions here; they already were, during
# transformation of their owning procs
let
Expand Down Expand Up @@ -225,6 +234,10 @@ proc generateAliveProcs(c: var TCtx, mlist: var ModuleList) =
# multiple times, `setLen` has to be used instead of `newSeq`
c.functions.setLen(c.codegenInOut.nextProc)

var globals: seq[PNode]
## the identdefs of global defined inside procedures. Reused across loop
## iterations for efficiency

# `newProcs` can grow during iteration, so `citems` has to be used
for ri, sym in c.codegenInOut.newProcs.cpairs:
c.config.internalAssert(sym.kind notin {skMacro, skTemplate}):
Expand All @@ -239,25 +252,27 @@ proc generateAliveProcs(c: var TCtx, mlist: var ModuleList) =
if c.functions[i].kind == ckCallback:
continue

assert c.codegenInOut.globalDefs.len == 0

# FIXME: using the module where the procedure is defined (i.e.
# ``getModule``) is wrong. It needs to be the module to which the
# symbol is *attached*, i.e. ``sym.itemId.module``
c.module = sym.getModule()
# code-gen' the routine. This might add new entries to the `newProcs` list
let r = generateCodeForProc(c, sym)
let r = generateCodeForProc(c, sym, globals)
if r.isOk:
fillProcEntry(c.functions[i], r.unsafeGet)
else:
c.config.localReport(vmGenDiagToLegacyReport(r.takeErr))

# `{.global.}` initialization is done here, since the initializer
# expression might contain references to other functions (which can
# also contain further globals...)
if c.codegenInOut.globalDefs.len > 0:
# generate and emit the code for `{.global.}` initialization here, as the
# initializer expression might depend on otherwise unused procedures (which
# might define further globals...)
if globals.len > 0:
let mI = mlist.moduleMap[c.module.id]
generateGlobalInit(c, mlist.modules[mI].initGlobalsCode,
c.codegenInOut.globalDefs)
globals)

c.codegenInOut.globalDefs.setLen(0)
# prepare for reuse:
globals.setLen(0)

# code-gen might've found new functions, so adjust the function table:
c.functions.setLen(c.codegenInOut.nextProc)
Expand Down Expand Up @@ -387,7 +402,6 @@ proc generateCode*(g: ModuleGraph) =
mode: emStandalone)

c.typeInfoCache.init()
c.codegenInOut.flags = {cgfCollectGlobals}

# register the extra ops so that code generation isn't performed for the
# corresponding procs:
Expand Down
6 changes: 0 additions & 6 deletions compiler/vm/vmdef.nim
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,6 @@ type
cgfAllowMeta ## If not present, type or other meta expressions are
## disallowed in imperative contexts and code-gen for meta
## function arguments (e.g. `typedesc`) is suppressed
cgfCollectGlobals ## If present, the ident defs of `{.global.}` variables
## are collected instead of code-gen'ed

LinkIndex* = uint32 ## Depending on the context: `FunctionIndex`; index
## into `TCtx.globals`; index into `TCtx.complexConsts`
Expand All @@ -519,10 +517,6 @@ type
nextGlobal*: LinkIndex
nextConst*: LinkIndex

globalDefs*: seq[PNode] ## output; collected `{.global.}` definitions.
## They're only collected if `cgfCollectGlobals`
## is active

flags*: set[CodeGenFlag] ## input

VmGenDiagKind* = enum
Expand Down
23 changes: 10 additions & 13 deletions compiler/vm/vmgen.nim
Original file line number Diff line number Diff line change
Expand Up @@ -2327,22 +2327,19 @@ proc genVarSection(c: var TCtx; n: PNode) =
discard c.registerGlobal(s)
discard c.getOrCreate(s.typ)

# no need to generate or collect if the global has no initializer
# no need to generate an assignment if the global has no initializer
if a[2].kind == nkEmpty:
continue

if cgfCollectGlobals in c.codegenInOut.flags and
s.owner != nil and
s.owner.kind in routineKinds:
# we encountered a function-level global and code generation for
# them is defered
c.codegenInOut.globalDefs.add a
else:
let tmp = genSymAddr(c, a[0])
let val = c.genx(a[2])
c.gABC(a, opcWrDeref, tmp, 0, val)
c.freeTemp(val)
c.freeTemp(tmp)
# for globals, ``vmgen`` trusts the callsite (i.e. the place where
# ``genProcBody`` is invoked) to make sure that globals defined
# inside procedures are extracted / otherwise taken care of. Thus, we
# emit the initialization logic here without further checks
let tmp = genSymAddr(c, a[0])
let val = c.genx(a[2])
c.gABC(a, opcWrDeref, tmp, 0, val)
c.freeTemp(val)
c.freeTemp(tmp)
else:
let reg = setSlot(c, s)
if a[2].kind == nkEmpty:
Expand Down
9 changes: 9 additions & 0 deletions tests/global/globalaux.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,12 @@ proc globalInstance*[T]: var TObj[T] =
var g {.global.} = when T is int: makeObj(10) else: makeObj("hello")
result = g

proc testInline*(cmp: int) {.inline.} =
# initialization of the global needs to happen at the start of ``globalaux``
var v {.global.} = 1
doAssert v == cmp
# modify the global. The other module from which ``testInline`` is called
# must be able to observe this modification:
v = 2

testInline(1) # change the `v` to '2'
13 changes: 12 additions & 1 deletion tests/global/tglobal.nim
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
discard """
description: '''
Tests for globals defined inside procedures via the `.global.` pragma
'''
targets: "c !vm"
output: "in globalaux2: 10\ntotal globals: 2\nint value: 100\nstring value: second"
"""

## knownIssue: the VM backend initializes the globals in the pre-init procedure
## of the module where the generic is defined, not where it's
## instantiated

import globalaux, globalaux2

echo "total globals: ", totalGlobals
Expand All @@ -14,3 +21,7 @@ globalInstance[string]().val = "first"
globalInstance[string]().val = "second"
echo "string value: ", globalInstance[string]().val

block inline_procedure_with_global:
# call the inline procedure and make sure that the global was not
# re-initialized at the start of the current module
testInline(2)
29 changes: 29 additions & 0 deletions tests/lang_objects/destructor/tdestructor_in_initializer.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
discard """
description: '''
Regression test to make sure that locals inside the initializer of a global
defined inside a procedure's body are properly destroyed
'''
targets: "c"
knownIssue: "the `injectdestructors` pass is not for the initializer"
"""

var wasDestroyed: bool
# don't initialize the global; doing so might override the value assgined
# during pre-initialization

# initialization of ``.global.``s currently happens *before* the code part
# of the module is executed, so if the destructor was called, ``wasDestroyed``
# has to be true here
doAssert wasDestroyed

type Resource = object

proc `=destroy`(x: var Resource) =
wasDestroyed = true

proc prc() =
var v {.global.} = block:
var r = Resource() # `r` needs to be destroyed at the end of the block
1

prc() # use the procedure so that the global is part of the alive code

0 comments on commit 5d1c185

Please sign in to comment.