Skip to content

Commit

Permalink
Race condition can lead to non-exhaustive state space exploration or
Browse files Browse the repository at this point in the history
bogus counterexamples with parallel TLC.

Github issue #361.
#361

[Bug][TLC]
  • Loading branch information
lemmy committed Oct 31, 2019
1 parent 2b5df65 commit f1a089e
Show file tree
Hide file tree
Showing 10 changed files with 291 additions and 15 deletions.
2 changes: 1 addition & 1 deletion tlatools/src/tlc2/tool/impl/Spec.java
Expand Up @@ -468,7 +468,7 @@ public final Object lookup(SymbolNode opNode, Context c, boolean cutoff)

result = opNode.getToolObject(toolId);
if (result != null) {
return result;
return WorkerValue.mux(result);
}

if (opNode.getKind() == UserDefinedOpKind)
Expand Down
16 changes: 8 additions & 8 deletions tlatools/src/tlc2/tool/impl/SpecProcessor.java
Expand Up @@ -71,10 +71,8 @@
import tlc2.tool.Defns;
import tlc2.tool.EvalException;
import tlc2.tool.Specs;
import tlc2.tool.TLCState;
import tlc2.tool.TLCStateMut;
import tlc2.tool.ToolGlobals;
import tlc2.tool.coverage.CostModel;
import tlc2.util.Context;
import tlc2.util.List;
import tlc2.util.Vect;
Expand Down Expand Up @@ -206,7 +204,11 @@ private void processConstantDefns(ModuleNode mod) {
for (int i = 0; i < consts.length; i++) {
Object val = consts[i].getToolObject(toolId);
if (val != null && val instanceof IValue) {
((IValue)val).initialize();
// We do not wrap this value in a WorkerValue, because we assume that explicit
// initialization does not pose a problem here. This is based on the observation,
// that val is either an atom (IValue#isAtom) or a set (of sets) of atoms (primarily
// ModelValues).
((IValue)val).initialize();
// System.err.println(consts[i].getName() + ": " + val);
} // The following else clause was added by LL on 17 March 2012.
else if (val != null && val instanceof OpDefNode) {
Expand All @@ -220,9 +222,8 @@ else if (val != null && val instanceof OpDefNode) {

if (opDef.getArity() == 0) {
try {
IValue defVal = spec.eval(opDef.getBody(), Context.Empty, TLCState.Empty, CostModel.DO_NOT_RECORD);
defVal.initialize();
consts[i].setToolObject(toolId, defVal);
Object defVal = WorkerValue.demux(spec, consts[i], opDef);
opDef.setToolObject(toolId, defVal);
} catch (Assert.TLCRuntimeException | EvalException e) {
Assert.fail(EC.TLC_CONFIG_SUBSTITUTION_NON_CONSTANT,
new String[] { consts[i].getName().toString(), opDef.getName().toString() });
Expand Down Expand Up @@ -256,8 +257,7 @@ else if (val != null && val instanceof OpDefNode) {
continue DEFS;
}
// System.err.println(opName);
IValue val = spec.eval(opDef.getBody(), Context.Empty, TLCState.Empty, CostModel.DO_NOT_RECORD);
val.initialize();
final Object val = WorkerValue.demux(spec, opDef);
// System.err.println(opName + ": " + val);
opDef.setToolObject(toolId, val);
Object def = this.defns.get(opName);
Expand Down
6 changes: 3 additions & 3 deletions tlatools/src/tlc2/tool/impl/Tool.java
Expand Up @@ -779,7 +779,7 @@ else if (!(fval instanceof Applicable)) {
* in the given state.
*/
@Override
public final StateVec getNextStates(Action action, TLCState state) {
public final StateVec getNextStates(Action action, TLCState state) {
ActionItemList acts = ActionItemList.Empty;
TLCState s1 = TLCState.Empty.createEmpty();
StateVec nss = new StateVec(0);
Expand Down Expand Up @@ -1495,7 +1495,7 @@ private final Value evalImpl(final SemanticNode expr, final Context c, final TLC
case DecimalKind:
case StringKind:
{
return (Value) expr.getToolObject(toolId);
return (Value) WorkerValue.mux(expr.getToolObject(toolId));
}
case AtNodeKind:
{
Expand Down Expand Up @@ -2050,7 +2050,7 @@ else if ((fval instanceof TupleValue) ||
case OPCODE_rs: // RcdSelect
{
Value rval = this.eval(args[0], c, s0, s1, control, cm);
Value sval = (Value) args[1].getToolObject(toolId);
Value sval = (Value) WorkerValue.mux(args[1].getToolObject(toolId));
if (rval instanceof RecordValue) {
Value result = (Value) ((RecordValue)rval).select(sval);
if (result == null) {
Expand Down
158 changes: 158 additions & 0 deletions tlatools/src/tlc2/tool/impl/WorkerValue.java
@@ -0,0 +1,158 @@
/*******************************************************************************
* Copyright (c) 2019 Microsoft Research. All rights reserved.
*
* The MIT License (MIT)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Contributors:
* Markus Alexander Kuppe - initial API and implementation
******************************************************************************/
package tlc2.tool.impl;

import tla2sany.semantic.OpDefNode;
import tla2sany.semantic.SemanticNode;
import tlc2.TLCGlobals;
import tlc2.tool.IWorker;
import tlc2.tool.TLCState;
import tlc2.tool.coverage.CostModel;
import tlc2.util.Context;
import tlc2.value.IValue;
import tlc2.value.impl.Enumerable;

/*
Root Cause:
---------
1) [Values](https://github.com/tlaplus/tlaplus/blob/master/tlatools/src/tlc2/value/impl/Value.java) can be (deeply) nested.
1) Values transition through the following sequence of states: New (instantiated), normalized, and fingerprinted. Transitioning from one state to another is not thread-safe. Non-technical speaking, a value is both the symbolic and the explicit entity.
2) Normalization of an (enumerable) value causes its elements to be sorted and duplicates to be removed. It does not cause new elements to be generated/enumerated. For example, normalizing the [SetPredValue](https://github.com/tlaplus/tlaplus/blob/master/tlatools/src/tlc2/value/impl/SetPredValue.java), corresponding to the expression ```tla { s \in SUBSET S : s # {} }```, causes ```S``` to be sorted and any duplicate elements to be removed.
3) Fingerprinting an enumerable value causes its elements to be explicitly enumerated. For example, the SetPredValue from before causes all subsets ```s``` to be enumerated.
4) At startup, TLC eagerly evaluates zero-arity constant definitions and operators which involves creating values. These values are normalized but not fingerprinted! For example, the operator ```NonEmptySubsS == { s \in SUBSET S : s # {} }``` will be evaluated to a SetPredValue that is stored in the ```NonEmptySubsS``` node of the semantic graph. We denote the set of values in the semantic graph with V<sub>SG</sub>.
5) Generating a [TLCState](https://github.com/tlaplus/tlaplus/blob/master/tlatools/src/tlc2/tool/TLCTrace.java) during the evaluation of the next-state relation, assigns newly created values to variables. Before a TLCState is enqueued in the [StateQueue](https://github.com/tlaplus/tlaplus/blob/master/tlatools/src/tlc2/tool/queue/StateQueue.java), its values are fingerprinted. Thus, values of TLCStates can safely be shared across [Workers](https://github.com/tlaplus/tlaplus/blob/master/tlatools/src/tlc2/tool/Worker.java).
Lets assume a spec with one variable ```x``` whose value - in all states - is some set. Furthermore, let ```P``` be the property ```[](x \in NonEmptySubsS)```. For simplicity reasons, we assume:
1) ```P``` is not checked on the initial states
2) Two Workers check next states simultaneously
Under these two assumptions, both Workers will race to enumerate (fingerprint) ```NonEmptySubsS```.
Note that the fact that we had to assume 1) and 2) explains why this race condition didn't surfaced earlier. However, the assumptions are true in real-world executions of TLC. For example, the evaluation of a more complex NonEmptySubsS, with a deeply nested hierarchy of values, may not require the evaluation of nested values in the initial states.
---------------
@xxyzzn came up with another scenario that might lead to the calculation of bogus fingerprints:
```tla
EXTENDS Integers, Sequences
VARIABLE x, y
Foo == { s \in SUBSET {1,2,3,4} : s # {} }
Init == x = << >> /\ y \in 1..2
Next == x' \in {<<i, Foo>> : i \in 1..2} /\ UNCHANGED y
```
Variable ```y``` only exists so that the state-graph has two initial states (sequentially generated). A single initial state would restrict the first evaluation of Next to a single Worker (only a single initial state to dequeue from the ```StateQueue```) and thus fingerprinting cannot occur concurrently.
Attempted Fix A:
---------------------
[At startup](https://github.com/tlaplus/tlaplus/blob/63bab42e79e750b7ac033cfe7196d8f0b53e861e/tlatools/src/tlc2/tool/impl/SpecProcessor.java#L202-L281), (eagerly) fingerprint all values of V<sub>SG</sub>.
Problem: Not all values in V<sub>SG</sub> need not be fingerprinted to completely check a spec. For some specs such as [CarTalkPuzzle](https://github.com/tlaplus/tlaplus/issues/361#issuecomment-543256201), this is even prohibitive because explicit enumeration is infeasible: TLC will hang at startup.
Deferring fingerprinting of V<sub>SG</sub> until after the assumptions have been checked, such as when the initial states are (atomically) generated, is no general solution.
Proposed Fix A:
--------------------
Properly synchronize values such that no race conditions are possible if two or more Workers enumerate them concurrently.
Disadvantages:
1) It creates a scalability bottleneck: Workers will content for the synchronized ```NonEmptySubs```on every state that is generated.
2) Engineering effort to correctly synchronize the Value type hierarchy without introducing dead-locks.
Proposed Fix B:
--------------------
For each worker, create a copy of V<sub>SG</sub> (technically borrows from the concept of a ```ThreadLocal```).
Disadvantages:
1) Requires a type check (and potentially cast) on every [lookup](https://github.com/tlaplus/tlaplus/blob/63bab42e79e750b7ac033cfe7196d8f0b53e861e/tlatools/src/tlc2/tool/impl/Spec.java#L448-L497) that occurrs during the [evaluation of the next-state relation](https://github.com/tlaplus/tlaplus/blob/63bab42e79e750b7ac033cfe7196d8f0b53e861e/tlatools/src/tlc2/tool/impl/Tool.java#L782-L814). An optimization would be to [eventually](https://github.com/tlaplus/tlaplus/blob/63bab42e79e750b7ac033cfe7196d8f0b53e861e/tlatools/src/tlc2/tool/AbstractChecker.java#L496) garbage-collect the copies of V<sub>SG</sub> to at least skip the type cast once one copy is completely fingerprinted.
2) [Value#deepCopy](https://github.com/tlaplus/tlaplus/blob/63bab42e79e750b7ac033cfe7196d8f0b53e861e/tlatools/src/tlc2/value/IValue.java#L104) - required to create the copies of V<sub>SG</sub> - is [broken for most value types](https://github.com/tlaplus/tlaplus/blob/63bab42e79e750b7ac033cfe7196d8f0b53e861e/tlatools/src/tlc2/value/impl/SubsetValue.java#L232) (can probably be fixed).
*/
public class WorkerValue {

/*
* Demuxing is supposed to be called only once per sn/opDef whereas muxing is called many many times.
*/

public static Object demux(final Spec spec, final OpDefNode opDef) {
return demux(spec, opDef, opDef);
}

public static Object demux(final Spec spec, final SemanticNode sn, final OpDefNode opDef) {
final IValue defVal = spec.eval(opDef.getBody(), Context.Empty, TLCState.Empty, CostModel.DO_NOT_RECORD);
defVal.deepNormalize();

if (defVal instanceof Enumerable && TLCGlobals.getNumWorkers() > 1) {
final IValue[] values = new IValue[TLCGlobals.getNumWorkers()];
values[0] = defVal;

for (int i = 1; i < values.length; i++) {
// Ideally, we could invoke IValue#deepCopy here instead of evaluating opDef again. However,
// IValue#deepCopy doesn't create copies for most values.
values[i] = spec.eval(opDef.getBody(), Context.Empty, TLCState.Empty, CostModel.DO_NOT_RECORD);
values[i].deepNormalize();
}

return new WorkerValue(values);
} else {
return defVal;
}
}

public static Object mux(final Object result) {
if (!(result instanceof WorkerValue)) {
return result;
}

final WorkerValue vp = (WorkerValue) result;
final Thread t = Thread.currentThread();
if (t instanceof IWorker) {
final IWorker w = (IWorker) t;
return vp.values[w.myGetId()];
} else {
return vp.values[0];
}
}

private final IValue[] values;

private WorkerValue(IValue[] values) {
this.values = values;
}
}
16 changes: 14 additions & 2 deletions tlatools/src/tlc2/value/IValue.java
Expand Up @@ -29,6 +29,10 @@

import tla2sany.semantic.SemanticNode;
import tlc2.tool.coverage.CostModel;
import tlc2.value.impl.BoolValue;
import tlc2.value.impl.IntValue;
import tlc2.value.impl.ModelValue;
import tlc2.value.impl.StringValue;

public interface IValue extends Comparable<Object> {

Expand Down Expand Up @@ -66,11 +70,12 @@ public interface IValue extends Comparable<Object> {
*
* see comment in UnionValue#deepNormalize too
*/
default void initialize() {
default IValue initialize() {
this.deepNormalize();
// Execute fingerprint code path to internally trigger convertAndCache iff
// defined (0L parameter is not relevant)
this.fingerPrint(0L);
return this;
}

/**
Expand Down Expand Up @@ -113,5 +118,12 @@ default void initialize() {
String toString();

String toString(String delim);


default boolean isAtom() {
if (this instanceof ModelValue || this instanceof IntValue || this instanceof StringValue
|| this instanceof BoolValue) {
return true;
}
return false;
}
}
3 changes: 2 additions & 1 deletion tlatools/src/tlc2/value/impl/MethodValue.java
Expand Up @@ -67,9 +67,10 @@ private MethodValue(final Method md) {
public final byte getKind() { return METHODVALUE; }

@Override
public final void initialize() {
public final IValue initialize() {
this.deepNormalize();
// Do not call fingerprint as a MethodValue has no fingerprint.
return this;
}

@Override
Expand Down
18 changes: 18 additions & 0 deletions tlatools/src/tlc2/value/impl/SetEnumValue.java
Expand Up @@ -54,6 +54,24 @@ public SetEnumValue(CostModel cm) {
this();
this.cm = cm;
}

// See IValue#isAtom except that this is for sets of atoms.
public final boolean isSetOfAtoms() {
final int len = this.elems.size();
for (int i = 0; i < len; i++) {
final Value v = this.elems.elementAt(i);
if (v instanceof SetEnumValue) {
// Sets of sets of sets... of atoms.
final SetEnumValue sev = (SetEnumValue) v;
if (!sev.isSetOfAtoms()) {
return false;
}
} else if (!v.isAtom()) {
return false;
}
}
return true;
}

@Override
public final byte getKind() { return SETENUMVALUE; }
Expand Down
4 changes: 4 additions & 0 deletions tlatools/test-model/Github361.cfg
@@ -0,0 +1,4 @@
INIT
Init
NEXT
Next
24 changes: 24 additions & 0 deletions tlatools/test-model/Github361.tla
@@ -0,0 +1,24 @@
---- MODULE Github361 ----
EXTENDS TLC, Naturals

Nodes == {1, 2, 3, 4, 5}
null == 6 \* not in Nodes

Vertices == [nodes : SUBSET Nodes, succ : Nodes \cup {null}, dead : BOOLEAN]

Partitions == {P \in SUBSET Vertices :
/\ \A v1, v2 \in P :
(v1 # v2) => ((v1.nodes \cap v2.nodes) = {})
/\ \A v \in P : /\ v.nodes # {}
/\ \E w \in P : \/ v.succ = null
\/ v.succ \in w.nodes}

VARIABLES partition

TypeOK == partition \in Partitions

Init == partition = {}

Next == UNCHANGED partition

=============================================================================

0 comments on commit f1a089e

Please sign in to comment.