v38 (Statistics): WIP

lemmy · Jun 14, 2023 · a064863 · a064863
1 parent c70b80e
commit a064863
Show file tree

Hide file tree

Showing 3 changed files with 263 additions and 0 deletions.
diff --git a/BlockingQueuePoisonApple_stats.R b/BlockingQueuePoisonApple_stats.R
@@ -0,0 +1,44 @@
+#!/usr/bin/env Rscript
+args = commandArgs(trailingOnly=TRUE)
+
+library(ggplot2)
+library(stringr)
+library(svglite)
+
+data <- read.csv(header=TRUE, sep = ",", file = args[1])
+
+## Poor-mans version of grouping the data according to 'P', 'B', and 'C' by concatenating the 0-padded values.
+data <- within(data, config <- paste(sep = "/", str_pad(data$P, 3, pad = "0"), str_pad(data$C, 3, pad = "0"),str_pad(data$B, 3, pad = "0")))
+
+## Group data by the 'config' column and calculate mean for 'over' and 'under'.
+ag.means <- aggregate(cbind(data$over, data$under), by=list(config=data$config), FUN=mean)
+names(ag.means)[names(ag.means)=="V1"] <- "mean_over"
+names(ag.means)[names(ag.means)=="V2"] <- "mean_under"
+
+## Group data by the 'config' column and calculate standard deviation for 'over' and 'under'.
+ag.sd <- aggregate(cbind(data$over, data$under), by=list(config=data$config), FUN=sd)
+names(ag.sd)[names(ag.sd)=="V1"] <- "sd_over"
+names(ag.sd)[names(ag.sd)=="V2"] <- "sd_under"
+
+## Merge the two data frames by the common 'config' column.
+df <- merge(ag.means, ag.sd)
+
+p <- ggplot(df, aes(x = config, y = mean_over, fill = config)) +
+  geom_errorbar(aes(ymin=mean_over-sd_over, ymax=mean_over+sd_over), width=.2, position=position_dodge(.9)) +
+  geom_bar(stat = "identity") +
+  geom_bar(aes(x= config, y = mean_under, fill=config), stat="identity",position="dodge") +
+  geom_errorbar(aes(ymin=mean_under-sd_under, ymax=mean_under+sd_under), width=.2, position=position_dodge(.9)) +
+  geom_hline(yintercept = 0, alpha = 0.3) +
+  #scale_x_discrete(guide = guide_axis(n.dodge=3))+
+  scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
+  coord_flip() +
+  theme_minimal() +
+  theme(legend.position = "none") +
+  labs(
+    x = "Configuration |P|/|C|/B",
+    y = "Average under- and over-provisioning of consumers (positive |ac|>|ap|)",
+    title = paste(
+      "Traces:", nrow(data), format(Sys.time(), "(%a %b %d %X %Y)")
+    )
+  )
+ggsave(gsub(".csv$", ".svg", args[1]), plot = last_plot(), bg = "white", device = "svg")
diff --git a/BlockingQueuePoisonApple_stats.tla b/BlockingQueuePoisonApple_stats.tla
@@ -0,0 +1,156 @@
+-------------------- MODULE BlockingQueuePoisonApple_stats -------------------
+EXTENDS BlockingQueuePoisonApple, CSV, TLC, TLCExt, Integers, IOUtils
+
+Max(a,b) ==
+    IF a > b THEN a ELSE b
+
+Min(a,b) ==
+    IF a < b THEN a ELSE b
+
+-----------------------------------------------------------------------------
+
+\* Collecting statistics does not work with an ordinary TLA+ spec such as 
+\* BlockingQueuePoisonApple because of its non-determinism.  The statistics
+\* would be all over the board and not meaningful.  Instead, we break the
+\* non-determinism by mimiking a real-world workload s.t. each Producer adds
+\* a (fixed) number of elements to the queue.  In other words, we remove 
+\* those behaviors from the state space that we do not expect to see in the
+\* executions of an implementation of the TLA+ spec.
+\* It would be more convenient to conjoin a liveness property that stipulates
+\* that each producer adds N elements to the queue instead of amending the
+\* original next-state relation  Next  with  SNext  below.  However, I don't
+\* see a way to get around adding the (auxiliary) pending variable that keeps
+\* track of the number of elements added by each producer. 
+\* Alternatively, a more variable approach, compared to producers adding a
+\* fixed number of elements to the queue, would be to add probabilities that
+\* model the likelyhood of a   Put  and  Terminate  actions to occur (with  
+\*  Put  having a much higher probability).  This is straighforward to model
+\* with TLC's  RandomElement  operator by conjoining it to  Put  and
+\*  Terminate:
+\*   RandomElement(1..10) \in 1..9
+\* and
+\*   RandomElement(1..10) \in 10..10
+\* A more sophisticated example is at
+\* https://github.com/lemmy/PageQueue/blob/master/PageQueue.tla
+
+\* Number of elements to put into the queue by each producer.
+CONSTANT Work
+ASSUME Work \in (Nat \ {0})
+
+\* Count how many elements have been added by each producer. This variable
+\* is how we model workloads.
+VARIABLES pending
+auxVars == <<pending>>
+
+\* The two auxilary variables are used to measure the over- and under-provisioning
+\* of consumers.
+\* TODO: Make Welford's algorithm for variance and standard deviation available
+\*       here (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm)
+\*       It would free users from doing things in R, ... , and probably be useful
+\*       in 99% of collecting statistics.
+VARIABLE over, under
+statsVars == <<over, under>>
+
+STypeInv == 
+    /\ pending \in [ Producers -> 0..Work ]
+    /\ over \in Int
+    /\ under \in Int
+
+SInit == 
+    \* Verbatim copy (redundant) of the original spec.
+    /\ buffer = <<>>
+    /\ waitSet = {}
+    /\ cons = [ c \in Consumers |-> Cardinality(Producers) ]
+    /\ prod = [ p \in Producers |-> Cardinality(Consumers) ]
+    \* Workload:
+    /\ pending = [ p \in Producers |-> Work ]
+    \* Statistics:
+    /\ over = 0
+    /\ under = 0
+
+SNext == 
+    /\ \/ \E p \in Producers: /\ Put(p, p)
+                              \* Decrement pending iff the buffer changed.
+                              /\ IF buffer # buffer'
+                                 THEN pending' = [pending EXCEPT ![p]=@-1]
+                                 ELSE UNCHANGED pending
+       \/ \E c \in Consumers: /\ Get(c) 
+                              /\ UNCHANGED pending
+    \* Update the statistics.
+    /\ over' = Max(over, Cardinality(ac) - Cardinality(ap))
+    /\ under' = Min(under, Cardinality(ac) - Cardinality(ap))
+
+StatsSpec ==
+    SInit /\ [][SNext]_<<vars, auxVars, statsVars>>
+
+-----------------------------------------------------------------------------
+
+CONSTANT CSVFile
+
+StatsInv ==
+    \* Per-behavior stats written to CSVfile on global termination.  Global
+    \* termination is defined by the union of the active producers and consumers
+    \* being the empty set.
+    /\ (ap \cup ac = {}) =>
+            CSVWrite("%1$s,%2$s,%3$s,%4$s,%5$s,%6$s,%7$s",
+            <<
+                \* TLCGet("stats").traces equals the number of traces generated
+                \* so far.  Thus, individual records in the CSV can be aggregated
+                \* later.  TLCGet("level")  can be seen as the timestamp of the
+                \* record. 
+                TLCGet("stats").traces, TLCGet("level"),
+                \* 
+                Cardinality(Producers), Cardinality(Consumers), BufCapacity,
+                \* ...the actual statistics.
+                over, under
+            >>, CSVFile)
+
+-----------------------------------------------------------------------------
+
+\* Below, we read the values from the OS environment and turn them into constants
+\* of this spec.  The operators  s2n  and  SetOfNModelValues  should probably be
+\* moved into IOUtils.
+
+s2n(str) ==
+    CHOOSE n \in 0..2^16: ToString(n) = str
+
+SetOfNModelValues(lbl, N) ==
+   { TLCModelValue(lbl \o ToString(i)) : i \in 1..N }
+
+-----------------------------------------------------------------------------
+
+B ==
+    s2n(IOEnv.B)
+
+P ==
+   SetOfNModelValues("p", s2n(IOEnv.P))
+
+C ==
+   SetOfNModelValues("c", s2n(IOEnv.C))
+
+W ==
+    s2n(IOEnv.W)
+
+Out ==
+    IOEnv.Out
+
+===========================================================================
+
+--------------------- CONFIG BlockingQueuePoisonApple_stats ---------------------
+CONSTANTS
+ BufCapacity <- B
+ Producers   <- P
+ Consumers   <- C
+ Work        <- W
+ CSVFile     <- Out
+ Poison = Poison
+
+SPECIFICATION StatsSpec
+
+CHECK_DEADLOCK FALSE
+
+INVARIANT STypeInv
+
+INVARIANT StatsInv
+
+=============================================================================
diff --git a/BlockingQueuePoisonApple_statsSC.tla b/BlockingQueuePoisonApple_statsSC.tla
@@ -0,0 +1,63 @@
+--------------------- MODULE BlockingQueuePoisonApple_statsSC ---------------------
+EXTENDS TLC, IOUtils, Naturals, Sequences, CSV
+
+\* Assume a recent version of TLC that has support for all shenanigans below.
+ASSUME TLCGet("revision").timestamp >= 1628118195
+
+-----------------------------------------------------------------------------
+
+\* Filename for the CSV file that appears also in the R script and is passed
+\* to the nested TLC instances that are forked below.
+CSVFile ==
+    "BQPA_B_" \o ToString(JavaTime) \o ".csv"
+
+\* Write column headers to CSV file at startup of TLC instance that "runs"
+\* this script and forks the nested instances of TLC that simulate the spec
+\* and collect the statistics.
+ASSUME 
+    CSVWrite("trace,level,P,C,B,over,under", <<>>, CSVFile)
+
+PlotStatistics ==
+    \* Have TLC execute the R script on the generated CSV file.
+    LET proc == IOExec(<<
+            \* Find R on the current system (known to work on macOS and Linux).
+            "/usr/bin/env", "Rscript",
+            "BlockingQueuePoisonApple_stats.R", CSVFile>>)
+    IN \/ proc.exitValue = 0
+       \/ PrintT(proc) \* Print stdout and stderr if R script fails.
+
+-----------------------------------------------------------------------------
+
+\* Command to fork nested TLC instances that simulate the spec and collect the
+\* statistics. TLCGet("config").install gives the path to the TLC jar also
+\* running this script.
+Cmd == LET absolutePathOfTLC == TLCGet("config").install
+       IN <<"java", "-jar",
+          absolutePathOfTLC, 
+          "-generate", "num=100",
+          "-config", "BlockingQueuePoisonApple_stats.tla",
+          "BlockingQueuePoisonApple_stats">>
+
+Success(e) ==
+    /\ PrintT(e)
+    /\ PlotStatistics
+
+ASSUME \A conf \in 
+            { r \in [ {"P","C","B"} -> {1,2,4,8,16,32}]:
+                \* Check only symmetric configs of Producers and Consumers.
+                \* Over/Under-provisioning for asymmetrics numbers of
+                \*  Producers  and  Consumers.
+                r.P = r.C }:
+    LET ret == IOEnvExec(conf @@ [W |-> 16, Out |-> CSVFile], Cmd).exitValue
+    IN CASE ret =  0 -> Success(conf)
+         [] ret = 10 -> PrintT(<<conf, "Assumption violation">>)
+         [] ret = 12 -> PrintT(<<conf, "Safety violation">>)
+         [] ret = 13 -> PrintT(<<conf, "Liveness violation">>)
+         [] OTHER    -> Print(<<conf, IOEnvExec(conf, Cmd), "Error">>, FALSE)
+
+=============================================================================
+---- CONFIG BlockingQueuePoisonAppleSC ----
+\* TLC always expects a configuration file, but an empty one will do in this case.
+\* If this approach proves useful, TLC should be extended to launch without a config
+\* file.
+====