From 643110d08804273eb27f3552e2862b954c56ccee Mon Sep 17 00:00:00 2001 From: Andy Chu Date: Wed, 11 Dec 2019 18:47:56 -0800 Subject: [PATCH] [performance] Automation / tools for instrumenting memory allocation. - Instrument malloc and free (in addition to new and delete) - Create a histogram of Id returned from the lexer - Allow benchmarking against yash. OSH is faster! --- benchmarks/auto.sh | 2 +- benchmarks/id.sh | 2 +- build/mycpp.sh | 1 + core/ui.py | 9 +++++++++ cpp/dumb_alloc.cc | 28 ++++++++++++++++++++++++---- cpp/dumb_alloc.h | 8 ++++++-- cpp/frontend_match.cc | 6 ++++++ frontend/lexer.py | 9 ++++++++- mycpp/mylib.h | 8 ++++++++ oil_lang/grammar.pgen2 | 3 +++ vendor/typing.py | 1 + 11 files changed, 68 insertions(+), 9 deletions(-) diff --git a/benchmarks/auto.sh b/benchmarks/auto.sh index f9dd43b538..1de3cbe2c7 100755 --- a/benchmarks/auto.sh +++ b/benchmarks/auto.sh @@ -72,7 +72,7 @@ osh-parser-quick() { local osh_parse=_bin/osh_parse.opt.stripped local prov2 - prov2=$(benchmarks/id.sh shell-provenance bash dash mksh $osh_parse) + prov2=$(benchmarks/id.sh shell-provenance bash dash mksh yash $osh_parse) benchmarks/osh-parser.sh measure $prov2 $base_dir/osh-parser } diff --git a/benchmarks/id.sh b/benchmarks/id.sh index fc6e4cf531..6bac5d9121 100755 --- a/benchmarks/id.sh +++ b/benchmarks/id.sh @@ -84,7 +84,7 @@ dump-shell-id() { esac case $name in - bash|zsh) + bash|zsh|yash) $sh --version > $out_dir/version.txt ;; osh) diff --git a/build/mycpp.sh b/build/mycpp.sh index b7fbfe22bf..f820505140 100755 --- a/build/mycpp.sh +++ b/build/mycpp.sh @@ -163,6 +163,7 @@ compile() { # flags are split $CXX $flags \ + -D INSTRUMENT_MALLOC_FREE \ -I mycpp \ -I cpp \ -I _build/cpp \ diff --git a/core/ui.py b/core/ui.py index 7e0a17f741..2d743f0d10 100644 --- a/core/ui.py +++ b/core/ui.py @@ -251,6 +251,15 @@ def PrintAst(nodes, opts): if opts.ast_format == 'none': print('AST not printed.', file=sys.stderr) + if 0: + from _devbuild.gen.id_kind_asdl import Id_str + 
from frontend.lexer import ID_HIST + total = 0 + for id_, count in ID_HIST.most_common(): + print('%8d %s' % (count, Id_str(id_))) + total += count + print() + print('%8d total tokens returned' % total) else: # text output f = mylib.Stdout() diff --git a/cpp/dumb_alloc.cc b/cpp/dumb_alloc.cc index 189b67bbad..e506382536 100644 --- a/cpp/dumb_alloc.cc +++ b/cpp/dumb_alloc.cc @@ -5,11 +5,9 @@ #include // 100 MiB of memory -//char kMem[100 << 20]; -// 1 GiB for running 10 times -char kMem[1 << 30]; +char kMem[100 << 20]; -int gMemPos; +int gMemPos = 0; int gNumNew = 0; int gNumDelete = 0; @@ -30,6 +28,24 @@ void operator delete(void* p) noexcept { ++gNumDelete; } +char kMem2[100 << 20]; +int gMemPos2 = 0; +int gNumMalloc = 0; +int gNumFree = 0; + +void* dumb_malloc(size_t size) noexcept { + char* p = &(kMem2[gMemPos2]); + //fprintf(stderr, "malloc %d\n", size); + gMemPos2 += size; + ++gNumMalloc; + return p; +} + +void dumb_free(void* p) noexcept { + //fprintf(stderr, "free\n"); + ++gNumFree; +} + namespace dumb_alloc { void Summarize() { @@ -38,6 +54,10 @@ void Summarize() { fprintf(stderr, "\tgNumNew = %d\n", gNumNew); fprintf(stderr, "\tgNumDelete = %d\n", gNumDelete); fprintf(stderr, "\tgMemPos = %d\n", gMemPos); + fprintf(stderr, "\n"); + fprintf(stderr, "\tgNumMalloc = %d\n", gNumMalloc); + fprintf(stderr, "\tgNumFree = %d\n", gNumFree); + fprintf(stderr, "\tgMemPos2 = %d\n", gMemPos2); } }; diff --git a/cpp/dumb_alloc.h b/cpp/dumb_alloc.h index 517d78ce3d..920a41b58d 100644 --- a/cpp/dumb_alloc.h +++ b/cpp/dumb_alloc.h @@ -1,9 +1,13 @@ - -// Replacement for asdl/pretty.py +// Dumb Allocator #ifndef DUMB_ALLOC_H #define DUMB_ALLOC_H +#include // size_t + +void* dumb_malloc(size_t size) noexcept; +void dumb_free(void* p) noexcept; + namespace dumb_alloc { void Summarize(); diff --git a/cpp/frontend_match.cc b/cpp/frontend_match.cc index 93895d0496..aee199f9d2 100644 --- a/cpp/frontend_match.cc +++ b/cpp/frontend_match.cc @@ -7,6 +7,12 @@ #include "osh-types.h" 
#include "osh-lex.h" +#if INSTRUMENT_MALLOC_FREE +#include "dumb_alloc.h" +#define malloc dumb_malloc +#define free dumb_free +#endif + namespace match { Tuple2* OneToken(lex_mode_t lex_mode, Str* line, int start_pos) { diff --git a/frontend/lexer.py b/frontend/lexer.py index 39a2adba26..1d8fd0ac1f 100644 --- a/frontend/lexer.py +++ b/frontend/lexer.py @@ -13,10 +13,11 @@ from _devbuild.gen.id_kind_asdl import Id_t, Id, Kind from asdl import runtime from core.util import log +from mycpp import mylib from frontend import lookup from frontend import match -from typing import Callable, List, Tuple, Optional, TYPE_CHECKING +from typing import Callable, List, Tuple, Optional, Counter, TYPE_CHECKING if TYPE_CHECKING: from core.alloc import Arena from frontend.reader import _Reader @@ -255,5 +256,11 @@ def Read(self, lex_mode): if t.id != Id.Ignored_LineCont: break + #ID_HIST[t.id] += 1 #log('> Read() Returning %s', t) return t + + +if mylib.PYTHON: + import collections + ID_HIST = collections.Counter() # type: Counter[Id_t] diff --git a/mycpp/mylib.h b/mycpp/mylib.h index 7c85176db1..25c869c6d4 100644 --- a/mycpp/mylib.h +++ b/mycpp/mylib.h @@ -15,6 +15,12 @@ #include // CHAR_BIT #include +#if INSTRUMENT_MALLOC_FREE +#include "dumb_alloc.h" +#define malloc dumb_malloc +#define free dumb_free +#endif + // To reduce code size #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ @@ -247,6 +253,8 @@ class StrIter { template class List { public: + // TODO: Should we assume there are 2 or 5 or 10 elements at first? Measure + // speed? List() : v_() { } diff --git a/oil_lang/grammar.pgen2 b/oil_lang/grammar.pgen2 index 63b33207de..c15f9079a3 100644 --- a/oil_lang/grammar.pgen2 +++ b/oil_lang/grammar.pgen2 @@ -7,6 +7,9 @@ # mytuple, # f(args,) # func f(params,) +# +# Kinds used: +# Left, Right, Expr, Op, Arith, Eof, Char # Oil patch: removed @= **= //= # We're missing div= and xor=, which now look weird. 
^= is diff --git a/vendor/typing.py b/vendor/typing.py index f7b5240ab0..357e14b3eb 100644 --- a/vendor/typing.py +++ b/vendor/typing.py @@ -24,6 +24,7 @@ Any = None NoReturn = None Callable = None +Counter = None # for ID_HIST TYPE_CHECKING = False