From 57e36df91d354a32f637ef38556ea4e592566f6b Mon Sep 17 00:00:00 2001
From: Matt D'Souza
Date: Tue, 22 Jul 2025 16:29:31 -0400
Subject: [PATCH 1/2] Update graal import

---
 ci/graal/ci/ci_common/common.jsonnet          |  1 -
 .../ci/ci_common/run-spec-tools.libsonnet     | 31 +++----------------
 ci/python-gate.libsonnet                      |  2 +-
 mx.graalpython/suite.py                       |  4 +--
 4 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/ci/graal/ci/ci_common/common.jsonnet b/ci/graal/ci/ci_common/common.jsonnet
index 1cfc8ef29b..68a15bfb84 100644
--- a/ci/graal/ci/ci_common/common.jsonnet
+++ b/ci/graal/ci/ci_common/common.jsonnet
@@ -128,7 +128,6 @@ common + common.frequencies + {
       "*.bgv",
       "*/graal_dumps/*/*",
     ],
-    timelimit: "30:00",
   },
   local linux_deps_extras = {
     packages+: {
diff --git a/ci/graal/ci/ci_common/run-spec-tools.libsonnet b/ci/graal/ci/ci_common/run-spec-tools.libsonnet
index cada794322..db7da13300 100644
--- a/ci/graal/ci/ci_common/run-spec-tools.libsonnet
+++ b/ci/graal/ci/ci_common/run-spec-tools.libsonnet
@@ -54,30 +54,9 @@ local std_get = (import "../../ci/ci_common/common-utils.libsonnet").std_get;
     local table = [ ["name", "variant", task_details_title] + platform_titles] + cols;
     table
   ,
-  // Removes the 'timelimit' property from an object.
-  // Usually, this is used to remove hard-coded (default) timelimits defined in `ci/ci_common/common.jsonnet`.
-  // These definitions assume that the os/arch definition comes first and will be refined later.
-  // With run-spec, however, this is not true in general because the os/arch is only fixed later
-  // in the pipeline. Thus, hard-coded timelimits would override any previous settings. To resolve
-  // this, we delete the default value altogether and explicitly set the timelimits for all jobs.
-  //
-  // Implementation note: we cannot set the value to `null` and use `std.prune` because that deletes hidden fields.
-  delete_timelimit(b)::
-    local public_fields = std.objectFields(b);
-    std.foldl(function(acc, k) acc +
-      local value = b[k];
-      if std.member(public_fields, k) then
-        if std.type(value) == "string" then
-          { [k]: value }
-        else
-          { [k]+: value }
-      else
-        if std.type(value) == "string" then
-          { [k]:: value }
-        else
-          { [k]+:: value }
-      ,
-      [k for k in std.objectFieldsAll(b) if k != "timelimit"],
-      {}
-    ),
+  // Check there is no 'timelimit' property on an object,
+  // so that it is safe to add the timelimit later and ordering won't matter.
+  check_no_timelimit(b)::
+    assert !std.objectHasAll(b, "timelimit") : "object unexpectedly has a 'timelimit' property";
+    b,
 }
\ No newline at end of file
diff --git a/ci/python-gate.libsonnet b/ci/python-gate.libsonnet
index a27c13dbfd..3ec8eef34d 100644
--- a/ci/python-gate.libsonnet
+++ b/ci/python-gate.libsonnet
@@ -57,7 +57,7 @@
       // this starts with _ on purpose so that it will be evaluated first
       "_1_os_arch_jdk": function(b)
         local edition = if (std.objectHasAll(b, 'graalvm_edition')) then b.graalvm_edition else 'ce';
-        tools.delete_timelimit(jdk_name_to_dict[edition][b.jdk] + default_os_arch(b.jdk, edition)[b.os][b.arch])
+        tools.check_no_timelimit(jdk_name_to_dict[edition][b.jdk] + default_os_arch(b.jdk, edition)[b.os][b.arch])
     })),
 
 //------------------------------------------------------------------------------------------------------------------
diff --git a/mx.graalpython/suite.py b/mx.graalpython/suite.py
index 47e380a7fb..1993f1047d 100644
--- a/mx.graalpython/suite.py
+++ b/mx.graalpython/suite.py
@@ -53,7 +53,7 @@
         },
         {
             "name": "tools",
-            "version": "fd29b2e0ab7b3f341aedf9ee4dbffd2c21769592",
+            "version": "9edea91c00d8939fa75c2703165941e7b802e080",
             "subdir": True,
             "urls": [
                 {"url": "https://github.com/oracle/graal", "kind": "git"},
@@ -61,7 +61,7 @@
         },
        {
             "name": "regex",
-            "version": "fd29b2e0ab7b3f341aedf9ee4dbffd2c21769592",
+            "version": "9edea91c00d8939fa75c2703165941e7b802e080",
             "subdir": True,
             "urls": [
                 {"url": "https://github.com/oracle/graal", "kind": "git"},

From 84736cd685f5951142c44136b2226b13eff539d8 Mon Sep 17 00:00:00 2001
From: Matt D'Souza
Date: Wed, 29 Jan 2025 16:18:26 -0500
Subject: [PATCH 2/2] Integrate with new polybench setup

---
 benchmarks/interpreter/deltablue.py        | 647 +++++++++++++++++++
 benchmarks/interpreter/fibonacci.py        |  40 ++
 benchmarks/interpreter/pyinit.py           |  30 +
 benchmarks/interpreter/richards.py         | 422 ++++++++++++
 benchmarks/interpreter/sieve.py            |  43 ++
 benchmarks/warmup/pyflate-fast.py          | 717 +++++++++++++++++++++
 benchmarks/warmup/raytrace.py              | 417 ++++++++++++
 mx.graalpython/copyrights/overrides        |   7 +
 mx.graalpython/mx_graalpython_benchmark.py |  36 ++
 mx.graalpython/polybench-fork-counts.json  |   9 +
 mx.graalpython/suite.py                    |  18 +
 pyproject.toml                             |   2 +-
 12 files changed, 2387 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/interpreter/deltablue.py
 create mode 100644 benchmarks/interpreter/fibonacci.py
 create mode 100644 benchmarks/interpreter/pyinit.py
 create mode 100644 benchmarks/interpreter/richards.py
 create mode 100644 benchmarks/interpreter/sieve.py
 create mode 100644 benchmarks/warmup/pyflate-fast.py
 create mode 100644 benchmarks/warmup/raytrace.py
 create mode 100644 mx.graalpython/polybench-fork-counts.json

diff --git a/benchmarks/interpreter/deltablue.py b/benchmarks/interpreter/deltablue.py
new file mode 100644
index 0000000000..43bafc1305
--- /dev/null
+++ b/benchmarks/interpreter/deltablue.py
@@ -0,0 +1,647 @@
+# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation. Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. + +""" +deltablue.py +============ + +Ported for the PyPy project. +Contributed by Daniel Lindsley + +This implementation of the DeltaBlue benchmark was directly ported +from the `V8's source code`_, which was in turn derived +from the Smalltalk implementation by John Maloney and Mario +Wolczko. The original Javascript implementation was licensed under the GPL. + +It's been updated in places to be more idiomatic to Python (for loops over +collections, a couple magic methods, ``OrderedCollection`` being a list & things +altering those collections changed to the builtin methods) but largely retains +the layout & logic from the original. (Ugh.) + +""" +# The JS variant implements "OrderedCollection", which basically completely +# overlaps with ``list``. So we'll cheat. :D +class OrderedCollection(list): + pass + + +class Strength(object): + REQUIRED = None + STRONG_PREFERRED = None + PREFERRED = None + STRONG_DEFAULT = None + NORMAL = None + WEAK_DEFAULT = None + WEAKEST = None + + def __init__(self, strength, name): + super(Strength, self).__init__() + self.strength = strength + self.name = name + + @classmethod + def stronger(cls, s1, s2): + return s1.strength < s2.strength + + @classmethod + def weaker(cls, s1, s2): + return s1.strength > s2.strength + + @classmethod + def weakest_of(cls, s1, s2): + if cls.weaker(s1, s2): + return s1 + + return s2 + + @classmethod + def strongest(cls, s1, s2): + if cls.stronger(s1, s2): + return s1 + + return s2 + + def next_weaker(self): + strengths = { + 0: self.__class__.WEAKEST, + 1: self.__class__.WEAK_DEFAULT, + 2: self.__class__.NORMAL, + 3: self.__class__.STRONG_DEFAULT, + 4: self.__class__.PREFERRED, + # TODO: This looks like a bug in the original code. Shouldn't this be + # ``STRONG_PREFERRED? Keeping for porting sake... + 5: self.__class__.REQUIRED, + } + return strengths[self.strength] + + +# This is a terrible pattern IMO, but true to the original JS implementation. 
+Strength.REQUIRED = Strength(0, "required") +Strength.STONG_PREFERRED = Strength(1, "strongPreferred") +Strength.PREFERRED = Strength(2, "preferred") +Strength.STRONG_DEFAULT = Strength(3, "strongDefault") +Strength.NORMAL = Strength(4, "normal") +Strength.WEAK_DEFAULT = Strength(5, "weakDefault") +Strength.WEAKEST = Strength(6, "weakest") + + +class Constraint(object): + + def __init__(self, strength): + super(Constraint, self).__init__() + self.strength = strength + + def add_constraint(self): + global planner + self.add_to_graph() + planner.incremental_add(self) + + def satisfy(self, mark): + global planner + self.choose_method(mark) + + if not self.is_satisfied(): + if self.strength == Strength.REQUIRED: + print('Could not satisfy a required constraint!') + + return None + + self.mark_inputs(mark) + out = self.output() + overridden = out.determined_by + + if overridden is not None: + overridden.mark_unsatisfied() + + out.determined_by = self + + if not planner.add_propagate(self, mark): + print('Cycle encountered') + + out.mark = mark + return overridden + + def destroy_constraint(self): + global planner + if self.is_satisfied(): + planner.incremental_remove(self) + else: + self.remove_from_graph() + + def is_input(self): + return False + + +class UrnaryConstraint(Constraint): + + def __init__(self, v, strength): + super(UrnaryConstraint, self).__init__(strength) + self.my_output = v + self.satisfied = False + self.add_constraint() + + def add_to_graph(self): + self.my_output.add_constraint(self) + self.satisfied = False + + def choose_method(self, mark): + if self.my_output.mark != mark and \ + Strength.stronger(self.strength, self.my_output.walk_strength): + self.satisfied = True + else: + self.satisfied = False + + def is_satisfied(self): + return self.satisfied + + def mark_inputs(self, mark): + # No-ops. + pass + + def output(self): + # Ugh. Keeping it for consistency with the original. So much for + # "we're all adults here"... + return self.my_output + + def recalculate(self): + self.my_output.walk_strength = self.strength + self.my_output.stay = not self.is_input() + + if self.my_output.stay: + self.execute() + + def mark_unsatisfied(self): + self.satisfied = False + + def inputs_known(self, mark): + return True + + def remove_from_graph(self): + if self.my_output is not None: + self.my_output.remove_constraint(self) + self.satisfied = False + + +class StayConstraint(UrnaryConstraint): + + def __init__(self, v, string): + super(StayConstraint, self).__init__(v, string) + + def execute(self): + # The methods, THEY DO NOTHING. + pass + + +class EditConstraint(UrnaryConstraint): + + def __init__(self, v, string): + super(EditConstraint, self).__init__(v, string) + + def is_input(self): + return True + + def execute(self): + # This constraint also does nothing. + pass + + +class Direction(object): + # Hooray for things that ought to be structs! 
+ NONE = 0 + FORWARD = 1 + BACKWARD = -1 + + +class BinaryConstraint(Constraint): + + def __init__(self, v1, v2, strength): + super(BinaryConstraint, self).__init__(strength) + self.v1 = v1 + self.v2 = v2 + self.direction = Direction.NONE + self.add_constraint() + + def choose_method(self, mark): + if self.v1.mark == mark: + if self.v2.mark != mark and Strength.stronger(self.strength, self.v2.walk_strength): + self.direction = Direction.FORWARD + else: + self.direction = Direction.BACKWARD + + if self.v2.mark == mark: + if self.v1.mark != mark and Strength.stronger(self.strength, self.v1.walk_strength): + self.direction = Direction.BACKWARD + else: + self.direction = Direction.NONE + + if Strength.weaker(self.v1.walk_strength, self.v2.walk_strength): + if Strength.stronger(self.strength, self.v1.walk_strength): + self.direction = Direction.BACKWARD + else: + self.direction = Direction.NONE + else: + if Strength.stronger(self.strength, self.v2.walk_strength): + self.direction = Direction.FORWARD + else: + self.direction = Direction.BACKWARD + + def add_to_graph(self): + self.v1.add_constraint(self) + self.v2.add_constraint(self) + self.direction = Direction.NONE + + def is_satisfied(self): + return self.direction != Direction.NONE + + def mark_inputs(self, mark): + self.input().mark = mark + + def input(self): + if self.direction == Direction.FORWARD: + return self.v1 + + return self.v2 + + def output(self): + if self.direction == Direction.FORWARD: + return self.v2 + + return self.v1 + + def recalculate(self): + ihn = self.input() + out = self.output() + out.walk_strength = Strength.weakest_of( + self.strength, ihn.walk_strength) + out.stay = ihn.stay + + if out.stay: + self.execute() + + def mark_unsatisfied(self): + self.direction = Direction.NONE + + def inputs_known(self, mark): + i = self.input() + return i.mark == mark or i.stay or i.determined_by is None + + def remove_from_graph(self): + if self.v1 is not None: + self.v1.remove_constraint(self) + + if self.v2 is not None: + self.v2.remove_constraint(self) + + self.direction = Direction.NONE + + +class ScaleConstraint(BinaryConstraint): + + def __init__(self, src, scale, offset, dest, strength): + self.direction = Direction.NONE + self.scale = scale + self.offset = offset + super(ScaleConstraint, self).__init__(src, dest, strength) + + def add_to_graph(self): + super(ScaleConstraint, self).add_to_graph() + self.scale.add_constraint(self) + self.offset.add_constraint(self) + + def remove_from_graph(self): + super(ScaleConstraint, self).remove_from_graph() + + if self.scale is not None: + self.scale.remove_constraint(self) + + if self.offset is not None: + self.offset.remove_constraint(self) + + def mark_inputs(self, mark): + super(ScaleConstraint, self).mark_inputs(mark) + self.scale.mark = mark + self.offset.mark = mark + + def execute(self): + if self.direction == Direction.FORWARD: + self.v2.value = self.v1.value * self.scale.value + self.offset.value + else: + self.v1.value = ( + self.v2.value - self.offset.value) / self.scale.value + + def recalculate(self): + ihn = self.input() + out = self.output() + out.walk_strength = Strength.weakest_of( + self.strength, ihn.walk_strength) + out.stay = ihn.stay and self.scale.stay and self.offset.stay + + if out.stay: + self.execute() + + +class EqualityConstraint(BinaryConstraint): + + def execute(self): + self.output().value = self.input().value + + +class Variable(object): + + def __init__(self, name, initial_value=0): + super(Variable, self).__init__() + self.name = name + self.value = 
initial_value + self.constraints = OrderedCollection() + self.determined_by = None + self.mark = 0 + self.walk_strength = Strength.WEAKEST + self.stay = True + + def __repr__(self): + # To make debugging this beast from pdb easier... + return '' % ( + self.name, + self.value + ) + + def add_constraint(self, constraint): + self.constraints.append(constraint) + + def remove_constraint(self, constraint): + self.constraints.remove(constraint) + + if self.determined_by == constraint: + self.determined_by = None + + +class Planner(object): + + def __init__(self): + super(Planner, self).__init__() + self.current_mark = 0 + + def incremental_add(self, constraint): + mark = self.new_mark() + overridden = constraint.satisfy(mark) + + while overridden is not None: + overridden = overridden.satisfy(mark) + + def incremental_remove(self, constraint): + out = constraint.output() + constraint.mark_unsatisfied() + constraint.remove_from_graph() + unsatisfied = self.remove_propagate_from(out) + strength = Strength.REQUIRED + # Do-while, the Python way. + repeat = True + + while repeat: + for u in unsatisfied: + if u.strength == strength: + self.incremental_add(u) + + strength = strength.next_weaker() + + repeat = strength != Strength.WEAKEST + + def new_mark(self): + self.current_mark += 1 + return self.current_mark + + def make_plan(self, sources): + mark = self.new_mark() + plan = Plan() + todo = sources + + while len(todo): + c = todo.pop(0) + + if c.output().mark != mark and c.inputs_known(mark): + plan.add_constraint(c) + c.output().mark = mark + self.add_constraints_consuming_to(c.output(), todo) + + return plan + + def extract_plan_from_constraints(self, constraints): + sources = OrderedCollection() + + for c in constraints: + if c.is_input() and c.is_satisfied(): + sources.append(c) + + return self.make_plan(sources) + + def add_propagate(self, c, mark): + todo = OrderedCollection() + todo.append(c) + + while len(todo): + d = todo.pop(0) + + if d.output().mark == mark: + self.incremental_remove(c) + return False + + d.recalculate() + self.add_constraints_consuming_to(d.output(), todo) + + return True + + def remove_propagate_from(self, out): + out.determined_by = None + out.walk_strength = Strength.WEAKEST + out.stay = True + unsatisfied = OrderedCollection() + todo = OrderedCollection() + todo.append(out) + + while len(todo): + v = todo.pop(0) + + for c in v.constraints: + if not c.is_satisfied(): + unsatisfied.append(c) + + determining = v.determined_by + + for c in v.constraints: + if c != determining and c.is_satisfied(): + c.recalculate() + todo.append(c.output()) + + return unsatisfied + + def add_constraints_consuming_to(self, v, coll): + determining = v.determined_by + cc = v.constraints + + for c in cc: + if c != determining and c.is_satisfied(): + # I guess we're just updating a reference (``coll``)? Seems + # inconsistent with the rest of the implementation, where they + # return the lists... + coll.append(c) + + +class Plan(object): + + def __init__(self): + super(Plan, self).__init__() + self.v = OrderedCollection() + + def add_constraint(self, c): + self.v.append(c) + + def __len__(self): + return len(self.v) + + def __getitem__(self, index): + return self.v[index] + + def execute(self): + for c in self.v: + c.execute() + + +# Main + +def chain_test(n): + """ + This is the standard DeltaBlue benchmark. A long chain of equality + constraints is constructed with a stay constraint on one end. 
An + edit constraint is then added to the opposite end and the time is + measured for adding and removing this constraint, and extracting + and executing a constraint satisfaction plan. There are two cases. + In case 1, the added constraint is stronger than the stay + constraint and values must propagate down the entire length of the + chain. In case 2, the added constraint is weaker than the stay + constraint so it cannot be accomodated. The cost in this case is, + of course, very low. Typical situations lie somewhere between these + two extremes. + """ + global planner + planner = Planner() + prev, first, last = None, None, None + + # We need to go up to n inclusively. + for i in range(n + 1): + name = "v%s" % i + v = Variable(name) + + if prev is not None: + EqualityConstraint(prev, v, Strength.REQUIRED) + + if i == 0: + first = v + + if i == n: + last = v + + prev = v + + StayConstraint(last, Strength.STRONG_DEFAULT) + edit = EditConstraint(first, Strength.PREFERRED) + edits = OrderedCollection() + edits.append(edit) + plan = planner.extract_plan_from_constraints(edits) + + for i in range(100): + first.value = i + plan.execute() + + if last.value != i: + print("Chain test failed.") + + +def projection_test(n): + """ + This test constructs a two sets of variables related to each + other by a simple linear transformation (scale and offset). The + time is measured to change a variable on either side of the + mapping and to change the scale and offset factors. + """ + global planner + planner = Planner() + scale = Variable("scale", 10) + offset = Variable("offset", 1000) + src = None + + dests = OrderedCollection() + + for i in range(n): + src = Variable("src%s" % i, i) + dst = Variable("dst%s" % i, i) + dests.append(dst) + StayConstraint(src, Strength.NORMAL) + ScaleConstraint(src, scale, offset, dst, Strength.REQUIRED) + + change(src, 17) + + if dst.value != 1170: + print("Projection 1 failed") + + change(dst, 1050) + + if src.value != 5: + print("Projection 2 failed") + + change(scale, 5) + + for i in range(n - 1): + if dests[i].value != (i * 5 + 1000): + print("Projection 3 failed") + + change(offset, 2000) + + for i in range(n - 1): + if dests[i].value != (i * 5 + 2000): + print("Projection 4 failed") + + +def change(v, new_value): + global planner + edit = EditConstraint(v, Strength.PREFERRED) + edits = OrderedCollection() + edits.append(edit) + + plan = planner.extract_plan_from_constraints(edits) + + for i in range(10): + v.value = new_value + plan.execute() + + edit.destroy_constraint() + + +# HOORAY FOR GLOBALS... Oh wait. +# In spirit of the original, we'll keep it, but ugh. +planner = None + + +def run(): + n = 1000 + chain_test(n) + projection_test(n) diff --git a/benchmarks/interpreter/fibonacci.py b/benchmarks/interpreter/fibonacci.py new file mode 100644 index 0000000000..ce0907b42d --- /dev/null +++ b/benchmarks/interpreter/fibonacci.py @@ -0,0 +1,40 @@ +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. 
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+
+def fibonacci(n):
+    if n < 1:
+        return 0
+    if n <= 2:
+        return 1
+    return fibonacci(n - 1) + fibonacci(n - 2)
+
+def run():
+    number = 31
+    fibo_is = 1346269
+
+    fibo = fibonacci(number)
+
+    if fibo != fibo_is:
+        raise AssertionError(f"Unexpected result: {fibo}")
+
+    return fibo
diff --git a/benchmarks/interpreter/pyinit.py b/benchmarks/interpreter/pyinit.py
new file mode 100644
index 0000000000..89f205d7f1
--- /dev/null
+++ b/benchmarks/interpreter/pyinit.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation. Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+
+
+def run():
+    """
+    This benchmark is intentionally left empty, as its sole purpose is to be
+    used to measure Python initialization time.
+    """
+    pass
diff --git a/benchmarks/interpreter/richards.py b/benchmarks/interpreter/richards.py
new file mode 100644
index 0000000000..c0b5cbb334
--- /dev/null
+++ b/benchmarks/interpreter/richards.py
@@ -0,0 +1,422 @@
+# Copyright 2008-2010 Isaac Gouy
+# Copyright (c) 2013, 2014, Regents of the University of California
+# Copyright (c) 2018, 2021, Oracle and/or its affiliates.
+# All rights reserved.
+#
+# Revised BSD license
+#
+# This is a specific instance of the Open Source Initiative (OSI) BSD license
+# template http://www.opensource.org/licenses/bsd-license.php
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# Neither the name of "The Computer Language Benchmarks Game" nor the name of +# "The Computer Language Shootout Benchmarks" nor the name "nanobench" nor the +# name "bencher" nor the names of its contributors may be used to endorse or +# promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# based on a Java version: +# Based on original version written in BCPL by Dr Martin Richards +# in 1981 at Cambridge University Computer Laboratory, England +# and a C++ version derived from a Smalltalk version written by +# L Peter Deutsch. +# Java version: Copyright (C) 1995 Sun Microsystems, Inc. +# Translation from C++, Mario Wolczko +# Outer loop added by Alex Jacoby +from __future__ import print_function + +# Task IDs +I_IDLE = 1 +I_WORK = 2 +I_HANDLERA = 3 +I_HANDLERB = 4 +I_DEVA = 5 +I_DEVB = 6 + +# Packet types +K_DEV = 1000 +K_WORK = 1001 + +# Packet + +BUFSIZE = 4 + +BUFSIZE_RANGE = list(range(BUFSIZE)) + +class Packet(object): + def __init__(self,l,i,k): + self.link = l + self.ident = i + self.kind = k + self.datum = 0 + self.data = [0] * BUFSIZE + + def append_to(self,lst): + self.link = None + if lst is None: + return self + else: + p = lst + next = p.link + while next is not None: + p = next + next = p.link + p.link = self + return lst + +# Task Records + +class TaskRec(object): + pass + +class DeviceTaskRec(TaskRec): + def __init__(self): + self.pending = None + +class IdleTaskRec(TaskRec): + def __init__(self): + self.control = 1 + self.count = 10000 + +class HandlerTaskRec(TaskRec): + def __init__(self): + self.work_in = None + self.device_in = None + + def workInAdd(self,p): + self.work_in = p.append_to(self.work_in) + return self.work_in + + def deviceInAdd(self,p): + self.device_in = p.append_to(self.device_in) + return self.device_in + +class WorkerTaskRec(TaskRec): + def __init__(self): + self.destination = I_HANDLERA + self.count = 0 +# Task + +class TaskState(object): + def __init__(self): + self.packet_pending = True + self.task_waiting = False + self.task_holding = False + + def packetPending(self): + self.packet_pending = True + self.task_waiting = False + self.task_holding = False + return self + + def waiting(self): + self.packet_pending = False + self.task_waiting = True + self.task_holding = False + return self + + def running(self): + self.packet_pending = False + self.task_waiting = False + self.task_holding = False + return self + + def waitingWithPacket(self): + self.packet_pending = True + self.task_waiting = True + self.task_holding = 
False + return self + + def isPacketPending(self): + return self.packet_pending + + def isTaskWaiting(self): + return self.task_waiting + + def isTaskHolding(self): + return self.task_holding + + def isTaskHoldingOrWaiting(self): + return self.task_holding or (not self.packet_pending and self.task_waiting) + + def isWaitingWithPacket(self): + return self.packet_pending and self.task_waiting and not self.task_holding + + + + + +tracing = False +layout = 0 + +def trace(a): + global layout + layout -= 1 + if layout <= 0: + print() + layout = 50 + print(a, end=' ') + + +TASKTABSIZE = 10 + +class TaskWorkArea(object): + def __init__(self): + self.taskTab = [None] * TASKTABSIZE + + self.taskList = None + + self.holdCount = 0 + self.qpktCount = 0 + +taskWorkArea = TaskWorkArea() + +class Task(TaskState): + + + def __init__(self,i,p,w,initialState,r): + self.link = taskWorkArea.taskList + self.ident = i + self.priority = p + self.input = w + + self.packet_pending = initialState.isPacketPending() + self.task_waiting = initialState.isTaskWaiting() + self.task_holding = initialState.isTaskHolding() + + self.handle = r + + taskWorkArea.taskList = self + taskWorkArea.taskTab[i] = self + + def fn(self,pkt,r): + raise NotImplementedError + + + def addPacket(self,p,old): + if self.input is None: + self.input = p + self.packet_pending = True + if self.priority > old.priority: + return self + else: + p.append_to(self.input) + return old + + + def runTask(self): + if self.isWaitingWithPacket(): + msg = self.input + self.input = msg.link + if self.input is None: + self.running() + else: + self.packetPending() + else: + msg = None + + return self.fn(msg,self.handle) + + + def waitTask(self): + self.task_waiting = True + return self + + + def hold(self): + taskWorkArea.holdCount += 1 + self.task_holding = True + return self.link + + + def release(self,i): + t = self.findtcb(i) + t.task_holding = False + if t.priority > self.priority: + return t + else: + return self + + + def qpkt(self,pkt): + t = self.findtcb(pkt.ident) + taskWorkArea.qpktCount += 1 + pkt.link = None + pkt.ident = self.ident + return t.addPacket(pkt,self) + + + def findtcb(self,id): + t = taskWorkArea.taskTab[id] + if t is None: + raise Exception("Bad task id %d" % id) + return t + + +# DeviceTask + + +class DeviceTask(Task): + def __init__(self,i,p,w,s,r): + Task.__init__(self,i,p,w,s,r) + + def fn(self,pkt,r): + d = r + assert isinstance(d, DeviceTaskRec) + if pkt is None: + pkt = d.pending + if pkt is None: + return self.waitTask() + else: + d.pending = None + return self.qpkt(pkt) + else: + d.pending = pkt + if tracing: trace(pkt.datum) + return self.hold() + + + +class HandlerTask(Task): + def __init__(self,i,p,w,s,r): + Task.__init__(self,i,p,w,s,r) + + def fn(self,pkt,r): + h = r + assert isinstance(h, HandlerTaskRec) + if pkt is not None: + if pkt.kind == K_WORK: + h.workInAdd(pkt) + else: + h.deviceInAdd(pkt) + work = h.work_in + if work is None: + return self.waitTask() + count = work.datum + if count >= BUFSIZE: + h.work_in = work.link + return self.qpkt(work) + + dev = h.device_in + if dev is None: + return self.waitTask() + + h.device_in = dev.link + dev.datum = work.data[count] + work.datum = count + 1 + return self.qpkt(dev) + +# IdleTask + + +class IdleTask(Task): + def __init__(self,i,p,w,s,r): + Task.__init__(self,i,0,None,s,r) + + def fn(self,pkt,r): + i = r + assert isinstance(i, IdleTaskRec) + i.count -= 1 + if i.count == 0: + return self.hold() + elif i.control & 1 == 0: + i.control //= 2 + return self.release(I_DEVA) + 
else: + i.control = i.control // 2 ^ 0xd008 + return self.release(I_DEVB) + + +# WorkTask + + +A = ord('A') + +class WorkTask(Task): + def __init__(self,i,p,w,s,r): + Task.__init__(self,i,p,w,s,r) + + def fn(self,pkt,r): + w = r + assert isinstance(w, WorkerTaskRec) + if pkt is None: + return self.waitTask() + + if w.destination == I_HANDLERA: + dest = I_HANDLERB + else: + dest = I_HANDLERA + + w.destination = dest + pkt.ident = dest + pkt.datum = 0 + + for i in BUFSIZE_RANGE: # xrange(BUFSIZE) + w.count += 1 + if w.count > 26: + w.count = 1 + pkt.data[i] = A + w.count - 1 + + return self.qpkt(pkt) + +def schedule(): + t = taskWorkArea.taskList + while t is not None: + pkt = None + + if tracing: + print("tcb =",t.ident) + + if t.isTaskHoldingOrWaiting(): + t = t.link + else: + if tracing: trace(chr(ord("0")+t.ident)) + t = t.runTask() + +def run(): + taskWorkArea.holdCount = 0 + taskWorkArea.qpktCount = 0 + + IdleTask(I_IDLE, 1, 10000, TaskState().running(), IdleTaskRec()) + + wkq = Packet(None, 0, K_WORK) + wkq = Packet(wkq , 0, K_WORK) + WorkTask(I_WORK, 1000, wkq, TaskState().waitingWithPacket(), WorkerTaskRec()) + + wkq = Packet(None, I_DEVA, K_DEV) + wkq = Packet(wkq , I_DEVA, K_DEV) + wkq = Packet(wkq , I_DEVA, K_DEV) + HandlerTask(I_HANDLERA, 2000, wkq, TaskState().waitingWithPacket(), HandlerTaskRec()) + + wkq = Packet(None, I_DEVB, K_DEV) + wkq = Packet(wkq , I_DEVB, K_DEV) + wkq = Packet(wkq , I_DEVB, K_DEV) + HandlerTask(I_HANDLERB, 3000, wkq, TaskState().waitingWithPacket(), HandlerTaskRec()) + + wkq = None + DeviceTask(I_DEVA, 4000, wkq, TaskState().waiting(), DeviceTaskRec()) + DeviceTask(I_DEVB, 5000, wkq, TaskState().waiting(), DeviceTaskRec()) + + schedule() + + return taskWorkArea.holdCount == 9297 and taskWorkArea.qpktCount == 23246 \ No newline at end of file diff --git a/benchmarks/interpreter/sieve.py b/benchmarks/interpreter/sieve.py new file mode 100644 index 0000000000..7bba252016 --- /dev/null +++ b/benchmarks/interpreter/sieve.py @@ -0,0 +1,43 @@ +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. 
+ +def run(): + number = 600000 + primes = list(range(0, number+1)) + + i = 2 + while (i**2) <= number: + if primes[i] != 0: + for j in range(2, number): + if primes[i] * j > number: + break + else: + primes[primes[i] * j] = 0 + i += 1 + + count = 0 + for c in range(2, number+1): + if primes[c] != 0: + count += 1 + + return count diff --git a/benchmarks/warmup/pyflate-fast.py b/benchmarks/warmup/pyflate-fast.py new file mode 100644 index 0000000000..cd77b70f59 --- /dev/null +++ b/benchmarks/warmup/pyflate-fast.py @@ -0,0 +1,717 @@ +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. + +# This benchmark is derived from the URL below, which had the following +# copyright notice. +# +# Copyright 2006--2007-01-21 Paul Sladen +# +# You may use and distribute this code under any DFSG-compatible +# license (eg. BSD, GNU GPLv2). +# +# Stand-alone pure-Python DEFLATE (gzip) and bzip2 decoder/decompressor. +# This is probably most useful for research purposes/index building; there +# is certainly some room for improvement in the Huffman bit-matcher. +# +# With the as-written implementation, there was a known bug in BWT +# decoding to do with repeated strings. This has been worked around; +# see 'bwt_reverse()'. Correct output is produced in all test cases +# but ideally the problem would be found... 
+import sys +if sys.version_info[0] > 2: + xrange = range + +class BitfieldBase(object): + def __init__(self, x): + if isinstance(x,BitfieldBase): + self.f = x.f + self.bits = x.bits + self.bitfield = x.bitfield + self.count = x.bitfield + else: + self.f = x + self.bits = 0 + self.bitfield = 0x0 + self.count = 0 + def _read(self, n): + s = self.f.read(n) + if not s: + raise "Length Error" + self.count += len(s) + return s + def needbits(self, n): + while self.bits < n: + self._more() + def _mask(self, n): + return (1 << n) - 1 + def toskip(self): + return self.bits & 0x7 + def align(self): + self.readbits(self.toskip()) + def dropbits(self, n = 8): + while n >= self.bits and n > 7: + n -= self.bits + self.bits = 0 + n -= len(self.f._read(n >> 3)) << 3 + if n: + self.readbits(n) + # No return value + def dropbytes(self, n = 1): + self.dropbits(n << 3) + def tell(self): + return self.count - ((self.bits+7) >> 3), 7 - ((self.bits-1) & 0x7) + def tellbits(self): + bytes, bits = self.tell() + return (bytes << 3) + bits + +class Bitfield(BitfieldBase): + def _more(self): + c = self._read(1) + self.bitfield += ord(c) << self.bits + self.bits += 8 + def snoopbits(self, n = 8): + if n > self.bits: + self.needbits(n) + return self.bitfield & self._mask(n) + def readbits(self, n = 8): + if n > self.bits: + self.needbits(n) + r = self.bitfield & self._mask(n) + self.bits -= n + self.bitfield >>= n + return r + +class RBitfield(BitfieldBase): + def _more(self): + c = self._read(1) + self.bitfield <<= 8 + self.bitfield += ord(c) + self.bits += 8 + def snoopbits(self, n = 8): + if n > self.bits: + self.needbits(n) + return (self.bitfield >> (self.bits - n)) & self._mask(n) + def readbits(self, n = 8): + if n > self.bits: + self.needbits(n) + r = (self.bitfield >> (self.bits - n)) & self._mask(n) + self.bits -= n + self.bitfield &= ~(self._mask(n) << self.bits) + return r + +def printbits(v, n): + o = '' + for i in range(n): + if v & 1: + o = '1' + o + else: + o = '0' + o + v >>= 1 + return o + +class HuffmanLength(object): + def __init__(self, code, bits = 0): + self.code = code + self.bits = bits + self.symbol = None + def __repr__(self): + return {}.format((self.code, self.bits, self.symbol, self.reverse_symbol)) + def __cmp__(self, other): + if self.bits == other.bits: + return cmp(self.code, other.code) + else: + return cmp(self.bits, other.bits) + def __lt__(self, other): + if self.bits == other.bits: + return self.code < other.code + else: + return self.bits < other.bits + +def reverse_bits(v, n): + a = 1 << 0 + b = 1 << (n - 1) + z = 0 + for i in range(n-1, -1, -2): + z |= (v >> i) & a + z |= (v << i) & b + a <<= 1 + b >>= 1 + return z + +def reverse_bytes(v, n): + a = 0xff << 0 + b = 0xff << (n - 8) + z = 0 + for i in range(n-8, -8, -16): + z |= (v >> i) & a + z |= (v << i) & b + a <<= 8 + b >>= 8 + return z + +class HuffmanTable(object): + def __init__(self, bootstrap): + l = [] + start, bits = bootstrap[0] + for finish, endbits in bootstrap[1:]: + if bits: + for code in range(start, finish): + l.append(HuffmanLength(code, bits)) + start, bits = finish, endbits + if endbits == -1: + break + l.sort() + self.table = l + + def populate_huffman_symbols(self): + bits, symbol = -1, -1 + for x in self.table: + symbol += 1 + if x.bits != bits: + symbol <<= (x.bits - bits) + bits = x.bits + x.symbol = symbol + x.reverse_symbol = reverse_bits(symbol, bits) + #print(printbits(x.symbol, bits), printbits(x.reverse_symbol, bits)) + + def tables_by_bits(self): + d = {} + for x in self.table: + try: + 
d[x.bits].append(x) + except: + d[x.bits] = [x] + pass + + def min_max_bits(self): + self.min_bits, self.max_bits = 16, -1 + for x in self.table: + if x.bits < self.min_bits: self.min_bits = x.bits + if x.bits > self.max_bits: self.max_bits = x.bits + + def _find_symbol(self, bits, symbol, table): + for h in table: + if h.bits == bits and h.reverse_symbol == symbol: + #print("found, processing", h.code) + return h.code + return -1 + + def find_next_symbol(self, field, reversed = True): + cached_length = -1 + cached = None + for x in self.table: + if cached_length != x.bits: + cached = field.snoopbits(x.bits) + cached_length = x.bits + if (reversed and x.reverse_symbol == cached) or (not reversed and x.symbol == cached): + field.readbits(x.bits) + return x.code + raise "unfound symbol, even after end of table @ {}".format(field.tell()) + + for bits in range(self.min_bits, self.max_bits + 1): + #print(printbits(field.snoopbits(bits),bits)) + r = self._find_symbol(bits, field.snoopbits(bits), self.table) + if 0 <= r: + field.readbits(bits) + return r + elif bits == self.max_bits: + raise "unfound symbol, even after max_bits" + +class OrderedHuffmanTable(HuffmanTable): + def __init__(self, lengths): + l = len(lengths) + z = list(zip(range(l), lengths)) + [(l, -1)] + HuffmanTable.__init__(self, z) + +def code_length_orders(i): + return (16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15)[i] + +def distance_base(i): + return (1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577)[i] + +def length_base(i): + return (3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258)[i-257] + +def extra_distance_bits(n): + if 0 <= n <= 1: + return 0 + elif 2 <= n <= 29: + return (n >> 1) - 1 + else: + raise "illegal distance code" + +def extra_length_bits(n): + if 257 <= n <= 260 or n == 285: + return 0 + elif 261 <= n <= 284: + return ((n-257) >> 2) - 1 + else: + raise "illegal length code" + +def move_to_front(l, c): + l[:] = l[c:c+1] + l[0:c] + l[c+1:] + +def bwt_transform(L): + # Semi-inefficient way to get the character counts + F = ''.join(sorted(L)) + base = [] + for i in range(256): + base.append(F.find(chr(i))) + + pointers = [-1] * len(L) + for i, char in enumerate(L): + symbol = ord(char) + pointers[base[symbol]] = i + base[symbol] += 1 + return pointers + +def bwt_reverse(L, end): + out = [] + if len(L): + T = bwt_transform(L) + + # STRAGENESS WARNING: There was a bug somewhere here in that + # if the output of the BWT resolves to a perfect copy of N + # identical strings (think exact multiples of 255 'X' here), + # then a loop is formed. When decoded, the output string would + # be cut off after the first loop, typically '\0\0\0\0\xfb'. + # The previous loop construct was: + # + # next = T[end] + # while next != end: + # out += L[next] + # next = T[next] + # out += L[next] + # + # For the moment, I've instead replaced it with a check to see + # if there has been enough output generated. I didn't figured + # out where the off-by-one-ism is yet---that actually produced + # the cyclic loop. 
+ + for i in xrange(len(L)): + end = T[end] + out.append(L[end]) + + return "".join(out) + +def compute_used(b): + huffman_used_map = b.readbits(16) + #print('used map', hex(huffman_used_map)) + map_mask = 1 << 15 + used = [] + while map_mask > 0: + if huffman_used_map & map_mask: + huffman_used_bitmap = b.readbits(16) + bit_mask = 1 << 15 + while bit_mask > 0: + if huffman_used_bitmap & bit_mask: + #print('hit', len(used)) + pass + used += [bool(huffman_used_bitmap & bit_mask)] + bit_mask >>= 1 + else: + used += [False] * 16 + map_mask >>= 1 + return used + +def compute_selectors_list(b, huffman_groups): + selectors_used = b.readbits(15) + #print('selectors used', selectors_used) + mtf = list(range(huffman_groups)) + selectors_list = [] + for i in range(selectors_used): + # zero-terminated bit runs (0..62) of MTF'ed huffman table + c = 0 + while b.readbits(1): + c += 1 + if c >= huffman_groups: + raise "Bzip2 chosen selector greater than number of groups (max 6)" + if c >= 0: + move_to_front(mtf, c) + #print(c, mtf) + selectors_list.append(mtf[0]) + return selectors_list + +def compute_tables(b, huffman_groups, symbols_in_use): + groups_lengths = [] + for j in range(huffman_groups): + length = start_huffman_length = b.readbits(5) + #print('start_huffman_length', start_huffman_length) + lengths = [] + for i in range(symbols_in_use): + if not 0 <= length <= 20: + raise "Bzip2 Huffman length code outside range 0..20" + while b.readbits(1): + length -= (b.readbits(1) * 2) - 1 + lengths += [length] + groups_lengths += [lengths] + #print(groups_lengths) + + tables = [] + for g in groups_lengths: + codes = OrderedHuffmanTable(g) + codes.populate_huffman_symbols() + codes.min_max_bits() + tables.append(codes) + return tables + +def decode_huffman_block(b, out): + #print('bzip2 Huffman block') + randomised = b.readbits(1) + if randomised: + raise "Bzip2 randomised support not implemented" + pointer = b.readbits(24) + #print('pointer', pointer, hex(pointer)) + used = compute_used(b) + + huffman_groups = b.readbits(3) + #print('huffman groups', huffman_groups) + if not 2 <= huffman_groups <= 6: + raise "Bzip2: Number of Huffman groups not in range 2..6" + + selectors_list = compute_selectors_list(b, huffman_groups) + symbols_in_use = sum(used) + 2 # remember RUN[AB] RLE symbols + tables = compute_tables(b, huffman_groups, symbols_in_use) + + #favourites = map(chr,range(sum(used))) + #favourites = string.join([y for x,y in map(None,used,map(chr,range(len(used)))) if x],'') + favourites = list([chr(i) for i, x in enumerate(used) if x]) + + data_start = b.tellbits() + selector_pointer = 0 + decoded = 0 + # Main Huffman loop + repeat = repeat_power = 0 + buffer = [] + t = None + while True: + decoded -= 1 + if decoded <= 0: + #print('RETABLE TIME', selectors_list[selector_pointer]) + decoded = 50 # Huffman table re-evaluate/switch length + if selector_pointer <= len(selectors_list): + t = tables[selectors_list[selector_pointer]] + selector_pointer += 1 + #print('tables changed', tables[0].table) + #print(b.tell()) + r = t.find_next_symbol(b, False) + #print('symbol', r) + if 0 <= r <= 1: + if repeat == 0: + repeat_power = 1 + #print('run', repeat) + repeat += repeat_power << r + repeat_power <<= 1 + continue + elif repeat > 0: + # Remember kids: If there is only one repeated + # real symbol, it is encoded with *zero* Huffman + # bits and not output... so buffer[-1] doesn't work. 
+ #print('runfinal', repeat) + buffer.append(favourites[0] * repeat) + repeat = 0 + if r == symbols_in_use - 1: + #print('finished {} .. {}'.format(buffer[:10], buffer[-10:]), 'len', len(buffer)) + break + else: + o = favourites[r-1] + #print('pre {}'.format(favourites)) + move_to_front(favourites, r-1) + #print('post {}'.format(favourites)) + #print('output {}.format(o)) + buffer.append(o) + pass + #print('huffman {}'.format(buffer), pointer, len(buffer)) + #nearly_there = bwt_reverse(buffer, len(buffer)-pointer-1) + nt = nearly_there = bwt_reverse("".join(buffer), pointer) + #print('nearly there {}'.format(nearly_there)) + i = 0 + # Pointless/irritating run-length encoding step + while i < len(nearly_there): + #print('RLE decode {}'.format(nt[i:])) + if i < len(nearly_there) - 4 and nt[i] == nt[i+1] == nt[i+2] == nt[i+3]: + out.append(nearly_there[i] * (ord(nearly_there[i+4]) + 4)) + i += 5 + else: + out.append(nearly_there[i]) + i += 1 + #print('done {} .. {}'.format(done[:10], done[-10:]), 'len', len(done)) + + #raise "Bip2 block support not implemented" + +# Sixteen bits of magic have been removed by the time we start decoding +def bzip2_main(input): + b = RBitfield(input) + + method = b.readbits(8) + if method != ord('h'): + raise "Unknown (not type 'h'uffman Bzip2) compression method" + + blocksize = b.readbits(8) + if ord('1') <= blocksize <= ord('9'): + blocksize = blocksize - ord('0') + else: + raise "Unknown (not size '0'-'9') Bzip2 blocksize" + + out = [] + while True: + #header_start = b.tellbits() + blocktype = b.readbits(48) + crc = b.readbits(32) + #print(hex(blocktype)) + #print(hex(crc)) + if blocktype == 0x314159265359: # (pi) + decode_huffman_block(b, out) + elif blocktype == 0x177245385090: # sqrt(pi) + #print('bzip2 end-of-stream block') + b.align() + break + else: + raise "Illegal Bzip2 blocktype" + #print(len(out), set([len(s) for s in out])) + return ''.join(out) + +# Sixteen bits of magic have been removed by the time we start decoding +def gzip_main(field): + b = Bitfield(field) + method = b.readbits(8) + if method != 8: + raise "Unknown (not type eight DEFLATE) compression method" + + # Use flags, drop modification time, extra flags and OS creator type. 
+ flags = b.readbits(8) + #print('flags', hex(flags)) + mtime = b.readbits(32) + #print('mtime', hex(mtime)) + extra_flags = b.readbits(8) + #print('extra_flags', hex(extra_flags)) + os_type = b.readbits(8) + #print('os_type', hex(os_type)) + + if flags & 0x04: # structured GZ_FEXTRA miscellaneous data + xlen = b.readbits(16) + b.dropbytes(xlen) + while flags & 0x08: # original GZ_FNAME filename + if not b.readbits(8): + break + while flags & 0x10: # human readable GZ_FCOMMENT + if not b.readbits(8): + break + if flags & 0x02: # header-only GZ_FHCRC checksum + b.readbits(16) + + #print("gzip header skip", b.tell()) + out = [] + + #print('header 0 count 0 bits', b.tellbits()) + + while True: + header_start = b.tell() + bheader_start = b.tellbits() + #print('new block at', b.tell()) + lastbit = b.readbits(1) + #print("last bit", hex(lastbit)) + blocktype = b.readbits(2) + #print("deflate-blocktype", blocktype, 'beginning at', header_start) + + #print('raw block data at', b.tell()) + if blocktype == 0: + b.align() + length = b.readbits(16) + if length & b.readbits(16): + raise "stored block lengths do not match each other" + #print("stored block of length", length) + #print('raw data at', b.tell(), 'bits', b.tellbits() - bheader_start) + #print('header 0 count 0 bits', b.tellbits() - bheader_start) + for i in range(length): + out.append(chr(b.readbits(8))) + #print('linear', b.tell()[0], 'count', length, 'bits', b.tellbits() - bheader_start) + + elif blocktype == 1 or blocktype == 2: # Huffman + main_literals, main_distances = None, None + + if blocktype == 1: # Static Huffman + static_huffman_bootstrap = [(0, 8), (144, 9), (256, 7), (280, 8), (288, -1)] + static_huffman_lengths_bootstrap = [(0, 5), (32, -1)] + main_literals = HuffmanTable(static_huffman_bootstrap) + main_distances = HuffmanTable(static_huffman_lengths_bootstrap) + + elif blocktype == 2: # Dynamic Huffman + literals = b.readbits(5) + 257 + distances = b.readbits(5) + 1 + code_lengths_length = b.readbits(4) + 4 + + l = [0] * 19 + for i in range(code_lengths_length): + l[code_length_orders(i)] = b.readbits(3) + + dynamic_codes = OrderedHuffmanTable(l) + dynamic_codes.populate_huffman_symbols() + dynamic_codes.min_max_bits() + + # Decode the code_lengths for both tables at once, + # then split the list later + + code_lengths = [] + n = 0 + while n < (literals + distances): + r = dynamic_codes.find_next_symbol(b) + if 0 <= r <= 15: # literal bitlength for this code + count = 1 + what = r + elif r == 16: # repeat last code + count = 3 + b.readbits(2) + # Is this supposed to default to '0' if in the zeroth position? 
+ what = code_lengths[-1] + elif r == 17: # repeat zero + count = 3 + b.readbits(3) + what = 0 + elif r == 18: # repeat zero lots + count = 11 + b.readbits(7) + what = 0 + else: + raise "next code length is outside of the range 0 <= r <= 18" + code_lengths += [what] * count + n += count + + main_literals = OrderedHuffmanTable(code_lengths[:literals]) + main_distances = OrderedHuffmanTable(code_lengths[literals:]) + + # Common path for both Static and Dynamic Huffman decode now + + data_start = b.tell() + #print('raw data at', data_start, 'bits', b.tellbits() - bheader_start) + #print('header 0 count 0 bits', b.tellbits() - bheader_start) + + main_literals.populate_huffman_symbols() + main_distances.populate_huffman_symbols() + + main_literals.min_max_bits() + main_distances.min_max_bits() + + literal_count = 0 + literal_start = 0 + + while True: + lz_start = b.tellbits() + r = main_literals.find_next_symbol(b) + if 0 <= r <= 255: + if literal_count == 0: + literal_start = lz_start + literal_count += 1 + #print('found literal {}'.format(chr(r))) + out.append(chr(r)) + elif r == 256: + if literal_count > 0: + #print('add 0 count', literal_count, 'bits', lz_start-literal_start, 'dat) {}'.format(out[-literal_count:])) + literal_count = 0 + #print('eos 0 count 0 bits', b.tellbits() - lz_start) + #print('end of Huffman block encountered') + break + elif 257 <= r <= 285: # dictionary lookup + if literal_count > 0: + #print('add 0 count', literal_count, 'bits', lz_start-literal_start, 'data {}'.format(out[-literal_count:])) + literal_count = 0 + length_extra = b.readbits(extra_length_bits(r)) + length = length_base(r) + length_extra + #print('dictionary lookup: length', length,) + + r1 = main_distances.find_next_symbol(b) + if 0 <= r1 <= 29: + distance = distance_base(r1) + b.readbits(extra_distance_bits(r1)) + cached_length = length + while length > distance: + out += out[-distance:] + length -= distance + if length == distance: + out += out[-distance:] + else: + out += out[-distance:length-distance] + #print('copy', -distance, 'count', cached_length, 'bits', b.tellbits() - lz_start, 'data {}'.format(out[-cached_length:])) + elif 30 <= r1 <= 31: + raise "illegal unused distance symbol in use @ {}".format(b.tell()) + elif 286 <= r <= 287: + raise "illegal unused literal/length symbol in use @ {}".format(b.tell()) + elif blocktype == 3: + raise "illegal unused blocktype in use @ {}".format(b.tell()) + + if lastbit: + #print("this was the last block, time to leave", b.tell()) + break + + footer_start = b.tell() + bfooter_start = b.tellbits() + b.align() + crc = b.readbits(32) + final_length = b.readbits(32) + #print(len(out)) + next_unused = b.tell() + #print('deflate-end-of-stream', 5, 'beginning at', footer_start, 'raw data at', next_unused, 'bits', b.tellbits() - bfooter_start) + #print('deflate-end-of-stream') + #print('crc', hex(crc), 'final length', final_length) + #print('header 0 count 0 bits', b.tellbits()-bfooter_start) + + return "".join(out) + +import sys, os + +def _main(): + filename = os.path.join(os.path.dirname(__file__), "graalpython-pyflate-benchmark-resource.tar.gz") + input = open(filename, 'rb') + field = RBitfield(input) + + magic = field.readbits(16) + if magic == 0x1f8b: # GZip + out = gzip_main(field) + elif magic == 0x425a: # BZip2 + out = bzip2_main(field) + else: + raise "Unknown file magic "+hex(magic)+", not a gzip/bzip2 file" + + from hashlib import md5 + # print(md5(out.encode('utf-8')).hexdigest()) + assert md5(out.encode('utf-8')).hexdigest() == 
"0d8772135b981f5cfd9acbf23ba643d7" + input.close() + +run = _main + + +def main(warmup=20, iterations=30): + import time + for i in range(warmup): + t1 = time.time() + _main() + t2 = time.time() - t1 + print("Warmup", i, ":", t2 * 1000, "ms") + for i in range(iterations): + t1 = time.time() + _main() + t2 = time.time() - t1 + print("Iteration", i, ":", t2 * 1000, "ms") + + +if __name__ == "__main__" and sys.implementation.name != "graalpy": + print(f"Running {os.path.basename(__file__)}") + times = main(20, 30) + for idx, time in enumerate(times): + print("Iteration", idx, ":", time, "ms") diff --git a/benchmarks/warmup/raytrace.py b/benchmarks/warmup/raytrace.py new file mode 100644 index 0000000000..c69fe3f6d6 --- /dev/null +++ b/benchmarks/warmup/raytrace.py @@ -0,0 +1,417 @@ +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. + +# This benchmark is derived from the URL below, which had the following +# copyright notice and was originally under MIT. +# +# Copyright Callum and Tony Garnock-Jones, 2008. +# This file contains definitions for a simple raytracer. +# This file may be freely redistributed under the MIT license. 
+ +import math + +EPSILON = 0.00001 +INF = 1.0e9 + +class Vector(object): + def __init__(self, initx, inity, initz): + self.x = initx + self.y = inity + self.z = initz + + def __str__(self): + return '(%s,%s,%s)' % (self.x, self.y, self.z) + + def __repr__(self): + return 'Vector(%s,%s,%s)' % (self.x, self.y, self.z) + + def magnitude(self): + return math.sqrt(self.dot(self)) + + def __add__(self, other): + return Vector(self.x + other.x, self.y + other.y, self.z + other.z) + + def __sub__(self, other): + return Vector(self.x - other.x, self.y - other.y, self.z - other.z) + + def scale(self, factor): + return Vector(factor * self.x, factor * self.y, factor * self.z) + + def dot(self, other): + return (self.x * other.x) + (self.y * other.y) + (self.z * other.z) + + def cross(self, other): + return Vector(self.y * other.z - self.z * other.y, + self.z * other.x - self.x * other.z, + self.x * other.y - self.y * other.x) + + def normalized(self): + return self.scale(1.0 / self.magnitude()) + + def negated(self): + return self.scale(-1) + + def __eq__(self, other): + return (self.x == other.x) and (self.y == other.y) and (self.z == other.z) + + def isVector(self): + return True + + def isPoint(self): + return False + + def reflectThrough(self, normal): + d = normal.scale(self.dot(normal)) + return self - d.scale(2) + +VZERO = Vector(0,0,0) +VRIGHT = Vector(1,0,0) +VUP = Vector(0,1,0) +VOUT = Vector(0,0,1) + +if not (VRIGHT.reflectThrough(VUP) == VRIGHT): + print(1/0) +if not (Vector(-1,-1,0).reflectThrough(VUP) == Vector(-1,1,0)): + print(1/0) + +class Point(object): + def __init__(self, initx, inity, initz): + self.x = initx + self.y = inity + self.z = initz + + def __str__(self): + return '(%s,%s,%s)' % (self.x, self.y, self.z) + + def __repr__(self): + return 'Point(%s,%s,%s)' % (self.x, self.y, self.z) + + def __add__(self, other): + return Point(self.x + other.x, self.y + other.y, self.z + other.z) + + def __sub__(self, other): + return Vector(self.x - other.x, self.y - other.y, self.z - other.z) + + def isVector(self): + return False + + def isPoint(self): + return True + +class Sphere(object): + def __init__(self, centre, radius): + self.centre = centre + self.radius = radius + + def __repr__(self): + return 'Sphere(%s,%s)' % (repr(self.centre), self.radius) + + def intersectionTime(self, ray): + cp = self.centre - ray.point + v = cp.dot(ray.vector) + discriminant = (self.radius * self.radius) - (cp.dot(cp) - v*v) + if discriminant < 0: + return INF + 1 + else: + return v - math.sqrt(discriminant) + + def normalAt(self, p): + return (p - self.centre).normalized() + +class Halfspace(object): + def __init__(self, point, normal): + self.point = point + self.normal = normal.normalized() + + def __repr__(self): + return 'Halfspace(%s,%s)' % (repr(self.point), repr(self.normal)) + + def intersectionTime(self, ray): + v = ray.vector.dot(self.normal) + if v: + return 1 / -v + else: + return INF + 1 + + def normalAt(self, p): + return self.normal + +class Ray(object): + def __init__(self, point, vector): + self.point = point + self.vector = vector.normalized() + + def __repr__(self): + return 'Ray(%s,%s)' % (repr(self.point), repr(self.vector)) + + def pointAtTime(self, t): + return self.point + self.vector.scale(t) + +PZERO = Point(0,0,0) + +a = Vector(3,4,12) +b = Vector(1,1,1) + +class PpmCanvas(object): + def __init__(self, width, height, filenameBase): + self.bytes = [0] * (width * height * 3) + for i in range(width * height): + self.bytes[i * 3 + 2] = 255 + self.width = width + self.height = 
height + self.filenameBase = filenameBase + + def plot(self, x, y, r, g, b): + i = ((self.height - y - 1) * self.width + x) * 3 + self.bytes[i ] = max(0, min(255, int(r * 255))) + self.bytes[i+1] = max(0, min(255, int(g * 255))) + self.bytes[i+2] = max(0, min(255, int(b * 255))) + + def save(self): + with open(self.filenameBase + '.ppm', 'wb') as f: + f.write('P6 %d %d 255\n' % (self.width, self.height)) + l = [] + for c in self.bytes: + l.append(chr(c)) + f.write(''.join(l)) + +def firstIntersection(intersections): + result = intersections[0][0], INF+1, intersections[0][2] + for i in intersections: + candidateT = i[1] + if candidateT < INF and candidateT > -EPSILON: + if result[1] > INF or candidateT < result[1]: + result = i + return result + +class Scene(object): + def __init__(self): + self.objects = [] + self.lightPoints = [] + self.position = Point(0, 1.8, 10) + self.lookingAt = PZERO + self.fieldOfView = 45 + self.recursionDepth = 0 + + def lookAt(self, p): + self.lookingAt = p + + def addObject(self, on, oi, sc): + self.objects.append((on, oi, sc)) + + def addLight(self, p): + self.lightPoints.append(p) + + def render(self, canvas): + #print 'Computing field of view' + fovRadians = math.pi * (self.fieldOfView / 2.0) / 180.0 + halfWidth = math.tan(fovRadians) + halfHeight = 0.75 * halfWidth + width = halfWidth * 2 + height = halfHeight * 2 + pixelWidth = width / (canvas.width - 1) + pixelHeight = height / (canvas.height - 1) + + eye = Ray(self.position, self.lookingAt - self.position) + vpRight = eye.vector.cross(VUP).normalized() + vpUp = vpRight.cross(eye.vector).normalized() + + #print 'Looping over pixels' + previousfraction = 0.0 + for y in range(canvas.height): + currentfraction = 1.0 * y / canvas.height + if currentfraction - previousfraction > 0.05: + # print('%d%% complete' % int(currentfraction * 100)) + previousfraction = currentfraction + for x in range(canvas.width): + xcomp = vpRight.scale(x * pixelWidth - halfWidth) + ycomp = vpUp.scale(y * pixelHeight - halfHeight) + ray = Ray(eye.point, eye.vector + xcomp + ycomp) + colour = self.rayColour(ray) + canvas.plot(x,y,colour[0], colour[1], colour[2]) + + # print('Complete.') + # canvas.save() + + def rayColour(self, ray): + if self.recursionDepth > 3: + return (0.0,0.0,0.0) + + self.recursionDepth = self.recursionDepth + 1 + intersections = [] + for on, oi, sc in self.objects: + intersections.append((on, oi(ray), sc)) + # intersections = [(on, oi(ray), sc) for (on, oi, sc) in self.objects] + i = firstIntersection(intersections) + if i[1] > INF: + self.recursionDepth = self.recursionDepth - 1 + return (0.0,0.0,0.0) ## the background colour + else: + (o, t, s) = i + p = ray.pointAtTime(t) + r = s(self, ray, p, o(p)) + self.recursionDepth = self.recursionDepth - 1 + return r + + def _lightIsVisible(self, l, p): + for (on, oi, sc) in self.objects: + t = oi(Ray(p,l - p)) + if t < INF and t > EPSILON: + return False + return True + + def visibleLights(self, p): + result = [] + for l in self.lightPoints: + if self._lightIsVisible(l, p): + result.append(l) + return result + +def addColours(a, scale, b): + return (a[0] + scale * b[0], + a[1] + scale * b[1], + a[2] + scale * b[2]) + +class SimpleSurface(object): + def __init__(self, baseColour): + self.baseColour = baseColour + self.specularCoefficient = 0.2 + self.lambertCoefficient = 0.6 + self.ambientCoefficient = 1.0 - self.specularCoefficient - self.lambertCoefficient + + def baseColourAt(self, p): + return self.baseColour + + def colourAt(self, scene, ray, p, normal): + b = 
self.baseColourAt(p) + + c = (0.0, 0.0, 0.0) + if self.specularCoefficient > 0: + reflectedRay = Ray(p, ray.vector.reflectThrough(normal)) + #print p, normal, ray.vector, reflectedRay.vector + reflectedColour = scene.rayColour(reflectedRay) + c = addColours(c, self.specularCoefficient, reflectedColour) + + if self.lambertCoefficient > 0: + lambertAmount = 0.0 + for lightPoint in scene.visibleLights(p): + contribution = (lightPoint - p).normalized().dot(normal) + if contribution > 0: + lambertAmount = lambertAmount + contribution + lambertAmount = min(1,lambertAmount) + c = addColours(c, self.lambertCoefficient * lambertAmount, b) + + if self.ambientCoefficient > 0: + c = addColours(c, self.ambientCoefficient, b) + + return c + +class CheckerboardSurface(object): + def __init__(self): + self.baseColour = (1.0, 1.0, 1.0) + self.specularCoefficient = 0.2 + self.lambertCoefficient = 0.6 + self.ambientCoefficient = 1.0 - self.specularCoefficient - self.lambertCoefficient + self.otherColour = (0.0, 0.0, 0.0) + self.checkSize = 1 + + def baseColourAt(self, p): + v = p - PZERO + v.scale(1.0 / self.checkSize) + if (int(abs(v.x) + 0.5) + \ + int(abs(v.y) + 0.5) + \ + int(abs(v.z) + 0.5)) \ + % 2: + return self.otherColour + else: + return self.baseColour + + def colourAt(self, scene, ray, p, normal): + b = self.baseColourAt(p) + + c = (0.0,0.0,0.0) + if self.specularCoefficient > 0: + reflectedRay = Ray(p, ray.vector.reflectThrough(normal)) + #print p, normal, ray.vector, reflectedRay.vector + reflectedColour = scene.rayColour(reflectedRay) + c = addColours(c, self.specularCoefficient, reflectedColour) + + if self.lambertCoefficient > 0: + lambertAmount = 0.0 + for lightPoint in scene.visibleLights(p): + contribution = (lightPoint - p).normalized().dot(normal) + if contribution > 0: + lambertAmount = lambertAmount + contribution + lambertAmount = min(1,lambertAmount) + c = addColours(c, self.lambertCoefficient * lambertAmount, b) + + if self.ambientCoefficient > 0: + c = addColours(c, self.ambientCoefficient, b) + + return c + +def _main(): + Canvas = PpmCanvas + # c = Canvas(4,2,'test_raytrace_tiny') + # c = Canvas(80,60,'test_raytrace_small') + # c = Canvas(160,120,'test_raytrace') + c = Canvas(320,240,'test_raytrace') + # c = Canvas(640,480,'test_raytrace_big') + s = Scene() + s.addLight(Point(30, 30, 10)) + s.addLight(Point(-10, 100, 30)) + s.lookAt(Point(0, 2, 0)) + + obj = Sphere(Point(1,3,-10), 2) + surf = SimpleSurface((1.0,1.0,0.0)) + s.addObject(obj.normalAt, obj.intersectionTime, surf.colourAt) + for y in range(6): + obj = Sphere(Point(-3 - y * 0.4, 2.3, -5), 0.4) + surf = SimpleSurface((y / 6.0, 1 - y / 6.0, 0.5)) + s.addObject(obj.normalAt, obj.intersectionTime, surf.colourAt) + obj = Halfspace(Point(0,0,0), VUP) + surf = CheckerboardSurface() + s.addObject(obj.normalAt, obj.intersectionTime, surf.colourAt) + s.render(c) + +run = _main + + +def main(warmup, iterations): + import time + for i in range(warmup): + t1 = time.time() + _main() + t2 = time.time() - t1 + print("Warmup", i, ":", t2 * 1000, "ms") + for i in range(iterations): + t1 = time.time() + _main() + t2 = time.time() - t1 + print("Iteration", i, ":", t2 * 1000, "ms") + + +import os, sys +if __name__ == "__main__" and sys.implementation.name != "graalpy": + print(f"Running {os.path.basename(__file__)}") + main(20, 30) diff --git a/mx.graalpython/copyrights/overrides b/mx.graalpython/copyrights/overrides index f05340fac2..e014a518d1 100644 --- a/mx.graalpython/copyrights/overrides +++ b/mx.graalpython/copyrights/overrides @@ 
-696,3 +696,10 @@ mx.graalpython/mx_graalpython.py,zippy.copyright mx.graalpython/mx_graalpython_bench_param.py,zippy.copyright mx.graalpython/mx_graalpython_benchmark.py,zippy.copyright mx.graalpython/suite.py,no.copyright +benchmarks/interpreter/deltablue.py,no.copyright +benchmarks/interpreter/pyinit.py,no.copyright +benchmarks/interpreter/sieve.py,no.copyright +benchmarks/interpreter/fibonacci.py,no.copyright +benchmarks/interpreter/richards.py,no.copyright +benchmarks/warmup/raytrace.py,no.copyright +benchmarks/warmup/pyflate-fast.py,no.copyright diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index 45ef60558b..a40228a123 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -39,6 +39,7 @@ import mx import mx_benchmark +import mx_polybench from mx_benchmark import StdOutRule, java_vm_registry, OutputCapturingVm, GuestVm, VmBenchmarkSuite, AveragingBenchmarkMixin from mx_graalpython_bench_param import HARNESS_PATH @@ -1087,3 +1088,38 @@ def register_suites(): mx_benchmark.add_bm_suite(PythonJMHDistMxBenchmarkSuite()) for py_bench_suite in PythonHeapBenchmarkSuite.get_benchmark_suites(HEAP_BENCHMARKS): mx_benchmark.add_bm_suite(py_bench_suite) + + +mx_polybench.register_polybench_language(mx_suite=SUITE, language="python", distributions=["GRAALPYTHON", "GRAALPYTHON_RESOURCES"]) + + +def graalpython_polybench_runner(polybench_run: mx_polybench.PolybenchRunFunction, tags) -> None: + fork_count_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "polybench-fork-counts.json") + if "gate" in tags: + polybench_run(["--jvm", "interpreter/*.py", "--experimental-options", "--engine.Compilation=false", "-w", "1", "-i", "1"]) + polybench_run(["--native", "interpreter/*.py", "--experimental-options", "--engine.Compilation=false", "-w", "1", "-i", "1"]) + polybench_run(["--native", "warmup/*.py", "-w", "1", "-i", "1", "--metric=one-shot", "--mx-benchmark-args", "--fork-count-file", fork_count_file]) + if "benchmark" in tags: + polybench_run(["--jvm", "interpreter/*.py", "--experimental-options", "--engine.Compilation=false"]) + polybench_run(["--native", "interpreter/*.py", "--experimental-options", "--engine.Compilation=false"]) + polybench_run(["--jvm", "interpreter/*.py"]) + polybench_run(["--native", "interpreter/*.py"]) + polybench_run(["--native", "warmup/*.py", "--metric=one-shot", "--mx-benchmark-args", "--fork-count-file", fork_count_file]) + polybench_run( + ["--jvm", "interpreter/pyinit.py", "-w", "0", "-i", "0", "--metric=none", "--mx-benchmark-args", "--fork-count-file", fork_count_file]) + polybench_run( + ["--native", "interpreter/pyinit.py", "-w", "0", "-i", "0", "--metric=none", "--mx-benchmark-args", "--fork-count-file", fork_count_file]) + polybench_run(["--jvm", "interpreter/*.py", "--metric=metaspace-memory"]) + polybench_run(["--jvm", "interpreter/*.py", "--metric=application-memory"]) + polybench_run(["--jvm", "interpreter/*.py", "--metric=allocated-bytes", "-w", "40", "-i", "10", "--experimental-options", "--engine.Compilation=false"]) + polybench_run(["--native", "interpreter/*.py", "--metric=allocated-bytes", "-w", "40", "-i", "10", "--experimental-options", "--engine.Compilation=false"]) + polybench_run(["--jvm", "interpreter/*.py", "--metric=allocated-bytes", "-w", "40", "-i", "10"]) + polybench_run(["--native", "interpreter/*.py", "--metric=allocated-bytes", "-w", "40", "-i", "10"]) + if "instructions" in tags: + assert mx_polybench.is_enterprise() + 
polybench_run(["--native", "interpreter/*.py", "--metric=instructions", "--experimental-options", "--engine.Compilation=false", + "--mx-benchmark-args", "--fork-count-file", fork_count_file]) + + +mx_polybench.register_polybench_benchmark_suite(mx_suite=SUITE, name="python", languages=["python"], benchmark_distribution="GRAALPYTHON_POLYBENCH_BENCHMARKS", + benchmark_file_filter=".*py$", runner=graalpython_polybench_runner, tags={"gate", "benchmark", "instructions"}) diff --git a/mx.graalpython/polybench-fork-counts.json b/mx.graalpython/polybench-fork-counts.json new file mode 100644 index 0000000000..66356fae63 --- /dev/null +++ b/mx.graalpython/polybench-fork-counts.json @@ -0,0 +1,9 @@ +{ + "polybench:warmup/pyflate-fast.py": 3, + "polybench:warmup/raytracer.py": 3, + "polybench:interpreter/pyinit.py": 5, + "polybench:interpreter/deltablue.py": 3, + "polybench:interpreter/fibonacci.py": 0, + "polybench:interpreter/richards.py": 3, + "polybench:interpreter/sieve.py": 3 +} diff --git a/mx.graalpython/suite.py b/mx.graalpython/suite.py index 1993f1047d..ef9022f837 100644 --- a/mx.graalpython/suite.py +++ b/mx.graalpython/suite.py @@ -154,6 +154,11 @@ "version": "RELEASE120-1", }, }, + "GRAALPYTHON_PYFLATE_BENCHMARK_RESOURCE" : { + # just any reasonably sized .tar.gz or .tar.bz2 for running the benchmark + "urls" : ["https://lafo.ssw.uni-linz.ac.at/pub/graal-external-deps/visualvm/visualvm-944-linux-amd64.tar.gz"], + "digest" : "sha512:72982ca01cce9dfa876687ec7b9627b81e241e6cddc8dedb976a5d06d058a067f83f5c063dc07d7ed19730ffb54af8343eae8ca0cc156353f7b18530eef73c50" + }, }, # -------------------------------------------------------------------------------------------------------------- @@ -1406,6 +1411,19 @@ }, }, + "GRAALPYTHON_POLYBENCH_BENCHMARKS": { + "description": "Distribution for GraalPython polybench benchmarks", + "layout": { + "./interpreter/": [ + "file:benchmarks/interpreter/*.py", + ], + "./warmup/": [ + "file:benchmarks/warmup/*.py", + "dependency:GRAALPYTHON_PYFLATE_BENCHMARK_RESOURCE", + ], + }, + }, + "GRAALPY_NATIVE_STANDALONE_RELEASE_ARCHIVE": { "class": "DeliverableStandaloneArchive", "platformDependent": True, diff --git a/pyproject.toml b/pyproject.toml index 11d5b57856..7bc349315c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,4 +8,4 @@ required-version = '23' # Disables the black formatter in this repository # If individual suites want to enable the formatter, they can create a # pyproject.toml with their own configuration in their suite folder -force-exclude = '.*graalpy.*|.*scripts/[^w].*py' +force-exclude = '.*graalpy.*|.*scripts/[^w].*py|benchmarks/*'