Exploration into numpy fuzzing with Codex.

My prompt:

# write python fuzz tests using atheris library
# https://github.com/google/atheris
"""
# Generic Atheris fuzz Example
# !/usr/bin/python3
import atheris
with atheris.instrument_imports():
  import some_library
  import sys

def TestOneInput(data):
  some_library.parse(data)
atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()

# When fuzzing Python, Atheris will report a failure if the Python code under test throws an uncaught exception.
"""
# Atheris FuzzedDataProvider API Reference
# Often, a bytes object is not convenient input to your code being fuzzed. Similar to libFuzzer,
# we provide a FuzzedDataProvider to translate these bytes into other input forms.
# To construct the FuzzedDataProvider, use the following code:
# fdp = atheris.FuzzedDataProvider(input_bytes)
# The FuzzedDataProvider provides the following functions:
# ConsumeBytes(count: int): Consume count bytes.
# ConsumeUnicode(count: int): Consume unicode characters. Might contain surrogate pair characters, which according to the specification are invalid in this situation. However, many core software tools (e.g. Windows file paths) support them, so other software often needs to too.
# ConsumeUnicodeNoSurrogates(count: int): Consume unicode characters, but never generate surrogate pair characters.
# ConsumeString(count: int): Alias for ConsumeBytes in Python 2, or ConsumeUnicode in Python 3.
# ConsumeInt(int: bytes): Consume a signed integer of the specified size (when written in two's complement notation).
# ConsumeUInt(int: bytes): Consume an unsigned integer of the specified size.
# ConsumeIntInRange(min: int, max: int): Consume an integer in the range [min, max].
# ConsumeIntList(count: int, bytes: int): Consume a list of count integers of size bytes.
# ConsumeIntListInRange(count: int, min: int, max: int): Consume a list of count integers in the range [min, max].
# ConsumeFloat(): Consume an arbitrary floating#point value. Might produce weird values like NaN and Inf.
# ConsumeRegularFloat(): Consume an arbitrary numeric floating#point value; never produces a special type like NaN or Inf.
# ConsumeProbability(): Consume a floating#point value in the range [0, 1].
# ConsumeFloatInRange(min: float, max: float): Consume a floating#point value in the range [min, max].
# ConsumeFloatList(count: int): Consume a list of count arbitrary floating#point values. Might produce weird values like NaN and Inf.
# ConsumeRegularFloatList(count: int): Consume a list of count arbitrary numeric floating#point values; never produces special types like NaN or Inf.
# ConsumeProbabilityList(count: int): Consume a list of count floats in the range [0, 1].
# ConsumeFloatListInRange(count: int, min: float, max: float): Consume a list of count floats in the range [min, max].
# PickValueInList(l: list): Given a list, pick a random value.
# ConsumeBool(): Consume either True or False.

Fuzzer Results:

In [1]:
# Function signature is npmatmul(x1: Array, x2: Array, /) -> Array
# https://numpy.org/doc/stable/reference/generated/numpy.matmul.html
# fuzz test for np.matmul():
import sys
import atheris
import numpy as np

def TestOneInput(fuzz_data):
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  x1 = np.array(fdp.ConsumeIntList(10, 4))
  x2 = np.array(fdp.ConsumeIntList(10, 4))
  np.matmul(x1, x2)

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()

if __name__ == "__main__":
  main()

INFO: Instrumenting 21130 functions...


INFO: Instrumentation complete.


INFO: Using built-in libfuzzer
INFO: libFuzzer ignores flags that start with '--'
INFO: Running with entropic power schedule (0xFF, 100).
INFO: Seed: 2160301694
INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 4096 bytes
INFO: A corpus is not provided, starting from an empty corpus
#2	INITED cov: 1 ft: 1 corp: 1/1b exec/s: 0 rss: 315Mb
#35	NEW    cov: 61 ft: 61 corp: 2/3b lim: 4 exec/s: 0 rss: 315Mb L: 2/2 MS: 3 InsertByte-ShuffleBytes-ShuffleBytes-
#419	NEW    cov: 172 ft: 177 corp: 3/6b lim: 6 exec/s: 0 rss: 315Mb L: 3/3 MS: 4 InsertByte-EraseBytes-InsertByte-ChangeByte-
#1606	NEW    cov: 173 ft: 178 corp: 4/9b lim: 17 exec/s: 0 rss: 315Mb L: 3/3 MS: 2 InsertByte-ShuffleBytes-
#14879	NEW    cov: 268 ft: 278 corp: 5/11b lim: 142 exec/s: 0 rss: 315Mb L: 2/3 MS: 3 ShuffleBytes-CrossOver-ChangeBinInt-
#15337	NEW    cov: 287 ft: 323 corp: 6/12b lim: 142 exec/s: 0 rss: 315Mb L: 1/3 MS: 3 ChangeByte-EraseBytes-ShuffleBytes-
#15756	NEW    cov: 287 ft: 329 corp: 

KeyboardInterrupt: stopping.


SystemExit: 130

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
# Function signature is np.array(object, dtype=None, copy=True, order='K', subok=False, ndmin=0)
# https://numpy.org/doc/stable/reference/generated/numpy.array.html
# fuzz test for np.array():
import sys
import atheris
import numpy as np

def TestOneInput(fuzz_data):
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  x1 = np.array(fdp.ConsumeIntList(10, 4))
  x2 = np.array(fdp.ConsumeIntList(10, 4))
  # np.array(x1, x2) have comment out because it will raise TypeError

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()

if __name__ == "__main__":
  main()

INFO: Instrumenting 22574 functions...


: 

: 

In [None]:
# Function signature is np.array_equal(a1, a2, strict=False)
# https://numpy.org/doc/stable/reference/generated/numpy.array_equal.html
# fuzz test for np.array_equal():
import sys
import atheris
import numpy as np

def TestOneInput(fuzz_data):
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  x1 = np.array(fdp.ConsumeIntList(10, 4))
  x2 = np.array(fdp.ConsumeIntList(10, 4))
  np.array_equal(x1, x2)

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()

if __name__ == "__main__":
  main()

In [None]:
# Function signature is np.array_equiv(a1, a2)
# https://numpy.org/doc/stable/reference/generated/numpy.array_equiv.html
# fuzz test for np.array_equiv():
import sys
import atheris
import numpy as np

def TestOneInput(fuzz_data):
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  x1 = np.array(fdp.ConsumeIntList(10, 4))
  x2 = np.array(fdp.ConsumeIntList(10, 4))
  np.array_equiv(x1, x2)

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()

if __name__ == "__main__":
  main()


In [None]:
# Function signature is np.array_repr(arr, max_line_width=None, precision=None, suppress_small=None)
# https://numpy.org/doc/stable/reference/generated/numpy.array_repr.html
# fuzz test for np.array_repr():
import sys
import atheris
import numpy as np

def TestOneInput(fuzz_data):
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  x1 = np.array(fdp.ConsumeIntList(10, 4))
  x2 = np.array(fdp.ConsumeIntList(10, 4))
  np.array_repr(x1, x2)

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
  atheris.Fuzz()

if __name__ == "__main__":
  main()

In [2]:
# Function signature is np.matmul(x1: Array, x2: Array, /) -> Array
# https://numpy.org/doc/stable/reference/generated/numpy.matmul.html
# fuzz test for np.matmul():
import atheris
import sys
import numpy as np

def TestOneInput(fuzz_data):
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  shape = fdp.ConsumeIntInRange(1, 100)
  x1 = np.random.uniform(size=(shape, shape))
  x2 = np.random.uniform(size=(shape, shape))
  np.matmul(x1, x2)

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput)
  atheris.Fuzz()
if __name__ == "__main__":
    main()

INFO: Instrumenting 22574 functions...


INFO: Instrumentation complete.


INFO: Using built-in libfuzzer
INFO: libFuzzer ignores flags that start with '--'
INFO: Running with entropic power schedule (0xFF, 100).
INFO: Seed: 1870938804
INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 4096 bytes
INFO: A corpus is not provided, starting from an empty corpus
#2	INITED cov: 1 ft: 1 corp: 1/1b exec/s: 0 rss: 328Mb
#6	NEW    cov: 139 ft: 139 corp: 2/4b lim: 4 exec/s: 0 rss: 328Mb L: 3/3 MS: 4 CopyPart-ShuffleBytes-InsertByte-ChangeBit-
#615	NEW    cov: 267 ft: 303 corp: 3/11b lim: 8 exec/s: 0 rss: 328Mb L: 7/7 MS: 4 CrossOver-CopyPart-InsertByte-ChangeBit-
#616	NEW    cov: 268 ft: 403 corp: 4/14b lim: 8 exec/s: 0 rss: 328Mb L: 3/7 MS: 1 ChangeBinInt-
#1241	NEW    cov: 268 ft: 522 corp: 5/21b lim: 14 exec/s: 0 rss: 328Mb L: 7/7 MS: 5 ChangeBit-EraseBytes-InsertByte-ChangeBit-InsertByte-
#4096	pulse  cov: 268 ft: 522 corp: 5/21b lim: 38 exec/s: 2048 rss: 328Mb
#8192	pulse  cov: 268 ft: 522 corp: 5/21b lim: 80 exec/s: 2730 rss: 328Mb
#163

KeyboardInterrupt: stopping.


SystemExit: 130

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
# Function signature is np.matmul(x1: Array, x2: Array, /) -> Array
# https://numpy.org/doc/stable/reference/generated/numpy.matmul.html
# fuzz test for np.matmul():
import atheris
import sys
import numpy as np

def TestOneInput(data):
  fdp = atheris.FuzzedDataProvider(data)
  x1 = np.array([fdp.ConsumeInt(8) for _ in range(fdp.ConsumeInt(8))])
  x2 = np.array([fdp.ConsumeInt(8) for _ in range(fdp.ConsumeInt(8))])
  np.matmul(x1, x2)

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput)
  atheris.Fuzz()

if __name__ == "__main__":
  main()

INFO: Instrumenting 21130 functions...


INFO: Instrumentation complete.

 === Uncaught Python exception: ===
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 0 is different from 10)
Traceback (most recent call last):
  File "/tmp/ipykernel_906965/2035549597.py", line 12, in TestOneInput
    np.matmul(x1, x2)
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 0 is different from 10)



INFO: Using built-in libfuzzer
INFO: libFuzzer ignores flags that start with '--'
INFO: Running with entropic power schedule (0xFF, 100).
INFO: Seed: 3541380530
INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 4096 bytes
INFO: A corpus is not provided, starting from an empty corpus


SystemExit: -1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
import sys
import atheris
from io import StringIO
import numpy as np

def get_fuzz_types():
    # Define the rows
    dtype = np.dtype(
        [('f0', np.uint16), ('f1', np.float64), ('f2', 'S7'), ('f3', np.int8)]
    )

    # An expected match
    expected = np.array(
        [
            (1, 2.4, "a", -34),
            (2, 3.1, "b", 29),
            (3, 9.9, "g", 120),
        ],
        dtype=dtype
    )

    return dtype, expected

def TestOneInput(fuzz_data):
  dtype, expected = get_fuzz_types()
  fdp = atheris.FuzzedDataProvider(fuzz_data)
  new_data = StringIO(fdp.ConsumeString(sys.maxsize))

  try:
    np.matmul(new_data, dtype=dtype, delimiter=";", skiprows=True)
  # Catch all of the exceptions that are caught in 
  # https://github.com/numpy/numpy/blob/main/numpy/lib/tests/test_loadtxt.py
  except StopIteration:
    return
  except ValueError:
    return
  except IndexError:
    return
  except TypeError:
    return

def main():
  atheris.instrument_all()
  atheris.Setup(sys.argv, TestOneInput)
  atheris.Fuzz()

if __name__ == "__main__":
  main()

INFO: Instrumenting 21131 functions...


INFO: Instrumentation complete.


INFO: Using built-in libfuzzer
INFO: libFuzzer ignores flags that start with '--'
INFO: Running with entropic power schedule (0xFF, 100).
INFO: Seed: 3449822923
INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 4096 bytes
INFO: A corpus is not provided, starting from an empty corpus
#2	INITED cov: 6 ft: 6 corp: 1/1b exec/s: 0 rss: 334Mb
#71	NEW    cov: 66 ft: 66 corp: 2/4b lim: 4 exec/s: 0 rss: 334Mb L: 3/3 MS: 4 ChangeBit-InsertByte-InsertByte-ShuffleBytes-
#461	NEW    cov: 185 ft: 191 corp: 3/8b lim: 6 exec/s: 0 rss: 334Mb L: 4/4 MS: 5 InsertByte-ChangeByte-ChangeASCIIInt-ShuffleBytes-ChangeBinInt-
#1521	NEW    cov: 186 ft: 192 corp: 4/11b lim: 14 exec/s: 0 rss: 334Mb L: 3/4 MS: 5 ChangeBit-CrossOver-CrossOver-ChangeBinInt-ShuffleBytes-
#16035	NEW    cov: 280 ft: 293 corp: 5/13b lim: 156 exec/s: 0 rss: 334Mb L: 2/4 MS: 4 ShuffleBytes-ChangeASCIIInt-EraseBytes-ChangeBit-
#16423	NEW    cov: 303 ft: 382 corp: 6/14b lim: 156 exec/s: 0 rss: 334Mb L: 1/4 MS: 3 

KeyboardInterrupt: stopping.


SystemExit: 130

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
import atheris
import sys
import numpy as np

# Define a custom mutator for the input data
def CustomMutator(data):
  # We can use the built-in `atheris.Mutate` function to mutate the input data
  mutated_data = atheris.Mutate(data)
  # Return the mutated data
  return mutated_data

@atheris.instrument_func
def TestOneInput(data):
  # Use Atheris to create a FuzzedDataProvider
  fdp = atheris.FuzzedDataProvider(data)

  # Use the FuzzedDataProvider to generate random matrices for the matrix multiplication
  matrix1 = fdp.ConsumeFloatListInRange(2, -100, 100)
  matrix2 = fdp.ConsumeFloatListInRange(2, -100, 100)

  # Reshape the matrices to be 2x2
  matrix1 = np.reshape(matrix1, (2, 2))
  matrix2 = np.reshape(matrix2, (2, 2))

  # Perform the matrix multiplication
  result = np.matmul(matrix1, matrix2)

  # Check that the result is a 2x2 matrix
  if result.shape != (2, 2):
    raise RuntimeError('Unexpected matrix shape')

if __name__ == '__main__':
  # Set up Atheris with the custom mutator and the TestOneInput function
  atheris.Setup(sys.argv, TestOneInput, custom_mutator=CustomMutator)

  # Start the fuzzing process
  atheris.Fuzz()



 === Uncaught Python exception: ===
ValueError: cannot reshape array of size 2 into shape (2,2)


INFO: Using built-in libfuzzer
INFO: found LLVMFuzzerCustomMutator (0x7f3ad11d2ea0). Disabling -len_control by default.
INFO: libFuzzer ignores flags that start with '--'
INFO: Running with entropic power schedule (0xFF, 100).
INFO: Seed: 3720231343
INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 4096 bytes


SystemExit: -1

Traceback (most recent call last):
  File "/tmp/ipykernel_913923/3006103088.py", line 22, in TestOneInput
    matrix1 = np.reshape(matrix1, (2, 2))
  File "<__array_function__ internals>", line 200, in reshape
  File "/home/x/miniconda3/envs/tf/lib/python3.9/site-packages/numpy/core/fromnumeric.py", line 298, in reshape
    return _wrapfunc(a, 'reshape', newshape, order=order)
  File "/home/x/miniconda3/envs/tf/lib/python3.9/site-packages/numpy/core/fromnumeric.py", line 54, in _wrapfunc
    return _wrapit(obj, method, *args, **kwds)
  File "/home/x/miniconda3/envs/tf/lib/python3.9/site-packages/numpy/core/fromnumeric.py", line 43, in _wrapit
    result = getattr(asarray(obj), method)(*args, **kwds)
ValueError: cannot reshape array of size 2 into shape (2,2)



  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)



 === Timeout: 302s elapsed, timeout=300s ===
