# Evaluating tfcoder completions

In [3]:
import os
import sys
from pathlib import Path

CURRENT_DIRECTORY = Path(os.getcwd())
ROOT_DIRECTORY = (CURRENT_DIRECTORY / "..").absolute().resolve()

print(f"Current directory: {CURRENT_DIRECTORY}")
print(f"Root directory: {ROOT_DIRECTORY}")

sys.path.append(str(ROOT_DIRECTORY))

Current directory: /home/ubuntu/arga-arc/tf_coder
Root directory: /home/ubuntu/arga-arc


In [4]:
import typing as t
import json
from pprint import pprint
from dataclasses import dataclass
import tensorflow as tf
import numpy as np
import math

2024-02-25 17:19:03.960412: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-25 17:19:03.964363: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-25 17:19:04.006790: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-25 17:19:04.006816: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-25 17:19:04.007902: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [5]:
class OutputJSON(t.TypedDict):
    task_id: str
    completions: t.List[str]
    coverage_percentage: float
    description: str
    tf_operators: t.Dict[str, int]
    total_covered: int
    total_in_target: int

class ExamplesJSON(t.TypedDict):
    inputs: str
    outputs: str

class TaskJSON(t.TypedDict):
    constants: str
    description: str
    name: str
    source: str
    target_program: str
    examples: ExamplesJSON



OUTPUT_FILE = CURRENT_DIRECTORY / "output_tfcoder.json"
DATASET_FILE = CURRENT_DIRECTORY / "tfcoder_dataset.json"

OUTPUT: t.List[OutputJSON] = json.loads(OUTPUT_FILE.read_text())
DATASET: t.List[TaskJSON] = json.loads(DATASET_FILE.read_text())

print(f"Loaded {len(OUTPUT)} outputs and {len(DATASET)} tasks")

TASK_JSONS = {task["name"]: task for task in DATASET}
OUTPUTS = {output["task_id"]: output for output in OUTPUT}

keys = list(TASK_JSONS.keys())[0:5]
for key in keys:
    print(f"Task: {key}")
    pprint(TASK_JSONS[key])
    pprint(OUTPUTS[key])
    print()

Loaded 72 outputs and 72 tasks
Task: google_01
{'constants': '[]',
 'description': 'Convert index tensor into pairs for SparseTensor indexing',
 'examples': {'inputs': '[[0, 0, 0, 1, 3, 3],]',
              'outputs': '[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]]'},
 'name': 'google_01',
 'source': 'From an internal Google chat room, 09/07/2018',
 'target_program': 'tf.cast(tf.where(tf.sequence_mask(tf.math.bincount(in1))), '
                   'tf.int32)'}
{'completions': ['\n'
                 'def convert_to_pairs(tensor):\n'
                 '    return tf.reshape(tensor, (-1, 2))\n'
                 '\n'
                 'tensor = tf.constant([0, 0, 0, 1, 3, 3])\n'
                 'pairs = convert_to_pairs(tensor)\n',
                 '\n'
                 'def convert_indices(indices):\n'
                 '    indices = tf.constant(indices)\n'
                 '    second_elements = tf.where(indices)\n'
                 '    first_elements = tf.subtract(second_elements, 1)\n

In [6]:
@dataclass
class Example:
    inputs: t.List[np.ndarray]
    output: t.Union[np.ndarray, tf.SparseTensor]

    @classmethod
    def from_json(cls, examples: ExamplesJSON):
        try:
            evaluated_inputs = eval(examples["inputs"])
            if isinstance(evaluated_inputs, list):
                inputs = [np.array(i) for i in evaluated_inputs]
            else:
                inputs = [evaluated_inputs]
        except Exception as e:
            print(f"Error evaluating inputs: {e}")
            print(f"Inputs: {examples['inputs']}")
            raise e

        try:
            evaluated_outputs = eval(examples["outputs"])
            if isinstance(evaluated_outputs, list):
                outputs = np.array(evaluated_outputs)
            elif isinstance(evaluated_outputs, tf.SparseTensor):
                outputs = evaluated_outputs
            elif isinstance(evaluated_outputs, tf.Tensor):
                outputs = evaluated_outputs.numpy()
            else:
                outputs = evaluated_outputs
        except Exception as e:
            print(f"Error evaluating outputs: {e}")
            print(f"Outputs: {examples['outputs']}")
            raise e

        return cls(inputs, outputs)

SKIP_TASKS = [] # ["google_13", "stackoverflow_05", "stackoverflow_10", "stackoverflow_13"]

# for each task in dataset, show inputs and outputs
for task in DATASET:
    if task["name"] in SKIP_TASKS:
        continue
    print(f"Task name: {task['name']}")
    pprint(Example.from_json(task["examples"]))
    print()

Task name: google_01
Example(inputs=[array([0, 0, 0, 1, 3, 3])],
        output=array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [3, 0],
       [3, 1]]))

Task name: google_02
Example(inputs=[array([[0., 1., 0., 0.],
       [0., 1., 1., 0.],
       [1., 1., 1., 1.]])],
        output=array([[0.  , 1.  , 0.  , 0.  ],
       [0.  , 0.5 , 0.5 , 0.  ],
       [0.25, 0.25, 0.25, 0.25]]))

Task name: google_03
Example(inputs=[SparseTensor(indices=tf.Tensor(
[[0 0 0]
 [0 1 1]
 [1 1 1]
 [1 1 2]], shape=(4, 3), dtype=int64), values=tf.Tensor([1. 1. 1. 1.], shape=(4,), dtype=float32), dense_shape=tf.Tensor([  2   2 800], shape=(3,), dtype=int64))],
        output=SparseTensor(indices=tf.Tensor(
[[0 0 0]
 [0 1 1]], shape=(2, 3), dtype=int64), values=tf.Tensor([1. 1.], shape=(2,), dtype=float32), dense_shape=tf.Tensor([  1   2 800], shape=(3,), dtype=int64)))

Task name: google_04
Example(inputs=[array([111, 112, 121, 122, 131, 132, 211, 212, 221, 222, 231, 232, 311,
       312, 

2024-02-25 17:19:06.226681: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [7]:
TEST_COMPLETIONS = [
"""
input = tf.constant([[1, 2, 3, 4], [5, 6, 7, 8]])

output = tf.repeat(input, repeats=6, axis=0)
"""
]

def evaluate_completion(completion: str) -> t.List[t.Union[np.ndarray, tf.SparseTensor]]:
    """
    evaluates the completion, returning any local variables with an 
    array-like value
    """
    locals_dict = {}
    try:
        exec(completion, None, locals_dict)
    except Exception as e:
        print(f"Error evaluating completion: {e}")
        print(f"Completion: {completion}")
        return []
    
    ans = []

    for key, value in locals_dict.items():
        try:
            if isinstance(value, np.ndarray) or isinstance(value, tf.SparseTensor):
                ans.append(value)
            elif isinstance(value, tf.Tensor):
                ans.append(value.numpy())
            elif isinstance(value, list):
                ans.append(np.array(value))
        except Exception as e:
            print(f"Error adding value: {e}")
            print(f"Key: {key}")
            print("value:")
            pprint(value)
            print(f"Type: {type(value)}")
    return ans

def matches_expected_value(actual: t.Union[np.ndarray, tf.SparseTensor], expected: t.Union[np.ndarray, tf.SparseTensor]) -> bool:
    if isinstance(actual, np.ndarray) and isinstance(expected, np.ndarray):
        return np.array_equal(actual, expected)
    elif isinstance(actual, tf.SparseTensor) and isinstance(expected, tf.SparseTensor):
        return tf.sparse.equal(actual, expected)
    else:
        return False

pprint([evaluate_completion(completion) for completion in TEST_COMPLETIONS])

[[array([[1, 2, 3, 4],
       [5, 6, 7, 8]], dtype=int32),
  array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8],
       [5, 6, 7, 8]], dtype=int32)]]


In [8]:
@dataclass
class Task:
    name: str
    description: str
    target_program: str
    examples: Example
    completions: t.List[str]

    @classmethod
    def from_json(cls, task: TaskJSON, output: OutputJSON):
        assert task["name"] == output["task_id"]
        return cls(
            task["name"],
            task["description"],
            task["target_program"],
            Example.from_json(task["examples"]),
            output["completions"]
        )

    def evaluate_completions(self) -> bool:
        for completion in self.completions:
            evaluated = evaluate_completion(completion)
            if any(matches_expected_value(e, self.examples.output) for e in evaluated):
                return True
        return False

TASKS = {
    task["name"]: Task.from_json(task, OUTPUTS[task["name"]])
    for task in DATASET
}

# pprint 5 tasks
keys = list(TASKS.keys())[0:5]
for key in keys:
    print(f"Task: {key}")
    pprint(TASKS[key])
    pprint(TASKS[key].evaluate_completions())
    print()

Task: google_01
Task(name='google_01',
     description='Convert index tensor into pairs for SparseTensor indexing',
     target_program='tf.cast(tf.where(tf.sequence_mask(tf.math.bincount(in1))), '
                    'tf.int32)',
     examples=Example(inputs=[array([0, 0, 0, 1, 3, 3])],
                      output=array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [3, 0],
       [3, 1]])),
     completions=['\n'
                  'def convert_to_pairs(tensor):\n'
                  '    return tf.reshape(tensor, (-1, 2))\n'
                  '\n'
                  'tensor = tf.constant([0, 0, 0, 1, 3, 3])\n'
                  'pairs = convert_to_pairs(tensor)\n',
                  '\n'
                  'def convert_indices(indices):\n'
                  '    indices = tf.constant(indices)\n'
                  '    second_elements = tf.where(indices)\n'
                  '    first_elements = tf.subtract(second_elements, 1)\n'
                  '    index_pairs = tf.s

2024-02-25 17:19:07.554468: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at sparse_slice_op.cc:60 : INVALID_ARGUMENT: Expected a non-negative size, got -1


In [10]:
for task in TASKS.values():
    print(f"Task: {task.name}")
    print(f"GPT-4 Successfully solved: {task.evaluate_completions()}")
    print()

Task: google_01
Error evaluating completion: Missing required positional argument
Completion: 
tensor = tf.constant([0, 0, 0, 1, 3, 3])

sorted_tensor = tf.sort(tensor)
duplicates = tf.equal(sorted_tensor, tf.roll(sorted_tensor, shift=1))

indices = tf.where(duplicates)

pairs = tf.gather(tensor, indices)

pairs = tf.squeeze(pairs)


Error evaluating completion: too many values to unpack (expected 2)
Completion: 
def index_to_pairs(indices):
    indices = tf.constant(indices)
    unique_indices, counts = tf.unique_with_counts(indices)
    row_indices = tf.expand_dims(unique_indices, -1)
    col_indices = tf.expand_dims(tf.range(tf.size(unique_indices)), -1)
    pairs = tf.stack([row_indices, col_indices], axis=-1)
    return pairs

indices = [0, 0, 0, 1, 3, 3]
pairs = index_to_pairs(indices)

GPT-4 Successfully solved: False

Task: google_02
GPT-4 Successfully solved: True

Task: google_03
Error evaluating completion: name 'sp' is not defined
Completion: slice = tf.sparse.slice(sp, sta

2024-02-25 17:19:46.291267: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at sparse_slice_op.cc:60 : INVALID_ARGUMENT: Expected a non-negative size, got -1
2024-02-25 17:19:46.390670: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:19:46.391245: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:19:46.391672: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:19:46.392267: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:19:46.392686: W tensorflow/core/framework/op_kernel.cc:1839] OP_RE

GPT-4 Successfully solved: False

Task: stackoverflow_20
Error evaluating completion: cannot compute Pack as input #1(zero-based) was expected to be a int32 tensor but is a int64 tensor [Op:Pack] name: stack
Completion: 
data = [[0.7, 0.2, 0.1], [0.4, 0.5, 0.1], [0.4, 0.4, 0.2], [0.3, 0.4, 0.3], [0.0, 0.0, 1.0]]

tensor = tf.constant(data)

argmax_indices = tf.argmax(tensor, axis=1)

indices = tf.stack([tf.range(tf.shape(argmax_indices)[0]), argmax_indices], axis=1)

new_tensor = tf.zeros_like(tensor)

result = tf.tensor_scatter_nd_update(new_tensor, indices, tf.ones(tf.shape(argmax_indices), dtype=tensor.dtype))


GPT-4 Successfully solved: True

Task: stackoverflow_21
GPT-4 Successfully solved: False

Task: stackoverflow_22
Error evaluating completion: {{function_node __wrapped____MklMatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [1,3], In[1]: [2,3] [Op:MatMul] name: 
Completion: 
v = tf.constant([3, 1, 10], dtype=tf.float32)

m = tf.con

2024-02-25 17:19:46.602004: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at gather_nd_op.cc:48 : INVALID_ARGUMENT: indices[2,0] = [4] does not index into param shape [1,5,2], node name: GatherNd


In [11]:
EVALUATED_TASKS_FILE = CURRENT_DIRECTORY / "evaluated_tfcoder.json"

EVALUATED_TASKS_FILE.write_text(json.dumps(
    {
        task.name: task.evaluate_completions()
        for task in TASKS.values()
    }
))

Error evaluating completion: Missing required positional argument
Completion: 
tensor = tf.constant([0, 0, 0, 1, 3, 3])

sorted_tensor = tf.sort(tensor)
duplicates = tf.equal(sorted_tensor, tf.roll(sorted_tensor, shift=1))

indices = tf.where(duplicates)

pairs = tf.gather(tensor, indices)

pairs = tf.squeeze(pairs)


Error evaluating completion: too many values to unpack (expected 2)
Completion: 
def index_to_pairs(indices):
    indices = tf.constant(indices)
    unique_indices, counts = tf.unique_with_counts(indices)
    row_indices = tf.expand_dims(unique_indices, -1)
    col_indices = tf.expand_dims(tf.range(tf.size(unique_indices)), -1)
    pairs = tf.stack([row_indices, col_indices], axis=-1)
    return pairs

indices = [0, 0, 0, 1, 3, 3]
pairs = index_to_pairs(indices)

Error evaluating completion: name 'sp' is not defined
Completion: slice = tf.sparse.slice(sp, start=[0, 0], size=[1, -1])

Error evaluating completion: {{function_node __wrapped__SparseSlice_device_/job:localhost

2024-02-25 17:20:28.147605: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at sparse_slice_op.cc:60 : INVALID_ARGUMENT: Expected a non-negative size, got -1
2024-02-25 17:20:28.230468: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:20:28.231086: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:20:28.231552: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:20:28.232175: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at scatter_nd_op.cc:218 : INVALID_ARGUMENT: indices[1] = [-1] does not index into shape [5]
2024-02-25 17:20:28.232665: W tensorflow/core/framework/op_kernel.cc:1839] OP_RE

Error evaluating completion: {{function_node __wrapped____MklMatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [1,3], In[1]: [2,3] [Op:MatMul] name: 
Completion: 
v = tf.constant([3, 1, 10], dtype=tf.float32)

m = tf.constant([[0.6, 0.4], [0.5, 1.0], [3.0, 4.0]], dtype=tf.float32)

result = tf.tensordot(v, m, axes=[-1, 1])


Error evaluating completion: {{function_node __wrapped____MklMatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [3,2], In[1]: [3,1] [Op:MatMul] name: 
Completion: 
vector = tf.constant([3, 1, 10], dtype=tf.float32)
matrix = tf.constant([[0.6, 0.4], [0.5, 1.0], [3.0, 4.0]], dtype=tf.float32)

result = tf.linalg.matvec(matrix, vector)


Error evaluating completion: {{function_node __wrapped____MklMatMul_device_/job:localhost/replica:0/task:0/device:CPU:0}} Matrix size-incompatible: In[0]: [3,2], In[1]: [3,1] [Op:MatMul] name: 
Completion: 
vector = tf.constant([3, 1, 10], dtype=tf.

2024-02-25 17:20:28.443305: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at gather_nd_op.cc:48 : INVALID_ARGUMENT: indices[2,0] = [4] does not index into param shape [1,5,2], node name: GatherNd


1768