In [3]:
import json
from pathlib import Path
import pandas as pd
from collections import Counter

# Load all JSON files from sample_conversations
output_dir = Path("sample_conversations")
files = list(output_dir.glob("*.json"))

data = []
for file in files:
    with open(file, "r") as f:
        conv = json.load(f)
        bug_type = "Unknown"
        if "AssertionError" in conv["failing_output"]:
            bug_type = "AssertionError"
        elif "RecursionError" in conv["failing_output"]:
            bug_type = "RecursionError"
        # Add more detections as needed
        
        data.append({
            "file": file.name,
            "domain": conv["domain"],
            "topic": conv["topic"],
            # "diff_size": len(conv["code_diff"].splitlines()),
            "bug_type": bug_type,
            "success": "OK" in conv["failing_output"],  # Rough check for test success, adjust as needed
        })

df = pd.DataFrame(data)

# Summary stats
print("Total conversations:", len(df))
# print("Average diff size:", df["diff_size"].mean())
print("Bug type distribution:", Counter(df["bug_type"]))

df  # Display dataframe


Total conversations: 7
Bug type distribution: Counter({'AssertionError': 7})


Unnamed: 0,file,domain,topic,bug_type,success
0,conversation_20250923T085141_assertions.json,Mathematics,Arithmetic,AssertionError,False
1,conversation_20250923T021810_002.json,Materials Science,Crystal lattice simulation,AssertionError,False
2,conversation_20250923T023958_005.json,Computational Biology,Gene regulatory networks,AssertionError,False
3,conversation_20250923T023323_004.json,Computational Physics,Orbital mechanics simulation,AssertionError,False
4,conversation_20250923T021511_001.json,Computational Neuroscience,Spike train analysis,AssertionError,False
5,conversation_20250923T085412_input_output.json,Mathematics,Recursion,AssertionError,False
6,conversation_20250923T023141_003.json,Climate Science,Atmospheric modelling,AssertionError,False


In [9]:
import pandas as pd
import json
from pprint import pprint

In [4]:
path = "/Users/masoud/Documents/Repos/synth-python-edit/sample_conversations/conversation_20250923T134033_001.json"
data = json.load(open(path, "r"))

In [5]:
data

{'buggy_code': '"""\nTools to compute steady states of linearized gene regulatory networks:\nSolve A x = -b for steady-state x. Uses RREF (pure Python) to handle singular and underdetermined systems.\n"""\nfrom typing import List, Tuple\nEPS = 1e-12\n\nclass InconsistentSystemError(Exception):\n    """Raised when the linear system A x = -b has no solution."""\n    pass\n\ndef _to_float_matrix(A: List[List[float]], b: List[float]) -> List[List[float]]:\n    """Build augmented matrix [A | b] as floats."""\n    return [[float(v) for v in row] + [float(bi)] for row, bi in zip(A, b)]\n\ndef _rref_augmented(A: List[List[float]], b: List[float]) -> Tuple[List[List[float]], List[int]]:\n    """\n    Compute RREF of augmented matrix [A | b].\n    Returns (rref_matrix, pivot_columns) where pivot_columns lists pivot column indices (0..n-1) in order by row.\n    """\n    m = _to_float_matrix(A, b)\n    rows = len(m)\n    if rows == 0:\n        return (m, [])\n    cols = len(m[0]) - 1\n    pivot_ro

In [11]:
print(data["code_diff"])

--- correct.py
+++ buggy.py
@@ -1,10 +1,8 @@
-# grn_steady_state.py
 """
 Tools to compute steady states of linearized gene regulatory networks:
 Solve A x = -b for steady-state x. Uses RREF (pure Python) to handle singular and underdetermined systems.
 """
 from typing import List, Tuple
-
 EPS = 1e-12
 
 class InconsistentSystemError(Exception):
@@ -23,22 +21,18 @@
     m = _to_float_matrix(A, b)
     rows = len(m)
     if rows == 0:
-        return m, []
-    cols = len(m[0]) - 1  # number of variables
+        return (m, [])
+    cols = len(m[0]) - 1
     pivot_row = 0
     pivot_cols = []
     for col in range(cols):
-        # find pivot with max abs value in column col at or below pivot_row
-        sel = max(range(pivot_row, rows), key=lambda r: abs(m[r][col])) if pivot_row < rows else None
+        sel = max(range(pivot_row, rows), key=lambda r: abs(m[r][col])) if pivot_row > rows else None
         if sel is None or abs(m[sel][col]) < EPS:
             continue
-        # swa

In [10]:
pprint(data["problem_description"])

('You are given a linearized gene regulatory network described by dx/dt = A x '
 '+ b where A is an n x n interaction matrix\n'
 'and b is an n-dimensional basal production vector. The steady-state(s) '
 'satisfy A x = -b. In real GRN analyses, A may be\n'
 'singular (regulatory redundancy) or inconsistent with b (no steady-state '
 'under the linear model), and tooling must robustly\n'
 'detect and report these cases.\n'
 '\n'
 'The task is to provide a small Python module that computes steady-state '
 'solutions for arbitrary small dense matrices using\n'
 'pure Python (standard library only). The module must:\n'
 '- Solve A x = -b and return a unique solution when A is full-rank.\n'
 '- When the system has infinitely many solutions, return one particular '
 'solution plus a basis for the nullspace (homogeneous solutions).\n'
 '- Detect inconsistent systems (no solution) and raise a clear exception.\n'
 '- Provide a simple heuristic stability hint (diagonal dominance as a '
 'suffici

In [7]:
print(data["buggy_code"])

"""
Tools to compute steady states of linearized gene regulatory networks:
Solve A x = -b for steady-state x. Uses RREF (pure Python) to handle singular and underdetermined systems.
"""
from typing import List, Tuple
EPS = 1e-12

class InconsistentSystemError(Exception):
    """Raised when the linear system A x = -b has no solution."""
    pass

def _to_float_matrix(A: List[List[float]], b: List[float]) -> List[List[float]]:
    """Build augmented matrix [A | b] as floats."""
    return [[float(v) for v in row] + [float(bi)] for row, bi in zip(A, b)]

def _rref_augmented(A: List[List[float]], b: List[float]) -> Tuple[List[List[float]], List[int]]:
    """
    Compute RREF of augmented matrix [A | b].
    Returns (rref_matrix, pivot_columns) where pivot_columns lists pivot column indices (0..n-1) in order by row.
    """
    m = _to_float_matrix(A, b)
    rows = len(m)
    if rows == 0:
        return (m, [])
    cols = len(m[0]) - 1
    pivot_row = 0
    pivot_cols = []
    for col in 

In [8]:
print(data["unit_tests"])

# test_grn_steady_state.py
import unittest
import math
import grn_steady_state as gss

class TestGRNSteadyState(unittest.TestCase):
    def assertVecAlmostEqual(self, a, b, tol=1e-9):
        self.assertEqual(len(a), len(b))
        for x, y in zip(a, b):
            self.assertTrue(abs(x - y) <= tol, f"{x} != {y}")

    def test_unique_solution(self):
        # A is invertible; dx/dt = A x + b ; steady state satisfies A x = -b
        A = [[-1.0, 0.0],
             [0.0, -2.0]]
        b = [1.0, 2.0]
        res = gss.solve_steady_state(A, b)
        self.assertEqual(res['type'], 'unique')
        # expected x = [1,1] as computed in analysis
        self.assertVecAlmostEqual(res['solution'], [1.0, 1.0])
        # stability heuristic should detect diagonal dominance
        self.assertTrue(gss.is_strictly_diagonally_dominant(A))

    def test_infinite_solutions_homogeneous(self):
        # A has rank 1, homogeneous system with infinite solutions
        A = [[1.0, -1.0],
             [