pandas-dev · KWJ222 · Feb 15, 2025 · Feb 15, 2025 · Feb 15, 2025 · Feb 15, 2025
diff --git a/COVERAGE.md b/COVERAGE.md
@@ -0,0 +1,38 @@
+# Assignment and plans
+### Function Assignments:
+- Each group member will analyze, test, and improve a specific function from Pandas:
+  - **Herdi**: `apply()`
+    - CCN 10
+  - **Ahmed**: `quantile()`
+    - CCN 13
+  - **Maxim**: `shift()` 
+    - CCN 13
+  - **Kim**: `_evaluate_numexp()`
+    - CCN 9
+  - **Annika**: `to_time()`
+    - CCN 16
+
+CCN measured through `lizard`.
+### Tools
+  - Measuring complexity using `Lizard`.
+    - Install `Lizard` by running `pip install lizard`. Documentation can be found [here](https://pypi.org/project/lizard/).
+  - Manually instrumenting coverage with print/logging statements.
+  - Comparing results with an automated coverage tool such as `Coverage.py`.
+    - Install `Coverage.py` by runnning `python3 -m pip install coverage`. Documentation can be found [here](https://coverage.readthedocs.io/en/7.6.12/).
+### Tasks for Each Member:
+  - Identify decision points and track execution paths.
+  - Compare manual vs. automated coverage results.
+  - Write new test cases to improve coverage.
+  - Document findings, including:
+    - Complexity scores.
+    - Test reports.
+    - Coverage improvements.
+
+### Final Deliverables:
+  - Report of findings and refactoring suggestions. (For each function: Make sure you have the complexity scores, manual instrumentation runnable output, the automated coverage tool's output, your suggestions for the refactoring).
+  - URL to the updated repository.
+  - Coverage information (e.g., number of code lines, coverage percentage) can be referenced from Pandas' online coverage tool.
+
+### Deadlines:
+  - All implementations must be completed by **Tuesday (hard deadline)**.
+  - Report writing will be done on **Wednesday**.
diff --git a/numexpr_testing/Dockerfile b/numexpr_testing/Dockerfile
@@ -0,0 +1,17 @@
+# Use Python 3.9 as base image
+FROM python:3.9-slim
+
+# Set working directory
+WORKDIR /app
+
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy test files
+COPY test_evaluate_numexpr.py .
+
+# Command to run tests
+CMD ["pytest", "test_evaluate_numexpr.py", "-v"]
diff --git a/numexpr_testing/reference.txt b/numexpr_testing/reference.txt
@@ -0,0 +1,2 @@
+docker build -t numexpr-tests .
+docker run numexpr-tests
diff --git a/numexpr_testing/requirements.txt b/numexpr_testing/requirements.txt
@@ -0,0 +1,3 @@
+numpy
+numexpr
+pytest
diff --git a/numexpr_testing/test_evaluate_numexpr.py b/numexpr_testing/test_evaluate_numexpr.py
@@ -0,0 +1,206 @@
+import numpy as np
+import numexpr as ne
+import warnings
+import pytest
+import sys
+from operator import add, sub, mul, truediv
+import math
+
+# Branch coverage tracking
+branch_coverage = {}
+
+def record_branch(branch_id):
+    """Track execution count of a specific branch."""
+    if branch_id in branch_coverage:
+        branch_coverage[branch_id] += 1
+    else:
+        branch_coverage[branch_id] = 1
+
+# Supporting functions
+def _can_use_numexpr(op, op_str, left_op, right_op, eval_type):
+    """Simplified check for numexpr compatibility."""
+    return True
+
+def _bool_arith_fallback(op_str, left_op, right_op):
+    """Simplified boolean arithmetic fallback."""
+    return True
+
+def _evaluate_standard(op, op_str, left_op, right_op):
+    """Standard evaluation fallback."""
+    return op(left_op, right_op)
+
+_TEST_MODE = False
+
+def _store_test_result(result):
+    """Store test results (simplified)."""
+    pass
+
+def _evaluate_numexpr(op, op_str, left_op, right_op):
+    record_branch(1)  # Entry point
+    result = None
+
+    if _can_use_numexpr(op, op_str, left_op, right_op, "evaluate"):
+        record_branch(2)
+        is_reversed = hasattr(op, '__name__') and op.__name__.startswith('r')
+
+        if is_reversed:
+            record_branch(3)
+            left_op, right_op = right_op, left_op
+
+        left_value = left_op
+        right_value = right_op
+
+        try:
+            record_branch(4)
+            result = ne.evaluate(
+                f"left_value {op_str} right_value",
+                local_dict={"left_value": left_value, "right_value": right_value},
+                casting="safe",
+            )
+        except TypeError:
+            record_branch(5)
+            pass
+        except NotImplementedError:
+            record_branch(6)
+            if _bool_arith_fallback(op_str, left_op, right_op):
+                record_branch(7)
+                pass
+            else:
+                record_branch(8)
+                raise
+
+        if is_reversed:
+            record_branch(9)
+            left_op, right_op = right_op, left_op
+
+    if _TEST_MODE:
+        record_branch(10)
+        _store_test_result(result is not None)
+
+    if result is None:
+        record_branch(11)
+        result = _evaluate_standard(op, op_str, left_op, right_op)
+
+    record_branch(12)
+    return result
+
+def analyze_coverage():
+    """Print branch coverage analysis"""
+    total_branches = 12  # Total number of branches in the code
+    covered_branches = sum(1 for count in branch_coverage.values() if count > 0)
+
+    print("\nBranch Coverage Analysis:")
+    print("-" * 50)
+    print(f"Total branches: {total_branches}")
+    print(f"Covered branches: {covered_branches}")
+    coverage_percentage = (covered_branches / total_branches * 100) if total_branches > 0 else 0
+    print(f"Coverage percentage: {coverage_percentage:.2f}%")
+    print("\nDetailed branch execution counts:")
+    for branch_id, count in sorted(branch_coverage.items()):
+        print(f"Branch {branch_id}: {count} times")
+    print("-" * 50)
+    return covered_branches, total_branches
+
+# Create a reversed add operation
+def radd(x, y):
+    radd.__name__ = 'radd'
+    return add(y, x)
+
+@pytest.fixture
+def setup_arrays():
+    """Fixture for common test arrays"""
+    return {
+        'basic': (np.array([1, 2, 3]), np.array([4, 5, 6])),
+        'float': (np.array([1.5, 2.5, 3.5]), np.array([1.0, 2.0, 3.0])),
+        'bool': (np.array([True, False, True]), np.array([True, True, False])),
+        'small': (np.arange(10), np.arange(10)),
+        'large': (np.arange(100000), np.arange(100000))
+    }
+
+# Category 1: Basic Arithmetic Operations
+def test_basic_arithmetic(setup_arrays):
+    left, right = setup_arrays['basic']
+
+    # Addition
+    result = _evaluate_numexpr(add, '+', left, right)
+    assert np.array_equal(result, np.array([5, 7, 9]))
+
+    # Subtraction
+    result = _evaluate_numexpr(sub, '-', left, right)
+    assert np.array_equal(result, np.array([-3, -3, -3]))
+
+    # Multiplication
+    result = _evaluate_numexpr(mul, '*', left, right)
+    assert np.array_equal(result, np.array([4, 10, 18]))
+
+    # Division
+    result = _evaluate_numexpr(truediv, '/', left, right)
+    np.testing.assert_array_almost_equal(result, np.array([0.25, 0.4, 0.5]))
+
+# Category 2: Reversed Operations
+def test_reversed_operations(setup_arrays):
+    left, right = setup_arrays['basic']
+
+    def reversed_sub(x, y):
+        reversed_sub.__name__ = 'rsub'
+        return y - x
+
+    result = _evaluate_numexpr(reversed_sub, '-', left, right)
+    assert np.array_equal(result, np.array([3, 3, 3]))
+
+# Category 3: Different Data Types
+def test_different_dtypes(setup_arrays):
+    int_arr, float_arr = np.array([1, 2, 3]), np.array([1.5, 2.5, 3.5])
+    bool_arr = np.array([True, False, True])
+
+    # Integer + Float
+    result = _evaluate_numexpr(add, '+', int_arr, float_arr)
+    np.testing.assert_array_almost_equal(result, np.array([2.5, 4.5, 6.5]))
+
+    # Test boolean with integers
+    result = _evaluate_numexpr(add, '+', bool_arr, np.array([1, 1, 1]))
+    assert np.array_equal(result, np.array([2, 1, 2]))
+
+# Category 4: Size Thresholds
+def test_size_thresholds(setup_arrays):
+    small_left, small_right = setup_arrays['small']
+    large_left, large_right = setup_arrays['large']
+
+    # Small arrays
+    result_small = _evaluate_numexpr(add, '+', small_left, small_right)
+    assert np.array_equal(result_small, small_left + small_right)
+
+    # Large arrays
+    result_large = _evaluate_numexpr(add, '+', large_left, large_right)
+    assert np.array_equal(result_large, large_left + large_right)
+
+# Category 5: Edge Cases
+def test_edge_cases():
+    # Empty arrays
+    result = _evaluate_numexpr(add, '+', np.array([]), np.array([]))
+    assert len(result) == 0
+
+    # Single element
+    result = _evaluate_numexpr(add, '+', np.array([1]), np.array([2]))
+    assert np.array_equal(result, np.array([3]))
+
+    # NaN values
+    nan_arr = np.array([np.nan, 1, 2])
+    result = _evaluate_numexpr(add, '+', nan_arr, np.array([1, 2, 3]))
+    assert np.isnan(result[0])
+    assert np.array_equal(result[1:], np.array([3, 5]))
+
+    # Infinity
+    inf_arr = np.array([np.inf, -np.inf, 1])
+    result = _evaluate_numexpr(add, '+', inf_arr, np.array([1, 1, 1]))
+    assert np.isinf(result[0]) and result[0] > 0
+    assert np.isinf(result[1]) and result[1] < 0
+    assert result[2] == 2
+
+@pytest.fixture(scope="session", autouse=True)
+def final_coverage():
+    yield
+    covered_branches, total_branches = analyze_coverage()
+    coverage_percentage = (covered_branches / total_branches * 100)
+    print(f"\nFinal coverage: {coverage_percentage:.2f}%")
+    assert coverage_percentage >= 90, f"Branch coverage ({coverage_percentage:.2f}%) is below 90%"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		docker build -t numexpr-tests .
		docker run numexpr-tests
-Original file line number
+Diff line change
@@ -0,0 +1,3 @@
+    numpy
+    numexpr
+    pytest