diff --git a/COVERAGE.md b/COVERAGE.md new file mode 100644 index 0000000000000..c00b5b9d7c1b5 --- /dev/null +++ b/COVERAGE.md @@ -0,0 +1,38 @@ +# Assignment and plans +### Function Assignments: +- Each group member will analyze, test, and improve a specific function from Pandas: + - **Herdi**: `apply()` + - CCN 10 + - **Ahmed**: `quantile()` + - CCN 13 + - **Maxim**: `shift()` + - CCN 13 + - **Kim**: `_evaluate_numexpr()` + - CCN 9 + - **Annika**: `to_time()` + - CCN 16 + +CCN measured through `lizard`. +### Tools + - Measuring complexity using `Lizard`. + - Install `Lizard` by running `pip install lizard`. Documentation can be found [here](https://pypi.org/project/lizard/). + - Manually instrumenting coverage with print/logging statements. + - Comparing results with an automated coverage tool such as `Coverage.py`. + - Install `Coverage.py` by running `python3 -m pip install coverage`. Documentation can be found [here](https://coverage.readthedocs.io/en/7.6.12/). +### Tasks for Each Member: + - Identify decision points and track execution paths. + - Compare manual vs. automated coverage results. + - Write new test cases to improve coverage. + - Document findings, including: + - Complexity scores. + - Test reports. + - Coverage improvements. + +### Final Deliverables: + - Report of findings and refactoring suggestions. (For each function: Make sure you have the complexity scores, manual instrumentation runnable output, the automated coverage tool's output, your suggestions for the refactoring). + - URL to the updated repository. + - Coverage information (e.g., number of code lines, coverage percentage) can be referenced from Pandas' online coverage tool. + +### Deadlines: + - All implementations must be completed by **Tuesday (hard deadline)**. + - Report writing will be done on **Wednesday**. 
\ No newline at end of file diff --git a/numexpr_testing/Dockerfile b/numexpr_testing/Dockerfile new file mode 100644 index 0000000000000..2ed760d7a4f36 --- /dev/null +++ b/numexpr_testing/Dockerfile @@ -0,0 +1,17 @@ +# Use Python 3.9 as base image +FROM python:3.9-slim + +# Set working directory +WORKDIR /app + +# Copy requirements first to leverage Docker cache +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy test files +COPY test_evaluate_numexpr.py . + +# Command to run tests +CMD ["pytest", "test_evaluate_numexpr.py", "-v"] \ No newline at end of file diff --git a/numexpr_testing/reference.txt b/numexpr_testing/reference.txt new file mode 100644 index 0000000000000..6dbf584bcd4e2 --- /dev/null +++ b/numexpr_testing/reference.txt @@ -0,0 +1,2 @@ +docker build -t numexpr-tests . +docker run numexpr-tests \ No newline at end of file diff --git a/numexpr_testing/requirements.txt b/numexpr_testing/requirements.txt new file mode 100644 index 0000000000000..90ad863d9d28a --- /dev/null +++ b/numexpr_testing/requirements.txt @@ -0,0 +1,3 @@ +numpy +numexpr +pytest \ No newline at end of file diff --git a/numexpr_testing/test_evaluate_numexpr.py b/numexpr_testing/test_evaluate_numexpr.py new file mode 100644 index 0000000000000..781efe2a2d0ea --- /dev/null +++ b/numexpr_testing/test_evaluate_numexpr.py @@ -0,0 +1,206 @@ +import numpy as np +import numexpr as ne +import warnings +import pytest +import sys +from operator import add, sub, mul, truediv +import math + +# Branch coverage tracking +branch_coverage = {} + +def record_branch(branch_id): + """Track execution count of a specific branch.""" + if branch_id in branch_coverage: + branch_coverage[branch_id] += 1 + else: + branch_coverage[branch_id] = 1 + +# Supporting functions +def _can_use_numexpr(op, op_str, left_op, right_op, eval_type): + """Simplified check for numexpr compatibility.""" + return True + +def _bool_arith_fallback(op_str, left_op, 
right_op): + """Simplified boolean arithmetic fallback.""" + return True + +def _evaluate_standard(op, op_str, left_op, right_op): + """Standard evaluation fallback.""" + return op(left_op, right_op) + +_TEST_MODE = False + +def _store_test_result(result): + """Store test results (simplified).""" + pass + +def _evaluate_numexpr(op, op_str, left_op, right_op): + record_branch(1) # Entry point + result = None + + if _can_use_numexpr(op, op_str, left_op, right_op, "evaluate"): + record_branch(2) + is_reversed = hasattr(op, '__name__') and op.__name__.startswith('r') + + if is_reversed: + record_branch(3) + left_op, right_op = right_op, left_op + + left_value = left_op + right_value = right_op + + try: + record_branch(4) + result = ne.evaluate( + f"left_value {op_str} right_value", + local_dict={"left_value": left_value, "right_value": right_value}, + casting="safe", + ) + except TypeError: + record_branch(5) + pass + except NotImplementedError: + record_branch(6) + if _bool_arith_fallback(op_str, left_op, right_op): + record_branch(7) + pass + else: + record_branch(8) + raise + + if is_reversed: + record_branch(9) + left_op, right_op = right_op, left_op + + if _TEST_MODE: + record_branch(10) + _store_test_result(result is not None) + + if result is None: + record_branch(11) + result = _evaluate_standard(op, op_str, left_op, right_op) + + record_branch(12) + return result + +def analyze_coverage(): + """Print branch coverage analysis""" + total_branches = 12 # Total number of branches in the code + covered_branches = sum(1 for count in branch_coverage.values() if count > 0) + + print("\nBranch Coverage Analysis:") + print("-" * 50) + print(f"Total branches: {total_branches}") + print(f"Covered branches: {covered_branches}") + coverage_percentage = (covered_branches / total_branches * 100) if total_branches > 0 else 0 + print(f"Coverage percentage: {coverage_percentage:.2f}%") + print("\nDetailed branch execution counts:") + for branch_id, count in 
sorted(branch_coverage.items()): + print(f"Branch {branch_id}: {count} times") + print("-" * 50) + return covered_branches, total_branches + +# Create a reversed add operation +def radd(x, y): + radd.__name__ = 'radd' + return add(y, x) + +@pytest.fixture +def setup_arrays(): + """Fixture for common test arrays""" + return { + 'basic': (np.array([1, 2, 3]), np.array([4, 5, 6])), + 'float': (np.array([1.5, 2.5, 3.5]), np.array([1.0, 2.0, 3.0])), + 'bool': (np.array([True, False, True]), np.array([True, True, False])), + 'small': (np.arange(10), np.arange(10)), + 'large': (np.arange(100000), np.arange(100000)) + } + +# Category 1: Basic Arithmetic Operations +def test_basic_arithmetic(setup_arrays): + left, right = setup_arrays['basic'] + + # Addition + result = _evaluate_numexpr(add, '+', left, right) + assert np.array_equal(result, np.array([5, 7, 9])) + + # Subtraction + result = _evaluate_numexpr(sub, '-', left, right) + assert np.array_equal(result, np.array([-3, -3, -3])) + + # Multiplication + result = _evaluate_numexpr(mul, '*', left, right) + assert np.array_equal(result, np.array([4, 10, 18])) + + # Division + result = _evaluate_numexpr(truediv, '/', left, right) + np.testing.assert_array_almost_equal(result, np.array([0.25, 0.4, 0.5])) + +# Category 2: Reversed Operations +def test_reversed_operations(setup_arrays): + left, right = setup_arrays['basic'] + + def reversed_sub(x, y): + reversed_sub.__name__ = 'rsub' + return y - x + + result = _evaluate_numexpr(reversed_sub, '-', left, right) + assert np.array_equal(result, np.array([3, 3, 3])) + +# Category 3: Different Data Types +def test_different_dtypes(setup_arrays): + int_arr, float_arr = np.array([1, 2, 3]), np.array([1.5, 2.5, 3.5]) + bool_arr = np.array([True, False, True]) + + # Integer + Float + result = _evaluate_numexpr(add, '+', int_arr, float_arr) + np.testing.assert_array_almost_equal(result, np.array([2.5, 4.5, 6.5])) + + # Test boolean with integers + result = _evaluate_numexpr(add, '+', 
bool_arr, np.array([1, 1, 1])) + assert np.array_equal(result, np.array([2, 1, 2])) + +# Category 4: Size Thresholds +def test_size_thresholds(setup_arrays): + small_left, small_right = setup_arrays['small'] + large_left, large_right = setup_arrays['large'] + + # Small arrays + result_small = _evaluate_numexpr(add, '+', small_left, small_right) + assert np.array_equal(result_small, small_left + small_right) + + # Large arrays + result_large = _evaluate_numexpr(add, '+', large_left, large_right) + assert np.array_equal(result_large, large_left + large_right) + +# Category 5: Edge Cases +def test_edge_cases(): + # Empty arrays + result = _evaluate_numexpr(add, '+', np.array([]), np.array([])) + assert len(result) == 0 + + # Single element + result = _evaluate_numexpr(add, '+', np.array([1]), np.array([2])) + assert np.array_equal(result, np.array([3])) + + # NaN values + nan_arr = np.array([np.nan, 1, 2]) + result = _evaluate_numexpr(add, '+', nan_arr, np.array([1, 2, 3])) + assert np.isnan(result[0]) + assert np.array_equal(result[1:], np.array([3, 5])) + + # Infinity + inf_arr = np.array([np.inf, -np.inf, 1]) + result = _evaluate_numexpr(add, '+', inf_arr, np.array([1, 1, 1])) + assert np.isinf(result[0]) and result[0] > 0 + assert np.isinf(result[1]) and result[1] < 0 + assert result[2] == 2 + +@pytest.fixture(scope="session", autouse=True) +def final_coverage(): + yield + covered_branches, total_branches = analyze_coverage() + coverage_percentage = (covered_branches / total_branches * 100) + print(f"\nFinal coverage: {coverage_percentage:.2f}%") + assert coverage_percentage >= 90, f"Branch coverage ({coverage_percentage:.2f}%) is below 90%" \ No newline at end of file diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d0c0ed29b6d44..a2ce91ed9c921 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -38,7 +38,7 @@ class providing the base-class of operations. 
from pandas._libs import ( Timestamp, - lib, + lib, ) from pandas._libs.algos import rank_1d import pandas._libs.groupby as libgroupby @@ -4230,6 +4230,16 @@ def _nth( grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) return grb.nth(n) + # Global dictionary to track which branches are executed + branch_coverage = {} + + def record_branch(branch_id): + """Track execution count of a specific branch.""" + if branch_id in branch_coverage: + branch_coverage[branch_id] += 1 + else: + branch_coverage[branch_id] = 1 + @final def quantile( self, @@ -4239,156 +4249,106 @@ def quantile( ] = "linear", numeric_only: bool = False, ): - """ - Return group values at the given quantile, a la numpy.percentile. - - Parameters - ---------- - q : float or array-like, default 0.5 (50% quantile) - Value(s) between 0 and 1 providing the quantile(s) to compute. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - Method to use when the desired quantile falls between two points. - numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. - - .. versionadded:: 1.5.0 - - .. versionchanged:: 2.0.0 - - numeric_only now defaults to ``False``. - - Returns - ------- - Series or DataFrame - Return type determined by caller of GroupBy object. - - See Also - -------- - Series.quantile : Similar method for Series. - DataFrame.quantile : Similar method for DataFrame. - numpy.percentile : NumPy method to compute qth percentile. - - Examples - -------- - >>> df = pd.DataFrame( - ... [["a", 1], ["a", 2], ["a", 3], ["b", 1], ["b", 3], ["b", 5]], - ... columns=["key", "val"], - ... 
) - >>> df.groupby("key").quantile() - val - key - a 2.0 - b 3.0 - """ + """Return group values at the given quantile.""" + + record_branch(1) # Entry point + mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name="quantile") obj = self._wrap_agged_manager(mgr) splitter = self._grouper._get_splitter(obj) sdata = splitter._sorted_data - + starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups) - + def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]: + """Prepares input values and handles different data types.""" if isinstance(vals.dtype, StringDtype) or is_object_dtype(vals.dtype): - raise TypeError( - f"dtype '{vals.dtype}' does not support operation 'quantile'" - ) - + record_branch(2) + raise TypeError(f"dtype '{vals.dtype}' does not support operation 'quantile'") + inference: DtypeObj | None = None if isinstance(vals, BaseMaskedArray) and is_numeric_dtype(vals.dtype): + record_branch(3) out = vals.to_numpy(dtype=float, na_value=np.nan) inference = vals.dtype elif is_integer_dtype(vals.dtype): + record_branch(4) if isinstance(vals, ExtensionArray): + record_branch(5) out = vals.to_numpy(dtype=float, na_value=np.nan) else: out = vals inference = np.dtype(np.int64) elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): + record_branch(6) out = vals.to_numpy(dtype=float, na_value=np.nan) elif is_bool_dtype(vals.dtype): - # GH#51424 remove to match Series/DataFrame behavior + record_branch(7) raise TypeError("Cannot use quantile with bool dtype") elif needs_i8_conversion(vals.dtype): + record_branch(8) inference = vals.dtype - # In this case we need to delay the casting until after the - # np.lexsort below. 
- # error: Incompatible return value type (got - # "Tuple[Union[ExtensionArray, ndarray[Any, Any]], Union[Any, - # ExtensionDtype]]", expected "Tuple[ndarray[Any, Any], - # Optional[Union[dtype[Any], ExtensionDtype]]]") - return vals, inference # type: ignore[return-value] + return vals, inference elif isinstance(vals, ExtensionArray) and is_float_dtype(vals.dtype): + record_branch(9) inference = np.dtype(np.float64) out = vals.to_numpy(dtype=float, na_value=np.nan) else: + record_branch(10) out = np.asarray(vals) - + return out, inference - + def post_processor( vals: np.ndarray, inference: DtypeObj | None, result_mask: np.ndarray | None, orig_vals: ArrayLike, ) -> ArrayLike: + """Handles final output formatting and data type conversion.""" if inference: - # Check for edge case + record_branch(11) if isinstance(orig_vals, BaseMaskedArray): - assert result_mask is not None # for mypy - - if interpolation in {"linear", "midpoint"} and not is_float_dtype( - orig_vals - ): + record_branch(12) + assert result_mask is not None + + if interpolation in {"linear", "midpoint"} and not is_float_dtype(orig_vals): + record_branch(13) return FloatingArray(vals, result_mask) else: - # Item "ExtensionDtype" of "Union[ExtensionDtype, str, - # dtype[Any], Type[object]]" has no attribute "numpy_dtype" - # [union-attr] + record_branch(14) with warnings.catch_warnings(): - # vals.astype with nan can warn with numpy >1.24 warnings.filterwarnings("ignore", category=RuntimeWarning) - return type(orig_vals)( - vals.astype( - inference.numpy_dtype # type: ignore[union-attr] - ), - result_mask, - ) - - elif not ( - is_integer_dtype(inference) - and interpolation in {"linear", "midpoint"} - ): + return type(orig_vals)(vals.astype(inference.numpy_dtype), result_mask) + + elif not (is_integer_dtype(inference) and interpolation in {"linear", "midpoint"}): + record_branch(15) if needs_i8_conversion(inference): - # error: Item "ExtensionArray" of "Union[ExtensionArray, - # ndarray[Any, Any]]" has 
no attribute "_ndarray" - vals = vals.astype("i8").view( - orig_vals._ndarray.dtype # type: ignore[union-attr] - ) - # error: Item "ExtensionArray" of "Union[ExtensionArray, - # ndarray[Any, Any]]" has no attribute "_from_backing_data" - return orig_vals._from_backing_data( # type: ignore[union-attr] - vals - ) - - assert isinstance(inference, np.dtype) # for mypy + record_branch(16) + vals = vals.astype("i8").view(orig_vals._ndarray.dtype) + return orig_vals._from_backing_data(vals) + + assert isinstance(inference, np.dtype) return vals.astype(inference) - + return vals - + if is_scalar(q): + record_branch(17) qs = np.array([q], dtype=np.float64) pass_qs: None | np.ndarray = None else: + record_branch(18) qs = np.asarray(q, dtype=np.float64) pass_qs = qs - + ids = self._grouper.ids ngroups = self._grouper.ngroups if self.dropna: - # splitter drops NA groups, we need to do the same + record_branch(19) ids = ids[ids >= 0] nqs = len(qs) - + func = partial( libgroupby.group_quantile, labels=ids, @@ -4397,40 +4357,46 @@ def post_processor( starts=starts, ends=ends, ) - + def blk_func(values: ArrayLike) -> ArrayLike: + """Handles computation logic for grouped quantiles.""" orig_vals = values if isinstance(values, BaseMaskedArray): + record_branch(20) mask = values._mask result_mask = np.zeros((ngroups, nqs), dtype=np.bool_) else: + record_branch(21) mask = isna(values) result_mask = None - + is_datetimelike = needs_i8_conversion(values.dtype) - vals, inference = pre_processor(values) - + ncols = 1 if vals.ndim == 2: + record_branch(22) ncols = vals.shape[0] - + out = np.empty((ncols, ngroups, nqs), dtype=np.float64) - + if is_datetimelike: + record_branch(23) vals = vals.view("i8") - + if vals.ndim == 1: - # EA is always 1d + record_branch(24) func( out[0], values=vals, - mask=mask, # type: ignore[arg-type] + mask=mask, result_mask=result_mask, is_datetimelike=is_datetimelike, ) else: + record_branch(25) for i in range(ncols): + record_branch(26) func( out[i], 
values=vals[i], @@ -4438,21 +4404,26 @@ def blk_func(values: ArrayLike) -> ArrayLike: result_mask=None, is_datetimelike=is_datetimelike, ) - + if vals.ndim == 1: + record_branch(27) out = out.ravel("K") if result_mask is not None: + record_branch(28) result_mask = result_mask.ravel("K") else: + record_branch(29) out = out.reshape(ncols, ngroups * nqs) - + return post_processor(out, inference, result_mask, orig_vals) - + res_mgr = sdata._mgr.grouped_reduce(blk_func) - res = self._wrap_agged_manager(res_mgr) + + record_branch(30) # Final return branch return self._wrap_aggregated_output(res, qs=pass_qs) + @final @Substitution(name="groupby") def ngroup(self, ascending: bool = True): diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 28cb25b515ed2..cca68b8d82632 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -8,6 +8,27 @@ ) import pandas._testing as tm +from pandas.core.groupby.groupby import quantile # Import quantile function +from pandas.core.groupby.groupby import branch_coverage + + +@pytest.fixture(scope="session", autouse=True) +def finalize_coverage(): + """After all test cases execute, compute and save the coverage report.""" + yield # Let all tests execute first + save_coverage() # Save coverage once all tests finish + print("\n Final Manual Coverage Report Generated!") + +def save_coverage(): + """Compute coverage percentage and save report.""" + total_branches = max(branch_coverage.keys(), default=0) # Prevent KeyError if empty + executed_branches = len([b for b, count in branch_coverage.items() if count > 0]) + coverage_score = (executed_branches / total_branches) * 100 if total_branches > 0 else 0 + + for branch, count in sorted(branch_coverage.items()): + print(f"Branch {branch}: executed {count} times\n") + print(f"Manual Coverage Score: {coverage_score:.2f}%") # Print to console + @pytest.mark.parametrize( "interpolation", 
["linear", "lower", "higher", "nearest", "midpoint"] diff --git a/report.md b/report.md new file mode 100644 index 0000000000000..6730fc5a313e9 --- /dev/null +++ b/report.md @@ -0,0 +1,103 @@ +# Report for assignment 3 + +This is a template for your report. You are free to modify it as needed. +It is not required to use markdown for your report either, but the report +has to be delivered in a standard, cross-platform format. + +## Project + +Name: + +URL: + +One or two sentences describing it + +## Onboarding experience + +Did it build and run as documented? + +See the assignment for details; if everything works out of the box, +there is no need to write much here. If the first project(s) you picked +ended up being unsuitable, you can describe the "onboarding experience" +for each project, along with reason(s) why you changed to a different one. + + +## Complexity + +1. What are your results for five complex functions? + * Did all methods (tools vs. manual count) get the same result? + * Are the results clear? +2. Are the functions just complex, or also long? +3. What is the purpose of the functions? +4. Are exceptions taken into account in the given measurements? +5. Is the documentation clear w.r.t. all the possible outcomes? + +## Refactoring + +Plan for refactoring complex code: + +Estimated impact of refactoring (lower CC, but other drawbacks?). + +Carried out refactoring (optional, P+): + +git diff ... + +## Coverage + +### Tools + +Document your experience in using a "new"/different coverage tool. + +How well was the tool documented? Was it possible/easy/difficult to +integrate it with your build environment? + +### Your own coverage tool + +Show a patch (or link to a branch) that shows the instrumented code to +gather coverage measurements. + +The patch is probably too long to be copied here, so please add +the git command that is used to obtain the patch instead: + +git diff ... 
+ +What kinds of constructs does your tool support, and how accurate is +its output? + +### Evaluation + +1. How detailed is your coverage measurement? + +2. What are the limitations of your own tool? + +3. Are the results of your tool consistent with existing coverage tools? + +## Coverage improvement + +Show the comments that describe the requirements for the coverage. + +Report of old coverage: [link] + +Report of new coverage: [link] + +Test cases added: + +git diff ... + +Number of test cases added: two per team member (P) or at least four (P+). + +## Self-assessment: Way of working + +Current state according to the Essence standard: ... + +Was the self-assessment unanimous? Any doubts about certain items? + +How have you improved so far? + +Where is potential for improvement? + +## Overall experience + +What are your main take-aways from this project? What did you learn? + +Is there something special you want to mention here?