-
Notifications
You must be signed in to change notification settings - Fork 10
Working draft for CodeTFv3 data model #1012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| from .v2.codetf import * # noqa: F403 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| from abc import ABCMeta | ||
| from enum import Enum | ||
| from pathlib import Path | ||
|
|
||
| from pydantic import BaseModel | ||
|
|
||
| from codemodder.logging import logger | ||
|
|
||
|
|
||
| class CaseInsensitiveEnum(str, Enum): | ||
| @classmethod | ||
| def _missing_(cls, value: object): | ||
| if not isinstance(value, str): | ||
| return super()._missing_(value) | ||
|
|
||
| return cls.__members__.get(value.upper()) | ||
|
|
||
|
|
||
| class CodeTFWriter(BaseModel, metaclass=ABCMeta): | ||
| def write_report(self, outfile: Path | str) -> int: | ||
| try: | ||
| Path(outfile).write_text(self.model_dump_json(exclude_none=True)) | ||
| except Exception: | ||
| logger.exception("failed to write report file.") | ||
| # Any issues with writing the output file should exit status 2. | ||
| return 2 | ||
| logger.debug("wrote report to %s", outfile) | ||
| return 0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from enum import Enum | ||
| from typing import Optional | ||
|
|
||
| from pydantic import BaseModel, model_validator | ||
|
|
||
| from ..common import CaseInsensitiveEnum, CodeTFWriter | ||
|
|
||
|
|
||
| class Run(BaseModel): | ||
| """Metadata about the analysis run that produced the results""" | ||
|
|
||
| vendor: str | ||
| tool: str | ||
| version: str | ||
| # Optional free-form metadata about the project being analyzed | ||
| # e.g. project name, directory, commit SHA, etc. | ||
| projectMetadata: Optional[str] = None | ||
| # Analysis duration in milliseconds | ||
| elapsed: Optional[int] = None | ||
| # Optional free-form metadata about the inputs used for the analysis | ||
| # e.g. command line, environment variables, etc. | ||
| inputMetadata: Optional[dict] = None | ||
| # Optional free-form metadata about the analysis itself | ||
| # e.g. timeouts, memory usage, etc. | ||
| analysisMetadata: Optional[dict] = None | ||
|
|
||
|
|
||
| class FixStatusType(str, Enum): | ||
| """Status of a fix""" | ||
|
|
||
| fixed = "fixed" | ||
| skipped = "skipped" | ||
| failed = "failed" | ||
| wontfix = "wontfix" | ||
|
|
||
|
|
||
| class FixStatus(BaseModel): | ||
| """Metadata describing fix outcome""" | ||
|
|
||
| status: FixStatus | ||
| reason: Optional[str] | ||
| details: Optional[str] | ||
|
|
||
|
|
||
| class Rule(BaseModel): | ||
| id: str | ||
| name: str | ||
| url: Optional[str] = None | ||
|
|
||
|
|
||
| class Finding(BaseModel): | ||
| id: str | ||
| rule: Optional[Rule] = None | ||
|
|
||
|
|
||
| class Action(CaseInsensitiveEnum): | ||
| ADD = "add" | ||
| REMOVE = "remove" | ||
|
|
||
|
|
||
| class PackageResult(CaseInsensitiveEnum): | ||
| COMPLETED = "completed" | ||
| FAILED = "failed" | ||
| SKIPPED = "skipped" | ||
|
|
||
|
|
||
| class DiffSide(CaseInsensitiveEnum): | ||
| LEFT = "left" | ||
| RIGHT = "right" | ||
|
|
||
|
|
||
| class PackageAction(BaseModel): | ||
| action: Action | ||
| result: PackageResult | ||
| package: str | ||
|
|
||
|
|
||
| class Change(BaseModel): | ||
| lineNumber: int | ||
| description: Optional[str] | ||
| diffSide: DiffSide = DiffSide.RIGHT | ||
| properties: Optional[dict] = None | ||
| packageActions: Optional[list[PackageAction]] = None | ||
|
|
||
| @model_validator(mode="after") | ||
| def validate_lineNumber(self): | ||
| if self.lineNumber < 1: | ||
| raise ValueError("lineNumber must be greater than 0") | ||
| return self | ||
|
|
||
| @model_validator(mode="after") | ||
| def validate_description(self): | ||
| if self.description is not None and not self.description: | ||
| raise ValueError("description must not be empty") | ||
| return self | ||
|
|
||
|
|
||
| class ChangeSet(BaseModel): | ||
| path: str | ||
| diff: str | ||
| changes: list[Change] | ||
|
|
||
|
|
||
| class Reference(BaseModel): | ||
| url: str | ||
| description: Optional[str] = None | ||
|
|
||
| @model_validator(mode="after") | ||
| def validate_description(self): | ||
| self.description = self.description or self.url | ||
| return self | ||
|
|
||
|
|
||
| class Strategy(Enum): | ||
| ai = "ai" | ||
| hybrid = "hybrid" | ||
| deterministic = "deterministic" | ||
|
|
||
|
|
||
| class AIMetadata(BaseModel): | ||
| provider: Optional[str] = None | ||
| models: Optional[list[str]] = None | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Multiple models may be involved in fix generation |
||
| total_tokens: Optional[int] = None | ||
| completion_tokens: Optional[int] = None | ||
| prompt_tokens: Optional[int] = None | ||
|
|
||
|
|
||
| class GenerationMetadata(BaseModel): | ||
| strategy: Strategy | ||
| ai: Optional[AIMetadata] = None | ||
| provisional: bool | ||
|
|
||
|
|
||
| class FixMetadata(BaseModel): | ||
| # Fix provider ID, corresponds to legacy codemod ID | ||
| id: str | ||
| # A brief summary of the fix | ||
| summary: str | ||
| # A detailed description of the fix | ||
| description: str | ||
| references: list[Reference] | ||
| generation: GenerationMetadata | ||
|
|
||
|
|
||
| class Rating(BaseModel): | ||
| score: int | ||
| description: Optional[str] = None | ||
|
|
||
|
|
||
| class FixQuality(BaseModel): | ||
| safetyRating: Rating | ||
| effectivenessRating: Rating | ||
| cleanlinessRating: Rating | ||
|
|
||
|
|
||
| class FixResult(BaseModel): | ||
| """Result corresponding to a single finding""" | ||
|
|
||
| finding: Finding | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is now a strict 1:1 mapping between finding and fix. |
||
| fixStatus: FixStatus | ||
| changeSets: list[ChangeSet] | ||
| fixMetadata: Optional[FixMetadata] = None | ||
| fixQuality: Optional[FixQuality] = None | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix quality evaluation belongs to the entire result, not just to a single file/diff. |
||
| # A description of the reasoning process that led to the fix | ||
| reasoningSteps: Optional[list[str]] = None | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potentially each entry here should be something richer than a flat string: we might want to incorporate steps that represent user input/feedback.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could quibble that we have no idea what form that would take, and whether we should design for anything beyond a |
||
|
|
||
| @model_validator(mode="after") | ||
| def validate_fixMetadata(self): | ||
| if self.fixStatus.status == FixStatusType.fixed: | ||
| if not self.changeSets: | ||
| raise ValueError("changeSets must be provided for fixed results") | ||
| if not self.fixMetadata: | ||
| raise ValueError("fixMetadata must be provided for fixed results") | ||
| return self | ||
|
|
||
|
|
||
| class CodeTF(CodeTFWriter, BaseModel): | ||
| run: Run | ||
| results: list[FixResult] | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My expectation is that each finding in the input will have a corresponding result. I don't believe that's enforceable in the spec, and it probably should not be since we want to allow for the possibility of streaming subsets of results.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm not sure why these requirements are at odds. Aren't we only giving it 1 input as well? We had discussed as an input a single SARIF, and a single result ID (or something.) In this case, we'd expect 0 or 1
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
As a practical matter I'm still not convinced that's the best approach although we can work towards that setup. In any case I don't think we want to preclude the possibility of batch processing.
I would argue that the absence of any |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I ever fully understood the rationale for the
resultfield: it might be worth discussing whether it's still necessary.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think associating a change to a dependency can allow downstream users to tell users add a dependency (like if it failed), or that this increases reliance on a dependency, etc.