Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into claudio/phtml
Browse files Browse the repository at this point in the history
  • Loading branch information
p4p3r committed Mar 28, 2024
2 parents e5c74a8 + 5bd634f commit 7438477
Show file tree
Hide file tree
Showing 152 changed files with 908 additions and 223 deletions.
7 changes: 7 additions & 0 deletions changelog.d/saf-845.changed
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[IMPORTANT] Logged-in users running `semgrep ci` will now run the pro engine by default! All such `semgrep ci` scans will run with our proprietary languages (Apex and Elixir), as well as cross-function taint analysis within a single file and other single-file pro optimizations we have developed. This is equivalent to `semgrep ci --pro-intrafile`. Users will likely see improved results if they are running `semgrep ci` and did not already have additional configuration to enable pro analysis.

The current default engine does not include cross-file analysis. To scan with cross-file analysis, turn on the app toggle or pass in the flag `--pro`. We recommend this unless you have very large repos (contact our support team to get help enabling cross-file analysis on monorepos!).

To revert to our OSS analysis, pass the flag `--oss-only` (or use `--pro-languages` to continue to receive our proprietary languages).

Reminder: because we release first to our canary image, this change will only immediately affect you if you are using `semgrep/semgrep:canary`. If you are using `semgrep/semgrep:latest`, it will affect you when we bump canary to latest.
12 changes: 12 additions & 0 deletions changelog.d/saf-899.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Fixed a parsing error in Kotlin when there's a newline between the class name and the primary constructor.

Previously, this code could not be parsed

```
class C
constructor(arg:Int){}
```

because of the newline between the class name and the constructor.

Now it's fixed.
3 changes: 3 additions & 0 deletions changelog.d/scrt-531.added
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
`--historical-secrets` flag for running Semgrep Secrets regex rules on git
history (requires Semgrep Secrets). This flag is not yet implemented for
`--experimental`.
10 changes: 5 additions & 5 deletions cli/src/semgrep/commands/ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ def fix_head_if_github_action(metadata: GitMeta) -> None:
)
@click.option("--code", is_flag=True, hidden=True)
@click.option("--beta-testing-secrets", is_flag=True, hidden=True)
@click.option("--historical-secrets", is_flag=True, hidden=True)
@click.option(
"--secrets",
"run_secrets_flag",
Expand Down Expand Up @@ -383,11 +382,12 @@ def ci(

supply_chain_only = supply_chain and not code and not run_secrets
engine_type = EngineType.decide_engine_type(
requested_engine=requested_engine,
scan_handler=scan_handler,
git_meta=metadata,
logged_in=state.app_session.token is not None,
engine_flag=requested_engine,
run_secrets=run_secrets,
enable_pro_diff_scan=diff_depth >= 0,
interfile_diff_scan_enabled=diff_depth >= 0,
ci_scan_handler=scan_handler,
git_meta=metadata,
supply_chain_only=supply_chain_only,
)

Expand Down
18 changes: 10 additions & 8 deletions cli/src/semgrep/commands/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,11 @@ def convert(
type=OutputFormat,
flag_value=OutputFormat.GITLAB_SECRETS,
),
optgroup.option(
"--historical-secrets",
"historical_secrets",
is_flag=True,
),
optgroup.option(
"--junit-xml",
"output_format",
Expand Down Expand Up @@ -408,11 +413,6 @@ def scan_options(func: Callable) -> Callable:
hidden=True,
help="Contact support@semgrep.com for more informationon this.",
)
@click.option(
"--historical-secrets",
"historical_secrets",
is_flag=True,
)
@scan_options
@handle_command_errors
def scan(
Expand Down Expand Up @@ -493,10 +493,13 @@ def scan(
"The flags --beta-testing-secrets-enabled and --oss are incompatible. Semgrep Secrets is a proprietary extension."
)

state = get_state()

engine_type = EngineType.decide_engine_type(
requested_engine=requested_engine,
logged_in=state.app_session.token is not None,
engine_flag=requested_engine,
run_secrets=run_secrets_flag,
enable_pro_diff_scan=diff_depth >= 0,
interfile_diff_scan_enabled=diff_depth >= 0,
)

# this is useful for our CI job to find where semgrep-core (or semgrep-core-proprietary)
Expand All @@ -511,7 +514,6 @@ def scan(
if dataflow_traces is None:
dataflow_traces = engine_type.has_dataflow_traces

state = get_state()
state.metrics.configure(metrics)
state.terminal.configure(
verbose=verbose,
Expand Down
99 changes: 64 additions & 35 deletions cli/src/semgrep/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,52 +33,81 @@ class EngineType(Enum):
@classmethod
def decide_engine_type(
cls,
requested_engine: Optional["EngineType"] = None,
scan_handler: Optional[ScanHandler] = None,
git_meta: Optional[GitMeta] = None,
logged_in: bool = False,
engine_flag: Optional["EngineType"] = None,
run_secrets: bool = False,
enable_pro_diff_scan: bool = False,
interfile_diff_scan_enabled: bool = False,
# ci-only args
ci_scan_handler: Optional[ScanHandler] = None,
git_meta: Optional[GitMeta] = None,
supply_chain_only: bool = False,
) -> "EngineType":
"""Select which Semgrep engine type to use if none is explicitly requested.
"""Determine which Semgrep engine type to run with.
Considers settings from Semgrep Cloud Platform and version control state.
Takes into account the following rules:
- The CLI flag > Semgrep Cloud Platform settings > defaults
- Requesting the secrets engine implies requesting PRO_INTRAFILE
- By default, logged in `semgrep ci` scans use PRO_INTRAFILE
- By default, all other scans use OSS
There are also some restrictions based on version control state and
product requested, to ensure users get fast scans when they expect it.
"""
# Change default to pro-engine intrafile if secrets was requested.
# Secrets is built into pro-engine, but any pro-setting should work.
interfile_is_requested_via_app = ci_scan_handler and ci_scan_handler.deepsemgrep

if engine_flag is not None:
requested_engine = engine_flag
elif interfile_is_requested_via_app:
requested_engine = cls.PRO_INTERFILE
elif run_secrets:
requested_engine = cls.PRO_INTRAFILE
elif logged_in and ci_scan_handler:
# - logged_in indicates that pro analysis is available to the user
# - ci_scan_handler indicates that `semgrep ci` was the entrypoint
# Given these two conditions, the default engine is PRO_INTRAFILE
# Note: `ci_scan_handler` currently requires being logged in, but
# we check both explicitly in case that changes
requested_engine = cls.PRO_INTRAFILE
else:
requested_engine = cls.OSS

# Override 1: diff scans should run with PRO_INTRAFILE when PRO_INTERFILE
# is requested. This ensures diff scans remain fast.
# TODO we can delete this once interfile diff scans are GA

diff_scan = git_meta and not git_meta.is_full_scan
if (
not (scan_handler and scan_handler.deepsemgrep)
and requested_engine is None
and run_secrets
diff_scan
and not interfile_diff_scan_enabled
and requested_engine is cls.PRO_INTERFILE
):
requested_engine = cls.PRO_LANG
elif run_secrets and requested_engine is cls.OSS:
# Should be impossible if the CLI gates impossible arguement combinations.
raise SemgrepError("Semgrep Secrets is not part of the open source engine")
requested_engine = cls.PRO_INTRAFILE

if git_meta and scan_handler:
if scan_handler.deepsemgrep and requested_engine is None:
requested_engine = cls.PRO_INTERFILE
# Override 2: Turn off PRO_INTERFILE when only supply chain is requested
# This is necessary because PRO_INTERFILE defaults to `-j 1`
if supply_chain_only and requested_engine is cls.PRO_INTERFILE:
requested_engine = cls.PRO_INTRAFILE

if (
requested_engine == cls.PRO_INTERFILE
and not git_meta.is_full_scan
and not enable_pro_diff_scan
):
requested_engine = cls.PRO_INTRAFILE
cls.validate_requested_engine(run_secrets, requested_engine)

# Using PRO_LANG engine since PRO_INTERFILE/PRO_INTRAFILE defaults to -j 1
# note if using OSS, then will keep using OSS
if (
requested_engine in {cls.PRO_INTERFILE, cls.PRO_INTRAFILE}
and supply_chain_only
):
logger.info(
"Running only supply chain rules so running without extra interfile analysis"
)
return cls.PRO_LANG
return requested_engine

@staticmethod
def validate_requested_engine(
run_secrets: bool, requested_engine: "EngineType"
) -> None:
"""Sanity check that the requested engine is compatible with the product
return requested_engine or cls.OSS
TODO Check if we need this step and remove it if we decide it's redundant
"""
# TODO: if we keep this step, should we also fail here if logged_in is false
# and requested_engine is not cls.OSS?
# This would no longer allow people to run the pro engine without log in
# if they acquire a copy of the binary (either through us or not)

if run_secrets and requested_engine is EngineType.OSS:
# Should be impossible if the CLI gates impossible argument combinations.
raise SemgrepError("Semgrep Secrets is not part of the open source engine")

def get_pro_version(self) -> str:
binary_path = self.get_binary_path()
Expand Down
2 changes: 2 additions & 0 deletions cli/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ def mask_floats(text_output: str) -> str:
re.compile(r"python (\d+[.]\d+[.]\d+[ ]+)"),
re.compile(r'SEMGREP_SETTINGS_FILE="(.+?)"'),
re.compile(r'SEMGREP_VERSION_CACHE_PATH="(.+?)"'),
re.compile(r"Using Semgrep Pro Version: (.*)"),
re.compile(r"Installed at (.*)"),
# Dates
re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:(?:\.\d+)?Z)?"),
# Hide any substring that resembles a temporary file path.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ SEMGREP_APP_TOKEN="fake-key-from-tests" SEMGREP_USER_AGENT_APPEND="pytest" SEMGR
Initializing scan (deployment=org_name)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down Expand Up @@ -746,7 +751,7 @@ Would have sent complete blob: {
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ SEMGREP_APP_TOKEN="fake_key" SEMGREP_USER_AGENT_APPEND="pytest" SEMGREP_SETTINGS
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ SEMGREP_APP_TOKEN="fake_key" SEMGREP_USER_AGENT_APPEND="pytest" SEMGREP_SETTINGS
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ BUILD_BUILDID="some_id" SEMGREP_REPO_NAME="a/repo/name" SEMGREP_REPO_URL="https:
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ BUILD_BUILDID="some_id" BUILD_REPOSITORY_URI="https://github.com/project_name/pr
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ CI="true" BITBUCKET_BUILD_NUMBER="hi" SEMGREP_REPO_NAME="a/repo/name" SEMGREP_RE
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ CI="true" BITBUCKET_BUILD_NUMBER="hi" BITBUCKET_REPO_FULL_NAME="project_name/pro
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ BUILDKITE="true" SEMGREP_REPO_NAME="a/repo/name" SEMGREP_REPO_URL="https://rando
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"num_bytes": 366
}
},
"engine_requested": "OSS",
"engine_requested": "PRO_INTRAFILE",
"findings_by_product": {
"code": 16,
"supply-chain": 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ BUILDKITE="true" BUILDKITE_REPO="git@github.com/project_name/project_name.git" B
Initializing scan (deployment=org_name, scan_id=12345)
Enabled products: Code, Supply Chain

ENGINE
Using Semgrep Pro Version: <MASKED>
Installed at <MASKED>


┌─────────────┐
│ Scan Status │
└─────────────┘
Expand Down
Loading

0 comments on commit 7438477

Please sign in to comment.