Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

ci(semgrep): fix semgrep and lint failures #92

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions .semgrepignore
@@ -0,0 +1,13 @@
# Ignore git items
.gitignore
.git/
:include .gitignore

# Test cases, examples and workflows
tests/
.github/
examples/
tool/

# For removed files
deprecated_files/
2 changes: 1 addition & 1 deletion cloudconnectlib/common/util.py
Expand Up @@ -48,7 +48,7 @@ def load_json_file(file_path):
:param file_path: JSON file path.
:return: A `dict` object.
"""
with open(file_path) as file_pointer:
with open(file_path, encoding="UTF-8") as file_pointer:
return json.load(file_pointer)


Expand Down
7 changes: 5 additions & 2 deletions cloudconnectlib/configuration/loader.py
Expand Up @@ -64,7 +64,7 @@ def _get_schema_from_file(schema_file):
"""
try:
return load_json_file(schema_file)
except:
except Exception:
raise ConfigException(
"Cannot load schema from file {}: {}".format(
schema_file, traceback.format_exc()
Expand Down Expand Up @@ -236,7 +236,10 @@ def _load_processor(self, processor):
pipeline = self._parse_tasks(processor.get("pipeline", []))
return Processor(skip_conditions=skip_conditions, pipeline=pipeline)

def _load_request(self, request):
# nosemgrep reason - false positive, `request` is a dict here
def _load_request( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
self, request
):
options = self._load_options(request["request"])

pre_process = self._load_processor(request.get("pre_process", {}))
Expand Down
10 changes: 8 additions & 2 deletions cloudconnectlib/core/engine.py
Expand Up @@ -92,7 +92,10 @@ class Job:
reached its stop condition.
"""

def __init__(self, request, context, checkpoint_mgr, proxy=None):
# nosemgrep reason - false positive, `request` is a Munch object which has settings for making a request
def __init__( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
self, request, context, checkpoint_mgr, proxy=None
):
"""
Constructs a `Job` with properties request, context and an
optional proxy setting.
Expand Down Expand Up @@ -301,7 +304,10 @@ def _run(self):
_logger.info("Stop condition reached, exit job now")
break

def _send_request(self, request):
# nosemgrep reason - false positive, `request` is a dict here
def _send_request( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
self, request
):
"""Do send request with a simple error handling strategy. Refer to
https://confluence.splunk.com/display/PROD/CC+1.0+-+Detail+Design"""
try:
Expand Down
5 changes: 4 additions & 1 deletion cloudconnectlib/core/http.py
Expand Up @@ -288,7 +288,10 @@ def _initialize_connection(self):
_logger.info("Proxy is not enabled for http connection.")
self._connection = self._build_http_connection(self._proxy_info)

def send(self, request):
# nosemgrep reason - false positive, `request` is a dict here having configs required for making request
def send( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
self, request
):
if not request:
raise ValueError("The request is none")
if request.body and not isinstance(request.body, str):
Expand Down
6 changes: 5 additions & 1 deletion cloudconnectlib/core/plugin.py
Expand Up @@ -86,7 +86,11 @@ def import_plugin_file(file_name):
return

try:
importlib.import_module(module_name)
# nosemgrep reason - this function only imports a custom cce_plugin_<addon's plugin>.py,
# which is provided by an addon and programmatically provided.
importlib.import_module( # nosemgrep: python.lang.security.audit.non-literal-import.non-literal-import
module_name
)
except Exception:
logger.warning(f"Failed to load module {module_name}, {traceback.format_exc()}")
return
Expand Down
18 changes: 14 additions & 4 deletions cloudconnectlib/core/task.py
Expand Up @@ -88,7 +88,10 @@ def render(self, context):


class RequestTemplate:
def __init__(self, request):
# nosemgrep reason - false positive, `request` is a dict here having configs required for making request
def __init__( # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
self, request
):
if not request:
raise ValueError("The request is none")
url = request.get("url")
Expand Down Expand Up @@ -295,7 +298,8 @@ def perform(self, context):
try:
invoke_results = self._process_handler.execute(context)
except Exception:
logger.exception("Task=%s encountered exception", self)
# Fixing `python.lang.best-practice.logging-error-without-handling.logging-error-without-handling`
logger.warn("Task=%s encountered exception", self)
raise CCESplitError
if not invoke_results or not invoke_results.get(CCESplitTask.OUTPUT_KEY):
raise CCESplitError
Expand All @@ -316,7 +320,10 @@ class CCEHTTPRequestTask(BaseTask):
from context when executing.
"""

def __init__(self, request, name, meta_config=None, task_config=None, **kwargs):
# nosemgrep reason - false positive, `request` is a dict here having configs required for making request
def __init__(
self, request, name, meta_config=None, task_config=None, **kwargs
): # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
"""
:param verify: Absolute path to server certificate, otherwise uses
requests' default certificate to verify server's TLS certificate.
Expand Down Expand Up @@ -464,7 +471,10 @@ def _should_exit(self, done_count, context):
return True
return False

def _send_request(self, request):
# nosemgrep reason - false positive, `request` is a dict here having configs required for making request
def _send_request(
self, request
): # nosemgrep: python.django.security.audit.django-ratelimit.missing-ratelimit.missing-ratelimit
try:
response = self._http_client.send(request)
except HTTPError as error:
Expand Down
5 changes: 4 additions & 1 deletion cloudconnectlib/core/template.py
Expand Up @@ -31,6 +31,9 @@ def translate_internal(context):
if match:
context_var = context.get(match.groups()[0])
return context_var if context_var else ""
return _template.render(context)
# nosemgrep reason - the `context` passed is provided by the addon, and it should be sanitized by the addon.
return _template.render( # nosemgrep: python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2 # noqa: E501 - semgrep name is too long
context
)

return translate_internal
2 changes: 1 addition & 1 deletion cloudconnectlib/splunktacollectorlib/common/__init__.py
Expand Up @@ -22,7 +22,7 @@ def load_schema_file(schema_file):
Load schema file.
"""

with open(schema_file) as f:
with open(schema_file, encoding="UTF-8") as f:
ret = json.load(f)

common = ret.get("_common_", dict())
Expand Down
27 changes: 20 additions & 7 deletions cloudconnectlib/splunktacollectorlib/config.py
Expand Up @@ -149,8 +149,10 @@ def load(self):
else:
break
else:
log(exc, level=logging.ERROR, need_tb=True)
raise exc
# F821 reason - when `ConfigException` would be caught, `exc` would be defined.
# The uncaught exceptions would break the code flow.
log(exc, level=logging.ERROR, need_tb=True) # noqa: F821
raise exc # noqa: F821

log('"load" method out', level=logging.DEBUG)
return ret
Expand Down Expand Up @@ -185,7 +187,7 @@ def update_items(
level=logging.DEBUG,
)

assert (
assert ( # nosemgrep: gitlab.bandit.B101 - additional check for endpoint_id. Raises AssertionError otherwise
endpoint_id in self._endpoints
), "Unexpected endpoint id in given schema - {ep_id}" "".format(
ep_id=endpoint_id
Expand Down Expand Up @@ -314,20 +316,31 @@ def _parse_schema(self, ucc_config_schema):
}
)
for field in Config.META_FIELDS:
assert field in ucc_config_schema and isinstance(
ucc_config_schema[field], str
# fmt: off
assert ( # nosemgrep: gitlab.bandit.B101 - additional check for ucc_config_schema.
field in ucc_config_schema
and isinstance(
ucc_config_schema[field], str
)
), ('Missing or invalid field "%s" in given schema' % field)
# fmt: on
setattr(self, field, ucc_config_schema[field])

self._endpoints = {}
for key, val in ucc_config_schema.items():
if key.startswith("_"):
continue

assert isinstance(val, dict), (
assert isinstance( # nosemgrep: gitlab.bandit.B101 - additional check `val` type.
val, dict
), (
'The schema of endpoint "%s" should be dict' % key
)
assert "endpoint" in val, 'The endpoint "%s" has no endpoint entry' % key
assert ( # nosemgrep: gitlab.bandit.B101 - additional check for endpoint in `val`.
"endpoint" in val
), (
'The endpoint "%s" has no endpoint entry' % key
)

self._endpoints[key] = val

Expand Down
Expand Up @@ -15,7 +15,6 @@
# limitations under the License.
#
import threading
import time
from collections import namedtuple

from splunktalib.common import util as scu
Expand Down Expand Up @@ -110,16 +109,22 @@ def _build_event(self, events):
events = [events]
evts = []
for event in events:
assert event.raw_data, "the raw data of events is empty"
if event.is_unbroken:
assert ( # nosemgrep: gitlab.bandit.B101 - additional check for raw data in a single event.
event.raw_data
), "the raw data of events is empty"
if (
event.is_unbroken # nosemgrep: python.lang.maintainability.is-function-without-parentheses.is-function-without-parentheses - false positive, it is a property and not a function. # noqa: E501 - semgrep name is too long
):
evt = unbroken_evt_fmt.format(
event.host or "",
event.source or "",
event.sourcetype or "",
event.time or "",
event.index or "",
scu.escape_cdata(event.raw_data),
"<done/>" if event.is_done else "",
"<done/>"
if event.is_done # nosemgrep: python.lang.maintainability.is-function-without-parentheses.is-function-without-parentheses - false positive, it is a property and not a function. # noqa: E501 - semgrep name is too long
else "",
)
else:
evt = evt_fmt.format(
Expand Down
Expand Up @@ -86,7 +86,7 @@ def _enqueue_io_job(job):
def _wait_for_tear_down(self):
wakeup_q = self._wakeup_queue
while 1:
try:
try: # nosemgrep: gitlab.bandit.B110 -- nothing to do when queue is empty.
go_exit = wakeup_q.get(timeout=1)
except queue.Empty:
pass
Expand Down
Expand Up @@ -54,7 +54,9 @@ def get_md5(data):
:param data:
:return:
"""
assert data is not None, "The input cannot be None"
assert ( # nosemgrep: gitlab.bandit.B101 - checking for data to not be None
data is not None
), "The input cannot be None"
if isinstance(data, str):
return hashlib.sha256(data.encode("utf-8")).hexdigest()
elif isinstance(data, (list, tuple, dict)):
Expand Down Expand Up @@ -124,10 +126,12 @@ def _load_conf_contents(self):

def _process_division(self, division_endpoint, division_contents):
division_metrics = []
assert isinstance(division_contents, dict)
assert isinstance( # nosemgrep: gitlab.bandit.B101 - checking for type of `division_contents`
division_contents, dict
)
for division_key, division_value in division_contents.items():
try:
assert (
assert ( # nosemgrep: gitlab.bandit.B101
self.TYPE in division_value
and division_value[self.TYPE] in [self.TYPE_SINGLE, self.TYPE_MULTI]
and self.SEPARATOR in division_value
Expand Down
Expand Up @@ -35,7 +35,7 @@
from . import ta_checkpoint_manager as cpmgr
from . import ta_config as tc
from . import ta_data_client as tdc
from . import ta_data_loader as dl
from . import ta_data_loader as tdl

utils.remove_http_proxy_env_vars()

Expand Down Expand Up @@ -171,7 +171,7 @@ def run(
# http://bugs.python.org/issue7980
time.strptime("2016-01-01", "%Y-%m-%d")

loader = dl.create_data_loader()
loader = tdl.create_data_loader()

# handle signal
_setup_signal_handler(loader, ta_short_name)
Expand Down Expand Up @@ -272,8 +272,12 @@ def main(
"""
Main entry point
"""
assert collector_cls, "ucc modinput collector is None."
assert schema_file_path, "ucc modinput schema file is None"
assert ( # nosemgrep: gitlab.bandit.B101 - check for required params to be passed
collector_cls
), "ucc modinput collector is None."
assert ( # nosemgrep: gitlab.bandit.B101 - check for required params to be passed
schema_file_path
), "ucc modinput schema file is None"

settings = ld(schema_file_path)

Expand All @@ -289,8 +293,6 @@ def main(
)
elif args[1] == "--validate-arguments":
sys.exit(validate_config())
elif args[1] in ("-h", "--h", "--help"):
usage()
else:
usage()
else:
Expand Down
Expand Up @@ -23,7 +23,9 @@

ta_name = os.path.basename(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
ta_lib_name = re.sub(r"[^\w]+", "_", ta_name.lower())
assert ta_name or ta_name == "package", "TA name is None or package"
assert ( # nosemgrep: gitlab.bandit.B101 - check for `ta_name` to be a valid value
ta_name or ta_name == "package"
), "TA name is None or package"
pattern = re.compile(r"[\\/]etc[\\/]apps[\\/][^\\/]+[\\/]bin[\\/]?$")
new_paths = [path for path in sys.path if not pattern.search(path) or ta_name in path]
new_paths.insert(0, os.path.sep.join([os.path.dirname(__file__), ta_lib_name]))
Expand Down