[Coding Conventions] Enforce snake_case for variables and module names
Yacouby committed May 22, 2024
1 parent 2e01eb2 commit fa5cce9
Showing 13 changed files with 43 additions and 40 deletions.
4 changes: 2 additions & 2 deletions docs/tutorials/src/script.py
@@ -44,7 +44,7 @@ def sphere_dist(pickup_lat, pickup_lon, dropoff_lat, dropoff_lon):
     Return distance along great radius between pickup and drop-off coordinates.
     """
     # Define earth radius (km)
-    R_earth = 6371
+    r_earth = 6371
     # Convert degrees to radians
     pickup_lat, pickup_lon, dropoff_lat, dropoff_lon = map(
         np.radians, [pickup_lat, pickup_lon, dropoff_lat, dropoff_lon]
@@ -58,7 +58,7 @@ def sphere_dist(pickup_lat, pickup_lon, dropoff_lat, dropoff_lon):
         np.sin(dlat / 2.0) ** 2
         + np.cos(pickup_lat) * np.cos(dropoff_lat) * np.sin(dlon / 2.0) ** 2
     )
-    return 2 * R_earth * np.arcsin(np.sqrt(a))
+    return 2 * r_earth * np.arcsin(np.sqrt(a))


 def sphere_dist_bear(pickup_lat, pickup_lon, dropoff_lat, dropoff_lon):
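
For reference, the function above implements the haversine great-circle distance, d = 2 * r_earth * arcsin(sqrt(sin^2(dlat / 2) + cos(lat1) * cos(lat2) * sin^2(dlon / 2))). A minimal usage sketch, assuming sphere_dist is imported from the tutorial script above; the sample coordinates (roughly JFK airport and midtown Manhattan) are illustrative, not taken from the tutorial:

# Coordinates are in degrees; the result is in kilometers.
d_km = sphere_dist(40.64, -73.78, 40.76, -73.99)
print(round(d_km))  # on the order of 20 km
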
10 changes: 5 additions & 5 deletions docs/tutorials/src/trainer.py
@@ -17,12 +17,12 @@ def train(
     df = dataset.as_df()

     # Initialize the x & y data
-    X = df.drop(label_column, axis=1)
+    x = df.drop(label_column, axis=1)
     y = df[label_column]

     # Train/Test split the dataset
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
     )

     # Pick an ideal ML model
@@ -32,8 +32,8 @@ def train(

     # -------------------- The only line you need to add for MLOps -------------------------
     # Wraps the model with MLOps (test set is provided for analysis & accuracy measurements)
-    apply_mlrun(model=model, model_name=model_name, x_test=X_test, y_test=y_test)
+    apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test)
     # --------------------------------------------------------------------------------------

     # Train the model
-    model.fit(X_train, y_train)
+    model.fit(x_train, y_train)
10 changes: 5 additions & 5 deletions mlrun/data_types/to_pandas.py
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)

     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]

@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object

     df = pd.DataFrame()
     for index, t in enumerate(dtype):
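
The nullable-type fallback in the loop above mirrors plain pandas/numpy behaviour: an integer or boolean column that contains nulls cannot keep a non-nullable numpy dtype. A small standalone illustration of that behaviour (not code from this repository):

import pandas as pd

# Nulls force an integer column to float64, which is why the conversion
# pre-selects np.float64 (or object for booleans) for nullable columns.
s = pd.Series([1, 2, None])
print(s.dtype)  # float64
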
4 changes: 2 additions & 2 deletions mlrun/datastore/redis.py
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-        REDIS_DEFAULT_PORT = "6379"
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None

@@ -49,7 +49,7 @@ def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else REDIS_DEFAULT_PORT
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
4 changes: 2 additions & 2 deletions mlrun/datastore/targets.py
@@ -2134,7 +2134,7 @@ def _create_sql_table(self):
             raise ValueError(f"Table named {table_name} is not exist")

         elif not table_exists and create_table:
-            TYPE_TO_SQL_TYPE = {
+            type_to_sql_type = {
                 int: sqlalchemy.Integer,
                 str: sqlalchemy.String(self.attributes.get("varchar_len")),
                 datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -2147,7 +2147,7 @@ def _create_sql_table(self):
             # creat new table with the given name
             columns = []
             for col, col_type in self.schema.items():
-                col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+                col_type_sql = type_to_sql_type.get(col_type)
                 if col_type_sql is None:
                     raise TypeError(
                         f"'{col_type}' unsupported type for column '{col}'"
10 changes: 5 additions & 5 deletions mlrun/feature_store/retrieval/conversion.py
@@ -168,10 +168,10 @@ def toPandas(self):
         column_counter = Counter(self.columns)

         dtype = [None] * len(self.schema)
-        for fieldIdx, field in enumerate(self.schema):
+        for field_idx, field in enumerate(self.schema):
             # For duplicate column name, we use `iloc` to access it.
             if column_counter[field.name] > 1:
-                pandas_col = pdf.iloc[:, fieldIdx]
+                pandas_col = pdf.iloc[:, field_idx]
             else:
                 pandas_col = pdf[field.name]

@@ -187,12 +187,12 @@ def toPandas(self):
                 and field.nullable
                 and pandas_col.isnull().any()
             ):
-                dtype[fieldIdx] = pandas_type
+                dtype[field_idx] = pandas_type
             # Ensure we fall back to nullable numpy types, even when whole column is null:
             if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = np.float64
+                dtype[field_idx] = np.float64
             if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = object
+                dtype[field_idx] = object

         df = pd.DataFrame()
         for index, t in enumerate(dtype):
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -11,6 +11,9 @@ extend-select = [
     "I", # isort
     "UP", # pyupgrade
     "CPY", # flake8-copyright
+    "N806", # lowercase variable names
+    "N816", # snake_case for global variable names
+    "N999", # snake_case for module names
 ]
 exclude = ["*.ipynb"]
 explicit-preview-rules = true
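
The three added codes come from Ruff's pep8-naming ("N") rule set. As a rough sketch of what they catch (a hypothetical module named naming_demo.py, checked with `ruff check` under a config like the one above; rule descriptions paraphrased, not quoted from the Ruff docs):

# naming_demo.py
MAX_RETRIES = 3  # module-level UPPER_CASE constants are fine

mixedCaseGlobal = "value"  # N816: mixed-case name in the global scope


def circle_area(radius):
    Pi = 3.14159  # N806: variable inside a function should be lowercase, e.g. `pi`
    return Pi * radius**2

# N999 targets the module (file) name itself: a file called MyModule.py would be
# flagged, while naming_demo.py passes.
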
6 changes: 3 additions & 3 deletions server/api/migrations_sqlite/tests/test_migrations.py
@@ -104,9 +104,9 @@ class Constants:

 @pytest.fixture
 def alembic_session(alembic_engine):
-    Session = sessionmaker()
-    Session.configure(bind=alembic_engine)
-    session = Session()
+    session_maker = sessionmaker()
+    session_maker.configure(bind=alembic_engine)
+    session = session_maker()
     return session


4 changes: 2 additions & 2 deletions tests/artifacts/test_dataset.py
@@ -270,8 +270,8 @@ def test_dataset_preview_size_limit_from_large_dask_dataframe(monkeypatch):
     monkeypatch.setattr(mlrun.artifacts.dataset, "max_ddf_size", 0.001)

     print("Creating dataframe and setting memory limit")
-    A = numpy.random.random_sample(size=(50000, 6))
-    df = pandas.DataFrame(data=A, columns=list("ABCDEF"))
+    data = numpy.random.random_sample(size=(50000, 6))
+    df = pandas.DataFrame(data=data, columns=list("ABCDEF"))
     print("Verify the memory size of the dataframe is >400MB")
     assert (df.memory_usage().sum() // 1e3) > 200
     ddf = dd.from_pandas(df, npartitions=4)
6 changes: 3 additions & 3 deletions tests/http_srv.py
@@ -67,8 +67,8 @@ def create_function(handler, port):
     def func_wrap(self, context, event):
         return handler(context, event)

-    CustomHandler = Handler
-    CustomHandler.handler_function = func_wrap
+    custom_handler = Handler
+    custom_handler.handler_function = func_wrap

-    server = ThreadingSimpleServer(("0.0.0.0", port), CustomHandler)
+    server = ThreadingSimpleServer(("0.0.0.0", port), custom_handler)
     server.serve_forever()
8 changes: 4 additions & 4 deletions
@@ -94,8 +94,8 @@ def test_log_dask_to_azure(auth_method):
     verify_auth_parameters_and_configure_env(auth_method)
     artifact_path = "az://" + config["env"].get("AZURE_CONTAINER") + "/"

-    A = np.random.randint(0, 100, size=(10000, 4))
-    df = pd.DataFrame(data=A, columns=list("ABCD"))
+    data = np.random.randint(0, 100, size=(10000, 4))
+    df = pd.DataFrame(data=data, columns=list("ABCD"))
     ddf = dd.from_pandas(df, npartitions=4)

     context = mlrun.get_or_create_ctx("test")
@@ -117,8 +117,8 @@ def test_log_large_dask_dataframe_to_azure(auth_method):
     # Create the environmental variables
     verify_auth_parameters_and_configure_env(auth_method)

-    A = np.random.random_sample(size=(25000000, 6))
-    df = pd.DataFrame(data=A, columns=list("ABCDEF"))
+    data = np.random.random_sample(size=(25000000, 6))
+    df = pd.DataFrame(data=data, columns=list("ABCDEF"))
     ddf = dd.from_pandas(df, npartitions=10).persist()

     size = ddf.memory_usage().sum().compute()
4 changes: 2 additions & 2 deletions tests/system/feature_store/test_feature_store.py
@@ -4337,7 +4337,7 @@ def test_pandas_ingest_from_parquet(self, with_indexes):
         orig_df.set_index(["enfmtxfg", "hmwaebdl"], inplace=True)
         parquet_path = f"v3io:///projects/{self.project_name}/trfsinojud.parquet"
         orig_df.to_parquet(parquet_path)
-        gnrxRnIYSr = ParquetSource(path=parquet_path)
+        source = ParquetSource(path=parquet_path)

         if with_indexes:
             fset = fstore.FeatureSet(
@@ -4347,7 +4347,7 @@ def test_pandas_ingest_from_parquet(self, with_indexes):
             )
         else:
             fset = fstore.FeatureSet("VIeHOGZgjv", engine="pandas")
-        df = fset.ingest(source=gnrxRnIYSr)
+        df = fset.ingest(source=source)
         assert df.equals(orig_df)

     @TestMLRunSystem.skip_test_if_env_not_configured
10 changes: 5 additions & 5 deletions tests/track/platform_trackers/test_mlflow_tracker.py
@@ -74,16 +74,16 @@ def interrupted_run():
 def lgb_run():
     # prepare train and test data
     iris = datasets.load_iris()
-    X = iris.data
+    x = iris.data
     y = iris.target
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
     )

     # enable auto logging
     mlflow.lightgbm.autolog()

-    train_set = lgb.Dataset(X_train, label=y_train)
+    train_set = lgb.Dataset(x_train, label=y_train)

     with mlflow.start_run():
         # train model
@@ -106,7 +106,7 @@ def lgb_run():
         )

         # evaluate model
-        y_proba = model.predict(X_test)
+        y_proba = model.predict(x_test)
         y_pred = y_proba.argmax(axis=1)
         loss = log_loss(y_test, y_proba)
         acc = accuracy_score(y_test, y_pred)
