[Coding Conventions] Enforce snake_case for variables and module names
Yacouby committed May 22, 2024
1 parent 2e01eb2 commit fa5cce9
Showing 13 changed files with 43 additions and 40 deletions.
4 changes: 2 additions & 2 deletions docs/tutorials/src/script.py
@@ -44,7 +44,7 @@ def sphere_dist(pickup_lat, pickup_lon, dropoff_lat, dropoff_lon):
     Return distance along great radius between pickup and drop-off coordinates.
     """
     # Define earth radius (km)
-    R_earth = 6371
+    r_earth = 6371
     # Convert degrees to radians
     pickup_lat, pickup_lon, dropoff_lat, dropoff_lon = map(
         np.radians, [pickup_lat, pickup_lon, dropoff_lat, dropoff_lon]
@@ -58,7 +58,7 @@ def sphere_dist(pickup_lat, pickup_lon, dropoff_lat, dropoff_lon):
         np.sin(dlat / 2.0) ** 2
         + np.cos(pickup_lat) * np.cos(dropoff_lat) * np.sin(dlon / 2.0) ** 2
     )
-    return 2 * R_earth * np.arcsin(np.sqrt(a))
+    return 2 * r_earth * np.arcsin(np.sqrt(a))


 def sphere_dist_bear(pickup_lat, pickup_lon, dropoff_lat, dropoff_lon):
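
For reference, the function above implements the haversine great-circle distance, d = 2 * r_earth * arcsin(sqrt(sin^2(dlat / 2) + cos(lat1) * cos(lat2) * sin^2(dlon / 2))). A minimal usage sketch, assuming sphere_dist is imported from the tutorial script above; the sample coordinates (roughly JFK airport and midtown Manhattan) are illustrative, not taken from the tutorial:

# Coordinates are in degrees; the result is in kilometers.
d_km = sphere_dist(40.64, -73.78, 40.76, -73.99)
print(round(d_km))  # on the order of 20 km
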
10 changes: 5 additions & 5 deletions docs/tutorials/src/trainer.py
@@ -17,12 +17,12 @@ def train(
     df = dataset.as_df()

     # Initialize the x & y data
-    X = df.drop(label_column, axis=1)
+    x = df.drop(label_column, axis=1)
     y = df[label_column]

     # Train/Test split the dataset
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
     )

     # Pick an ideal ML model
@@ -32,8 +32,8 @@ def train(

     # -------------------- The only line you need to add for MLOps -------------------------
     # Wraps the model with MLOps (test set is provided for analysis & accuracy measurements)
-    apply_mlrun(model=model, model_name=model_name, x_test=X_test, y_test=y_test)
+    apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test)
     # --------------------------------------------------------------------------------------

     # Train the model
-    model.fit(X_train, y_train)
+    model.fit(x_train, y_train)
10 changes: 5 additions & 5 deletions mlrun/data_types/to_pandas.py
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)

     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]

@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object

     df = pd.DataFrame()
     for index, t in enumerate(dtype):
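
The nullable-type fallback in the loop above mirrors plain pandas/numpy behaviour: an integer or boolean column that contains nulls cannot keep a non-nullable numpy dtype. A small standalone illustration of that behaviour (not code from this repository):

import pandas as pd

# Nulls force an integer column to float64, which is why the conversion
# pre-selects np.float64 (or object for booleans) for nullable columns.
s = pd.Series([1, 2, None])
print(s.dtype)  # float64
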
4 changes: 2 additions & 2 deletions mlrun/datastore/redis.py
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-        REDIS_DEFAULT_PORT = "6379"
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None

@@ -49,7 +49,7 @@ def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else REDIS_DEFAULT_PORT
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
4 changes: 2 additions & 2 deletions mlrun/datastore/targets.py
@@ -2134,7 +2134,7 @@ def _create_sql_table(self):
             raise ValueError(f"Table named {table_name} is not exist")

         elif not table_exists and create_table:
-            TYPE_TO_SQL_TYPE = {
+            type_to_sql_type = {
                 int: sqlalchemy.Integer,
                 str: sqlalchemy.String(self.attributes.get("varchar_len")),
                 datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -2147,7 +2147,7 @@ def _create_sql_table(self):
             # creat new table with the given name
             columns = []
             for col, col_type in self.schema.items():
-                col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+                col_type_sql = type_to_sql_type.get(col_type)
                 if col_type_sql is None:
                     raise TypeError(
                         f"'{col_type}' unsupported type for column '{col}'"
10 changes: 5 additions & 5 deletions mlrun/feature_store/retrieval/conversion.py
@@ -168,10 +168,10 @@ def toPandas(self):
         column_counter = Counter(self.columns)

         dtype = [None] * len(self.schema)
-        for fieldIdx, field in enumerate(self.schema):
+        for field_idx, field in enumerate(self.schema):
             # For duplicate column name, we use `iloc` to access it.
             if column_counter[field.name] > 1:
-                pandas_col = pdf.iloc[:, fieldIdx]
+                pandas_col = pdf.iloc[:, field_idx]
             else:
                 pandas_col = pdf[field.name]

@@ -187,12 +187,12 @@ def toPandas(self):
                 and field.nullable
                 and pandas_col.isnull().any()
             ):
-                dtype[fieldIdx] = pandas_type
+                dtype[field_idx] = pandas_type
             # Ensure we fall back to nullable numpy types, even when whole column is null:
             if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = np.float64
+                dtype[field_idx] = np.float64
             if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = object
+                dtype[field_idx] = object

         df = pd.DataFrame()
         for index, t in enumerate(dtype):
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -11,6 +11,9 @@ extend-select = [
     "I", # isort
     "UP", # pyupgrade
     "CPY", # flake8-copyright
+    "N806", # lowercase variable names
+    "N816", # snake_case for global variable names
+    "N999", # snake_case for module names
 ]
 exclude = ["*.ipynb"]
 explicit-preview-rules = true
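
The three added codes come from Ruff's pep8-naming ("N") rule set. As a rough sketch of what they catch (a hypothetical module named naming_demo.py, checked with `ruff check` under a config like the one above; rule descriptions paraphrased, not quoted from the Ruff docs):

# naming_demo.py
MAX_RETRIES = 3  # module-level UPPER_CASE constants are fine

mixedCaseGlobal = "value"  # N816: mixed-case name in the global scope


def circle_area(radius):
    Pi = 3.14159  # N806: variable inside a function should be lowercase, e.g. `pi`
    return Pi * radius**2

# N999 targets the module (file) name itself: a file called MyModule.py would be
# flagged, while naming_demo.py passes.
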
6 changes: 3 additions & 3 deletions server/api/migrations_sqlite/tests/test_migrations.py
@@ -104,9 +104,9 @@ class Constants:

 @pytest.fixture
 def alembic_session(alembic_engine):
-    Session = sessionmaker()
-    Session.configure(bind=alembic_engine)
-    session = Session()
+    session_maker = sessionmaker()
+    session_maker.configure(bind=alembic_engine)
+    session = session_maker()
     return session


4 changes: 2 additions & 2 deletions tests/artifacts/test_dataset.py
@@ -270,8 +270,8 @@ def test_dataset_preview_size_limit_from_large_dask_dataframe(monkeypatch):
     monkeypatch.setattr(mlrun.artifacts.dataset, "max_ddf_size", 0.001)

     print("Creating dataframe and setting memory limit")
-    A = numpy.random.random_sample(size=(50000, 6))
-    df = pandas.DataFrame(data=A, columns=list("ABCDEF"))
+    data = numpy.random.random_sample(size=(50000, 6))
+    df = pandas.DataFrame(data=data, columns=list("ABCDEF"))
     print("Verify the memory size of the dataframe is >400MB")
     assert (df.memory_usage().sum() // 1e3) > 200
     ddf = dd.from_pandas(df, npartitions=4)
6 changes: 3 additions & 3 deletions tests/http_srv.py
@@ -67,8 +67,8 @@ def create_function(handler, port):
     def func_wrap(self, context, event):
         return handler(context, event)

-    CustomHandler = Handler
-    CustomHandler.handler_function = func_wrap
+    custom_handler = Handler
+    custom_handler.handler_function = func_wrap

-    server = ThreadingSimpleServer(("0.0.0.0", port), CustomHandler)
+    server = ThreadingSimpleServer(("0.0.0.0", port), custom_handler)
     server.serve_forever()
8 changes: 4 additions & 4 deletions
@@ -94,8 +94,8 @@ def test_log_dask_to_azure(auth_method):
     verify_auth_parameters_and_configure_env(auth_method)
     artifact_path = "az://" + config["env"].get("AZURE_CONTAINER") + "/"

-    A = np.random.randint(0, 100, size=(10000, 4))
-    df = pd.DataFrame(data=A, columns=list("ABCD"))
+    data = np.random.randint(0, 100, size=(10000, 4))
+    df = pd.DataFrame(data=data, columns=list("ABCD"))
     ddf = dd.from_pandas(df, npartitions=4)

     context = mlrun.get_or_create_ctx("test")
@@ -117,8 +117,8 @@ def test_log_large_dask_dataframe_to_azure(auth_method):
     # Create the environmental variables
     verify_auth_parameters_and_configure_env(auth_method)

-    A = np.random.random_sample(size=(25000000, 6))
-    df = pd.DataFrame(data=A, columns=list("ABCDEF"))
+    data = np.random.random_sample(size=(25000000, 6))
+    df = pd.DataFrame(data=data, columns=list("ABCDEF"))
     ddf = dd.from_pandas(df, npartitions=10).persist()

     size = ddf.memory_usage().sum().compute()
4 changes: 2 additions & 2 deletions tests/system/feature_store/test_feature_store.py
@@ -4337,7 +4337,7 @@ def test_pandas_ingest_from_parquet(self, with_indexes):
         orig_df.set_index(["enfmtxfg", "hmwaebdl"], inplace=True)
         parquet_path = f"v3io:///projects/{self.project_name}/trfsinojud.parquet"
         orig_df.to_parquet(parquet_path)
-        gnrxRnIYSr = ParquetSource(path=parquet_path)
+        source = ParquetSource(path=parquet_path)

         if with_indexes:
             fset = fstore.FeatureSet(
@@ -4347,7 +4347,7 @@ def test_pandas_ingest_from_parquet(self, with_indexes):
             )
         else:
             fset = fstore.FeatureSet("VIeHOGZgjv", engine="pandas")
-        df = fset.ingest(source=gnrxRnIYSr)
+        df = fset.ingest(source=source)
         assert df.equals(orig_df)

     @TestMLRunSystem.skip_test_if_env_not_configured
10 changes: 5 additions & 5 deletions tests/track/platform_trackers/test_mlflow_tracker.py
@@ -74,16 +74,16 @@ def interrupted_run():
 def lgb_run():
     # prepare train and test data
     iris = datasets.load_iris()
-    X = iris.data
+    x = iris.data
     y = iris.target
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
     )

     # enable auto logging
     mlflow.lightgbm.autolog()

-    train_set = lgb.Dataset(X_train, label=y_train)
+    train_set = lgb.Dataset(x_train, label=y_train)

     with mlflow.start_run():
         # train model
@@ -106,7 +106,7 @@ def lgb_run():
         )

         # evaluate model
-        y_proba = model.predict(X_test)
+        y_proba = model.predict(x_test)
         y_pred = y_proba.argmax(axis=1)
         loss = log_loss(y_test, y_proba)
         acc = accuracy_score(y_test, y_pred)
