Skip to content

Commit

Permalink
Release v0.0.0.6 with updates to prep_df_nn
Browse files: browse the repository at this point in the history
  • Loading branch information
sarahshi committed May 15, 2024
1 parent 7684dbb commit cb65bc5
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/mineralML/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
# 1) we don't load dependencies by storing it in __init__.py
# 2) we can import it in setup.py for the same reason
# 3) we can import it into your module
__version__ = '0.0.0.5'
__version__ = '0.0.0.6'
17 changes: 11 additions & 6 deletions src/mineralML/supervised.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,14 @@ def prep_df_nn(df):
]

# Ensure all required columns are present in the DataFrame
for col in oxides + ['Mineral', 'SampleID']:
for col in oxides + ["Mineral", "SampleID"]:
if col not in df.columns:
df[col] = np.nan
warnings.warn(f"The column '{col}' was missing and has been filled with NaN.",
UserWarning, stacklevel=2)
warnings.warn(
f"The column '{col}' was missing and has been filled with NaN.",
UserWarning,
stacklevel=2,
)

# Drop rows with fewer than 6 non-NaN values in the oxides columns
df.dropna(subset=oxides, thresh=6, inplace=True)
Expand All @@ -100,13 +103,14 @@ def prep_df_nn(df):
df[oxides] = df[oxides].fillna(0)

# Ensure only oxides, 'Mineral', and 'SampleID' columns are kept
df = df[oxides + ['Mineral', 'SampleID']]
df = df[oxides + ["Mineral", "SampleID"]]

# Ensure SampleID is the index
df.set_index('SampleID', inplace=True)
df.set_index("SampleID", inplace=True)

return df


def norm_data_nn(df):
"""
Expand Down Expand Up @@ -178,7 +182,8 @@ def balance(train_x, train_y, n=1000):
from imblearn.over_sampling import RandomOverSampler
except ImportError:
raise RuntimeError(
"You have not installed imblearn, which is required to balance the datasets used for training the neural networks. If you use conda, run conda install -c conda-forge imbalanced-learn. If you use pip, run pip install -U imbalanced-learn."
"You have not installed imblearn, which is required to balance the datasets used for training the neural networks. "
"If you use conda, run conda install -c conda-forge imbalanced-learn. If you use pip, run pip install -U imbalanced-learn."
)

oversample = RandomOverSampler(sampling_strategy="minority", random_state=42)
Expand Down

0 comments on commit cb65bc5

Please sign in to comment.