Merged
4 changes: 3 additions & 1 deletion examples/pzmm_generate_complete_model_card.ipynb
@@ -1716,12 +1716,14 @@
],
"source": [
"# Step 13: Generate requirements files\n",
"requirements_json = pzmm.JSONFiles.create_requirements_json(output_path)\n",
"requirements_json = pzmm.JSONFiles.create_requirements_json(output_path, create_requirements_txt=False)\n",
"\n",
"import json\n",
"print(json.dumps(requirements_json, sort_keys=True, indent=4))\n",
"\n",
"for requirement in requirements_json:\n",
" # Example: Replace sklearn with scikit-learn in requirements\n",
" # (This is redundant in newer versions but shows how to modify package names)\n",
" if 'sklearn' in requirement['step']:\n",
" requirement['command'] = requirement[\"command\"].replace('sklearn', 'scikit-learn')\n",
" requirement['step'] = requirement['step'].replace('sklearn', 'scikit-learn')\n",
10 changes: 7 additions & 3 deletions examples/pzmm_generate_requirements_json.ipynb
@@ -14,16 +14,18 @@
"id": "e9b8cb7c-1974-4af5-8992-d51f90fcfe5b",
"metadata": {},
"source": [
"# Automatic Generation of the requirements.json File\n",
"# Automatic Generation of the requirements.json or requirements.txt File\n",
"In order to validate Python models within a container publishing destination, the Python packages which contain the modules that are used in the Python score code file and its score resource files must be installed in the run-time container. You can install the packages when you publish a Python model or decision that contains a Python model to a container publishing destination by adding a `requirements.json` file that includes the package install statements to your model.\n",
"\n",
"This notebook provides an example execution and assessment of the create_requirements_json() function added in python-sasctl v1.8.0. The aim of this function is help to create the instructions (aka the `requirements.json` file) for a lightweight Python container in SAS Model Manager. Lightweight here meaning that the container will only install the packages found in the model's pickle files and python scripts.\n",
"\n",
"Additionally, the create_requirements_json() function provides an optional parameter `create_requirements_txt` which when set to `True` will generate a requirements.txt file alongside the requirements.json file. By default this option is set to `False`. The requirements.txt file is needed when deploying Python models to SAS Event Stream Processing, which requires this format for package installation in their environment. While SAS Model Manager continues to use the requirements.json format, adding the requirements.txt file ensures compatibility across both platforms. \n",
"\n",
"### **User Warnings**\n",
"The methods utilized in this function can determine package dependencies and versions from provided scripts and pickle files, but there are some stipulations that need to be considered:\n",
"\n",
"1. If run outside of the development environment that the model was created in, the create_requirements_json() function **CANNOT** determine the required package _versions_ accurately. \n",
"2. Not all Python packages have matching import and install names and as such some of the packages added to the requirements.json file may be incorrectly named (i.e. `import sklearn` vs `pip install scikit-learn`).\n",
"2. Not all Python packages have matching import and install names and as such some of the packages added to the requirements.json file may be incorrectly named (i.e. `import sklearn` vs `pip install scikit-learn`). Some of the major packages with differing import and install names are automatically converted. \n",
"\n",
"As such, it is recommended that the user check over the requirements.json file for package name and version accuracy before deploying to a run-time container in SAS Model Manager."
]
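For orientation, here is a minimal usage sketch of the workflow this notebook describes, matching the signature introduced in this PR (the model directory path is hypothetical):

import sasctl.pzmm as pzmm
from pathlib import Path

model_dir = Path("data/hmeqModels/DecisionTreeClassifier")  # hypothetical path

# Without output_path, the function returns a list of
# {"step": ..., "command": ...} dicts for inspection.
requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir)

# With output_path, requirements.json is written to that directory, and
# create_requirements_txt=True additionally writes requirements.txt for
# SAS Event Stream Processing deployments.
pzmm.JSONFiles.create_requirements_json(
    model_dir, output_path=model_dir, create_requirements_txt=True
)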
@@ -63,7 +65,7 @@
"outputs": [],
"source": [
"model_dir = Path.cwd() / \"data/hmeqModels/DecisionTreeClassifier\"\n",
"requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir)"
"requirements_json = pzmm.JSONFiles.create_requirements_json(model_dir, create_requirements_txt=False)"
]
},
{
@@ -145,6 +147,8 @@
],
"source": [
"for requirement in requirements_json:\n",
" # Example: Replace sklearn with scikit-learn in requirements\n",
" # (This is redundant in newer versions but shows how to modify package names)\n",
" if 'sklearn' in requirement['step']:\n",
" requirement['command'] = requirement[\"command\"].replace('sklearn', 'scikit-learn')\n",
" requirement['step'] = requirement['step'].replace('sklearn', 'scikit-learn')\n",
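As a standalone illustration of the rename pass in the cell above, applied to made-up entries (the dict shape follows the test expectations later in this PR; versions are invented, and the pass is redundant on sasctl versions that apply the import-to-install mapping automatically):

requirements_json = [
    {"step": "install numpy", "command": "pip install numpy==1.26.4"},
    {"step": "install sklearn", "command": "pip install sklearn==1.4.2"},
]

for requirement in requirements_json:
    if "sklearn" in requirement["step"]:
        requirement["command"] = requirement["command"].replace("sklearn", "scikit-learn")
        requirement["step"] = requirement["step"].replace("sklearn", "scikit-learn")

print(requirements_json[1])
# {'step': 'install scikit-learn', 'command': 'pip install scikit-learn==1.4.2'}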
63 changes: 59 additions & 4 deletions src/sasctl/pzmm/write_json_files.py
@@ -1614,6 +1614,7 @@ def create_requirements_json(
cls,
model_path: Union[str, Path, None] = Path.cwd(),
output_path: Union[str, Path, None] = None,
create_requirements_txt: bool = False,
) -> Union[dict, None]:
"""
Searches the model directory for Python scripts and pickle files and
@@ -1636,14 +1637,22 @@
environment.

When provided with an output_path argument, this function outputs a JSON file
named "requirements.json". Otherwise, a list of dicts is returned.
named "requirements.json". If create_requirements_txt is True, it will also
create a requirements.txt file. Otherwise, a list of dicts is returned.

Note: the requirements.txt file is only created when both output_path and
create_requirements_txt are specified.

Parameters
----------
model_path : str or pathlib.Path, optional
The path to a Python project, by default the current working directory.
output_path : str or pathlib.Path, optional
The path for the output requirements.json file. The default value is None.
create_requirements_txt : bool, optional
Whether to also create a requirements.txt file in addition to the
requirements.json file. This is useful for SAS Event Stream Processing
environments. The default value is False.

Returns
-------
@@ -1662,11 +1671,57 @@
package_list = list(set(list(_flatten(package_list))))
package_list = cls.remove_standard_library_packages(package_list)
package_and_version = cls.get_local_package_version(package_list)

# Identify packages with missing versions
missing_package_versions = [
item[0] for item in package_and_version if not item[1]
]

IMPORT_TO_INSTALL_MAPPING = {
Review comment (Collaborator Author): Could the addition of this import to install mapping create some issues with backwards compatibility? I noticed that in the example files it directs users to manually change the import names for packages like sklearn.

Reply (Collaborator): No, I think we're good here, since it was a non-programmatic ask. If they go hunting to change something and find it's fixed, I doubt we'll get complaints.

# Data Science & ML Core
"sklearn": "scikit-learn",
"skimage": "scikit-image",
"cv2": "opencv-python",
"PIL": "Pillow",
# Data Formats & Parsing
"yaml": "PyYAML",
"bs4": "beautifulsoup4",
"docx": "python-docx",
"pptx": "python-pptx",
# Date & Time Utilities
"dateutil": "python-dateutil",
# Database Connectors
"MySQLdb": "MySQL-python",
"psycopg2": "psycopg2-binary",
# System & Platform
"win32api": "pywin32",
"win32com": "pywin32",
# Scientific Libraries
"Bio": "biopython",
}

# Map import names to their corresponding package installation names
package_and_version = [
(IMPORT_TO_INSTALL_MAPPING.get(name, name), version)
for name, version in package_and_version
]

if create_requirements_txt:
requirements_txt = ""
if missing_package_versions:
requirements_txt += "# Warning- The existence and/or versions for the following packages could not be determined:\n"
requirements_txt += "# " + ", ".join(missing_package_versions) + "\n"

for package, version in package_and_version:
if version:
requirements_txt += f"{package}=={version}\n"

if output_path:
with open( # skipcq: PTC-W6004
Path(output_path) / "requirements.txt", "w"
) as file:
file.write(requirements_txt)

# Create a list of dicts related to each package or warning
json_dicts = []
if missing_package_versions:
@@ -1800,16 +1855,16 @@ def find_imports(file_path: Union[str, Path]) -> List[str]:
file_text = file.read()
# Parse the file to get the abstract syntax tree representation
tree = ast.parse(file_text)
modules = []
modules = set()

# Walk through each node in the ast to find import calls
for node in ast.walk(tree):
# Determine parent module for `from * import *` calls
if isinstance(node, ast.ImportFrom):
modules.append(node.module)
modules.add(node.module.split(".")[0])
elif isinstance(node, ast.Import):
for name in node.names:
modules.append(name.name)
modules.add(name.name.split(".")[0])

modules = list(set(modules))
try:
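To see the two source changes above in isolation (top-level module extraction via split(".")[0] and the import-to-install rename), here is a self-contained sketch using only the standard library; the sample source string and the one-entry mapping are illustrative:

import ast

source = (
    "import pandas.io.json\n"
    "from sklearn.tree import DecisionTreeClassifier\n"
)

modules = set()
for node in ast.walk(ast.parse(source)):
    if isinstance(node, ast.ImportFrom):
        # node.module can be None for relative imports ("from . import x");
        # production code should guard against that case.
        modules.add(node.module.split(".")[0])
    elif isinstance(node, ast.Import):
        for name in node.names:
            modules.add(name.name.split(".")[0])

IMPORT_TO_INSTALL_MAPPING = {"sklearn": "scikit-learn"}  # excerpt of the PR's table
installable = sorted(IMPORT_TO_INSTALL_MAPPING.get(m, m) for m in modules)
print(installable)  # ['pandas', 'scikit-learn']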
14 changes: 11 additions & 3 deletions tests/unit/test_write_json_files.py
@@ -699,8 +699,9 @@ def test_create_requirements_json(change_dir):
dtc = dtc.fit(x_train, y_train)
with open(tmp_dir / "DecisionTreeClassifier.pickle", "wb") as pkl_file:
pickle.dump(dtc, pkl_file)
jf.create_requirements_json(tmp_dir, Path(tmp_dir))
jf.create_requirements_json(tmp_dir, Path(tmp_dir), True)
assert (Path(tmp_dir) / "requirements.json").exists()
assert (Path(tmp_dir) / "requirements.txt").exists()

json_dict = jf.create_requirements_json(tmp_dir)
expected = [
@@ -709,13 +710,20 @@
"command": f"pip install numpy=={np.__version__}",
},
{
"step": "install sklearn",
"command": f"pip install sklearn=={sk.__version__}",
"step": "install scikit-learn",
"command": f"pip install scikit-learn=={sk.__version__}",
},
]
unittest.TestCase.maxDiff = None
unittest.TestCase().assertCountEqual(json_dict, expected)

# Verify requirements.txt content
with open(Path(tmp_dir) / "requirements.txt", "r") as file:
requirements_content = [line.strip() for line in file.readlines()]

assert f"numpy=={np.__version__}" in requirements_content
assert f"scikit-learn=={sk.__version__}" in requirements_content


class TestAssessBiasHelpers(unittest.TestCase):
md_1 = pd.DataFrame({"Value": [0], "Base": ["A"], "Compare": ["C"]})