Run on laptop: to validate your local Python + venv setup.

Run on OpenShift AI: to confirm the cloned repo and Python environment are ready inside the workbench.

In [1]:
# Basic environment summary
import sys
import platform
from pathlib import Path

print("=== Python environment ===")
print("Python executable:", sys.executable)
print("Python version   :", sys.version)
print("Platform         :", platform.platform())

project_root = Path.cwd()
print("\nAssumed project root:", project_root)
print("Contents:", [p.name for p in project_root.iterdir()][:20])


=== Python environment ===
Python executable: /Users/vgrover/Downloads/software/AIWorkshops/MLforEng/.venv/bin/python3.13
Python version   : 3.13.5 (main, Jun 11 2025, 15:36:57) [Clang 17.0.0 (clang-1700.0.13.3)]
Platform         : macOS-26.1-arm64-arm-64bit-Mach-O

Assumed project root: /Users/vgrover/Downloads/software/AIWorkshops/MLforEng/workshops/00_overview
Contents: ['00_overview.ipynb', '.ipynb_checkpoints']


In [2]:
# Check that we’re inside the .venv & on Python 3.11
import sys
from pathlib import Path

print("=== venv & Python version check ===")

venv_dir = Path(".venv")
print("`.venv` directory exists:", venv_dir.exists())

print("Current Python executable:", sys.executable)
print("Current Python version   :", sys.version.split()[0])

major, minor = sys.version_info[:2]
if major == 3 and minor == 11:
    print("\n✅ OK: Python 3.11 is active.")
else:
    print("\n❌ WARNING: Expected Python 3.11.")
    print("   Please recreate the venv with:")
    print("   python3.11 -m venv .venv && source .venv/bin/activate")


=== venv & Python version check ===
`.venv` directory exists: False
Current Python executable: /Users/vgrover/Downloads/software/AIWorkshops/MLforEng/.venv/bin/python3.13
Current Python version   : 3.13.5

   Please recreate the venv with:
   python3.11 -m venv .venv && source .venv/bin/activate


In [3]:
# Check core workshop package (mlforeng)
print("=== Import mlforeng ===")

try:
    import mlforeng
    print("✅ OK: mlforeng imported successfully.")
    print("mlforeng module path:", mlforeng.__file__)
except Exception as e:
    print("❌ ERROR: Could not import mlforeng")
    print("Exception:", repr(e))
    print("\nHints:")
    print("- Make sure you're running this notebook from the MLforEng repo root.")
    print("- Make sure `.venv` is activated.")


=== Import mlforeng ===
❌ ERROR: Could not import mlforeng
Exception: ModuleNotFoundError("No module named 'mlforeng'")

Hints:
- Make sure you're running this notebook from the MLforEng repo root.
- Make sure `.venv` is activated.


In [4]:
# Check LLM deps: datasets, pyarrow, dm-tree


print("=== LLM dependency check (datasets, pyarrow, dm-tree) ===")

problems = False

try:
    import datasets
    print(f"✅ datasets imported (version {datasets.__version__})")
except Exception as e:
    problems = True
    print("❌ datasets not available:", repr(e))
    print("   Try: pip install 'datasets>=2.18.0'")

try:
    import pyarrow
    print(f"✅ pyarrow imported (version {pyarrow.__version__})")
except Exception as e:
    problems = True
    print("❌ pyarrow not available:", repr(e))
    print("   Try: pip install 'pyarrow>=12.0.0'")

try:
    import tree
    print("✅ dm-tree (module `tree`) imported")
except Exception as e:
    problems = True
    print("❌ dm-tree (module `tree`) not available:", repr(e))
    print("   Try: pip install dm-tree")

if not problems:
    print("\n✅ LLM base dependencies look good.")
else:
    print("\n⚠️ Fix the errors above, then re-run this cell.")


=== LLM dependency check (datasets, pyarrow, dm-tree) ===
❌ datasets not available: ModuleNotFoundError("No module named 'datasets'")
   Try: pip install 'datasets>=2.18.0'
❌ pyarrow not available: ModuleNotFoundError("No module named 'pyarrow'")
   Try: pip install 'pyarrow>=12.0.0'
❌ dm-tree (module `tree`) not available: ModuleNotFoundError("No module named 'tree'")
   Try: pip install dm-tree

⚠️ Fix the errors above, then re-run this cell.


In [5]:
# Import LLM helper module (mlforeng.llm_finetune.create_dataset)

print("=== LLM helper import test ===")

try:
    from mlforeng.llm_finetune import create_dataset
    print("✅ Imported mlforeng.llm_finetune.create_dataset OK")
except Exception as e:
    print("❌ ERROR importing mlforeng.llm_finetune.create_dataset")
    print("Exception:", repr(e))
    print("\nCheck that:")
    print("- mlforeng/llm_finetune/__init__.py only imports `create_dataset`")
    print("- LLM dependencies were installed in this venv.")


=== LLM helper import test ===
❌ ERROR importing mlforeng.llm_finetune.create_dataset
Exception: ModuleNotFoundError("No module named 'mlforeng'")

Check that:
- mlforeng/llm_finetune/__init__.py only imports `create_dataset`
- LLM dependencies were installed in this venv.


In [6]:
# Optional: quick dataset creation smoke test

from pathlib import Path

print("=== OPTIONAL: GSM8K dataset JSONL creation test ===")
print("This may download ~a few MB from Hugging Face and write JSONL under artifacts/llm_datasets")

try:
    from mlforeng.llm_finetune import create_dataset

    out_dir = create_dataset.gsm8k_qa_no_tokens_template()
    out_dir = Path(out_dir)

    print("✅ Dataset template function ran.")
    print("Output directory:", out_dir)
    print("Files:", [p.name for p in out_dir.glob("*.jsonl")])

except KeyboardInterrupt:
    print("⏹️  Aborted by user.")
except Exception as e:
    print("❌ ERROR while creating GSM8K JSONL")
    print("Exception:", repr(e))
    print("\nCheck network access and your Hugging Face rate limits / HF_TOKEN if needed.")


=== OPTIONAL: GSM8K dataset JSONL creation test ===
This may download ~a few MB from Hugging Face and write JSONL under artifacts/llm_datasets
❌ ERROR while creating GSM8K JSONL
Exception: ModuleNotFoundError("No module named 'mlforeng'")

Check network access and your Hugging Face rate limits / HF_TOKEN if needed.
