# 1. Import Essential Libraries
This cell imports core Python libraries used for environment inspection and quick validation.

In [6]:
# Import core libraries
import os, sys, platform, json, pathlib, math

print('Imported: os, sys, platform, json, pathlib, math')

Imported: os, sys, platform, json, pathlib, math


# 2. Display a Welcome Message
This confirms the notebook environment is active.

In [7]:
# Simple welcome / sanity output
print('Welcome to the AI Medical Diagnosis first-run bootstrap notebook!')
print('Notebook is running in:', os.getcwd())

Welcome to the AI Medical Diagnosis first-run bootstrap notebook!
Notebook is running in: /Users/niravpolara/PycharmProjects/ai-medical-diagnosis/notebooks


# 3. Check Python Version
Display interpreter and version details.

In [8]:
import sys, platform
print('Python executable:', sys.executable)
print('Version:', platform.python_version())
print('Platform:', platform.platform())

Python executable: /Users/niravpolara/PycharmProjects/ai-medical-diagnosis/.venv/bin/python
Version: 3.13.7
Platform: macOS-26.0.1-arm64-arm-64bit-Mach-O


# 4. List Current Working Directory Contents
List files and folders to confirm repository structure is accessible.

In [9]:
from pathlib import Path
root = Path.cwd()
print('Current working dir:', root)
for p in list(root.iterdir())[:50]:
    print('-', p.name, ('<DIR>' if p.is_dir() else ''))

Current working dir: /Users/niravpolara/PycharmProjects/ai-medical-diagnosis/notebooks
- transform_visualization.ipynb 
- first_run_bootstrap.ipynb 
- data_preprocessing.ipynb 
- exploratory_data_analysis.ipynb 
- data <DIR>


# 5. Run a Simple Calculation
Quick arithmetic to verify execution works.

In [10]:
result = sum(i*i for i in range(1, 51))
print('Sum of squares 1..50 =', result)
assert result == 42925, 'Unexpected calculation result'
print('Calculation OK.')

Sum of squares 1..50 = 42925
Calculation OK.


# 6. GPU & Environment Check
Determine if CUDA / MPS (Apple Silicon) is available and report basic device info.

In [11]:
# GPU / Device diagnostics
try:
    import torch
    cuda = torch.cuda.is_available()
    mps = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    device = (
        'cuda' if cuda else ('mps' if mps else 'cpu')
    )
    print(f"Torch version: {torch.__version__}")
    print(f"CUDA available: {cuda}")
    if cuda:
        print(f"CUDA device count: {torch.cuda.device_count()}")
        print(f"Current device: {torch.cuda.current_device()} -> {torch.cuda.get_device_name(torch.cuda.current_device())}")
    print(f"MPS (Apple Silicon) available: {mps}")
    print('Selected default device:', device)
except Exception as e:
    print('Torch not available or failed to query:', e)

Torch version: 2.8.0
CUDA available: False
MPS (Apple Silicon) available: True
Selected default device: mps


# 7. Dependency Version Snapshot
Capture key library versions for reproducibility.

In [12]:
import importlib
packages = [
    'numpy','pandas','torch','torchvision','scikit_learn','matplotlib','seaborn','albumentations','wandb','hydra','omegaconf'
]
report = {}
for pkg in packages:
    try:
        # map scikit_learn to sklearn
        mod_name = 'sklearn' if pkg=='scikit_learn' else pkg
        mod = importlib.import_module(mod_name)
        ver = getattr(mod, '__version__', 'unknown')
        report[pkg] = ver
    except Exception as e:
        report[pkg] = f'NOT INSTALLED ({e.__class__.__name__})'

max_len = max(len(k) for k in report)
for k,v in report.items():
    print(f"{k.ljust(max_len)} : {v}")

numpy          : 2.2.6
pandas         : 2.3.2
torch          : 2.8.0
torchvision    : 0.23.0
scikit_learn   : 1.7.2
matplotlib     : 3.10.6
seaborn        : 0.13.2
albumentations : 2.0.8
wandb          : 0.22.1
hydra          : 1.3.2
omegaconf      : 2.3.0


# 8. Quick Data Sanity Preview
Load a few metadata rows & class distribution for confirmation.

In [13]:
import pandas as pd
from pathlib import Path
meta_path = Path('notebooks/data/metadata/kaggle_pneumonia_metadata_clean.csv')
if meta_path.exists():
    df = pd.read_csv(meta_path)
    print('Rows:', len(df), '| Columns:', list(df.columns))
    if 'split' in df.columns and 'class' in df.columns:
        print('\nClass distribution by split:')
        print(df.groupby('split')['class'].value_counts().unstack(fill_value=0))
    print('\nSample rows:')
    display(df.head())
else:
    print('Metadata CSV not found at', meta_path)


Metadata CSV not found at notebooks/data/metadata/kaggle_pneumonia_metadata_clean.csv
