# PAD Analytics Functions Demonstration

This notebook demonstrates all available functions in PAD Analytics v0.2.1, organized by category.

**Author**: PAD Analytics Team  
**Version**: 0.2.1  
**Date**: July 2025

In [None]:
# Setup and imports
import pad_analytics as pad
import pandas as pd
import warnings
# Installs a blanket "ignore" filter: every warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation warnings from pad_analytics/pandas —
# consider narrowing the filter to a specific category or module.
warnings.filterwarnings('ignore')

# Report the installed package version so the output can be matched to the release being demonstrated.
print(f"PAD Analytics version: {pad.__version__}")

## 1. Dataset Management Functions

These functions help you discover, load, and manage PAD datasets.

In [None]:
# Fetch the catalogue of available datasets, report its size, then preview it.
datasets = pad.get_datasets()
dataset_count = len(datasets)
print(f"Available datasets: {dataset_count}")
# Final top-level expression: the notebook renders the head() preview.
datasets.head()

In [None]:
# Get detailed dataset information with model mappings
dataset_list = pad.get_dataset_list()
print("Dataset information with model associations:")
# Left as the cell's final expression so the notebook renders the returned object.
dataset_list

In [None]:
# Load one dataset by name and summarise its size and leading column names.
dataset_name = "FHI2020_Stratified_Sampling"
fhi_data = pad.get_dataset(dataset_name)

sample_total = len(fhi_data)
print(f"Loaded '{dataset_name}' with {sample_total} samples")

# Only the first five column names are shown to keep the output short.
leading_columns = list(fhi_data.columns)[:5]
print(f"Columns: {leading_columns}...")

In [None]:
# Get clean dataset cards (without internal columns)
cards = pad.get_dataset_cards("FHI2020_Stratified_Sampling")
card_total = len(cards)
print(f"Dataset cards: {card_total} samples")

# Columns shown in the preview; the selection is the cell's rendered output.
preview_columns = ['card_id', 'sample_id', 'sample_name', 'quantity']
cards[preview_columns].head()

In [None]:
# Resolve the dataset behind model 16: first its contents, then its name.
model_dataset = pad.get_dataset_from_model_id(16)
# Note: this rebinds `dataset_name`, which an earlier cell also assigns.
dataset_name = pad.get_dataset_name_from_model_id(16)

print(f"Model 16 uses dataset: '{dataset_name}'")
model_dataset_size = len(model_dataset)
print(f"Dataset size: {model_dataset_size} samples")

## 2. Card/Sample Management Functions

Functions for retrieving and managing individual PAD test cards.

In [None]:
# Look up a single card by ID and report selected fields from its first row.
card = pad.get_card_by_id(47918)


def _first(column):
    """Return the value of *column* in the first row of `card`."""
    return card[column].iloc[0]


print(f"Card {_first('card_id')}:")
print(f"  Sample: {_first('sample_name')}")
print(f"  Quantity: {_first('quantity')}%")
print(f"  Project: {_first('project.project_name')}")

In [None]:
# Get cards by sample ID (can return multiple cards)
sample_cards = pad.get_card_by_sample_id(52677)
print(f"Found {len(sample_cards)} cards for sample_id 52677")
if len(sample_cards) > 0:
    # A notebook only auto-renders a cell's final *top-level* expression.
    # An expression nested inside this `if` would be evaluated and then
    # discarded, so the preview is printed explicitly instead.
    print(sample_cards[['card_id', 'sample_id', 'sample_name', 'quantity']].head())

In [None]:
# Retrieve the cards flagged with known quality issues and report how many there are.
issues = pad.get_card_issues()
issue_count = len(issues)
print(f"Cards with known issues: {issue_count}")
print("These should typically be excluded from analysis")

## 3. Project Management Functions

Functions for exploring data organized by research projects.

In [None]:
# Fetch every project, report the total, then preview the identifying columns.
projects = pad.get_projects()
project_total = len(projects)
print(f"Total projects: {project_total}")

# Final top-level expression: rendered by the notebook.
project_preview_columns = ['project_id', 'project_name']
projects[project_preview_columns].head()

In [None]:
# Get specific project
project = pad.get_project(id=1)
# Test against None explicitly, matching the project_cards check below.
# NOTE(review): the return type of get_project() is not visible here; if it is
# a pandas Series/DataFrame, a bare `if project:` raises
# "ValueError: The truth value of a ... is ambiguous".
if project is not None:
    print(f"Project 1: {project.get('project_name', 'Unknown')}")

# Get project cards
project_cards = pad.get_project_cards(project_name="FHI Study 2020")
if project_cards is not None:
    print(f"\nCards in 'FHI Study 2020': {len(project_cards)}")

## 4. Model Management Functions

Functions for discovering and retrieving ML models.

In [None]:
# Fetch the model catalogue, print a hand-written summary of the key models,
# then preview the catalogue itself.
models = pad.get_models()
print(f"Available models: {len(models)}")

print("\nKey models:")
for key_model_line in (
    "- Model 16: Neural Network for drug classification",
    "- Model 17: Neural Network for concentration",
    "- Model 18: PLS for concentration",
    "- Model 19: Neural Network for concentration v2",
):
    print(key_model_line)

# Final top-level expression: rendered by the notebook.
models[['model_id', 'model_name', 'model_type']].head()

In [None]:
# Fetch the train and test data associated with model 16.
model_train = pad.get_model_data(16, type='train')
model_test = pad.get_model_data(16, type='test')


def _sample_count(split):
    """Return the row count of *split*, or 'N/A' when it is not a DataFrame."""
    if isinstance(split, pd.DataFrame):
        return len(split)
    return 'N/A'


print(f"Model 16 training data: {_sample_count(model_train)} samples")
print(f"Model 16 test data: {_sample_count(model_test)} samples")

## 5. Visualization Functions

Interactive displays of PAD cards and predictions.

In [None]:
# Display single card
# Uses card 47918, the same ID passed to get_card_by_id() earlier in this notebook.
print("Single card display:")
pad.show_card(card_id=47918)

In [None]:
# Show several cards in one call by passing a list of card IDs.
print("Multiple cards display:")
demo_card_ids = [47918, 47919, 47920]
pad.show_cards(card_ids=demo_card_ids)

In [None]:
# Pick up to three cards whose sample name mentions ceftriaxone
# (case-insensitive; missing names count as non-matches) and display them.
is_ceftriaxone = cards['sample_name'].str.contains('ceftriaxone', case=False, na=False)
sample_df = cards[is_ceftriaxone].head(3)

# Skip the display entirely when the filter matched nothing.
if len(sample_df) > 0:
    print("Cards from DataFrame:")
    pad.show_cards_from_df(sample_df)

In [None]:
# Take up to twelve rifampicin cards (case-insensitive match; missing names
# count as non-matches) and show them grouped by the 'quantity' column.
is_rifampicin = cards['sample_name'].str.contains('rifampicin', case=False, na=False)
grouped_df = cards[is_rifampicin].head(12)

# The grouped view is only drawn when more than five cards matched.
if len(grouped_df) > 5:
    print("Cards grouped by concentration:")
    pad.show_grouped_cards(grouped_df, group_column='quantity', images_per_row=3)

## 6. Prediction & Analysis Functions

Apply ML models for drug identification and quantification.

In [None]:
# Single prediction - Drug classification
# predict() is unpacked into two values here: the actual label and the model's prediction.
actual, prediction = pad.predict(card_id=47918, model_id=16)
# For model 16 the prediction is itself unpacked into three values.
drug_name, confidence, energy = prediction
print("Drug Classification (Model 16):")
print(f"  Actual: {actual}")
# `.2%` multiplies by 100, so confidence is presumably a 0-1 fraction — TODO confirm.
print(f"  Predicted: {drug_name} (confidence: {confidence:.2%})")
print(f"  Energy: {energy:.2f}")

In [None]:
# Predict the concentration for one card with model 18 and report the
# absolute difference between the actual and predicted values.
actual, predicted = pad.predict(card_id=47918, model_id=18)

print("\nConcentration Prediction (Model 18 - PLS):")
print(f"  Actual: {actual:.2f}%")
print(f"  Predicted: {predicted:.2f}%")

# Computed after the lines above so the order of output and evaluation is unchanged.
absolute_error = abs(actual - predicted)
print(f"  Error: {absolute_error:.2f}%")

In [None]:
# Show prediction with visualization
# Uses the same card (47918) and model (16) as the drug-classification cell above.
print("Card with prediction:")
pad.show_prediction(card_id=47918, model_id=16)

In [None]:
# Batch predictions (optimized in v0.2.1!)
# Run model 16 over the first five cards of the dataset in a single call.
small_batch = cards.head(5)
batch_size = len(small_batch)
print(f"Processing batch of {batch_size} cards...")

results = pad.apply_predictions_to_dataframe(small_batch, model_id=16)
completed = len(results)
print(f"\nCompleted {completed} predictions")

# Final top-level expression: rendered by the notebook.
result_columns = ['card_id', 'sample_name', 'actual', 'prediction']
results[result_columns].head()

## 7. Advanced Features

### Dataset Manager Access

In [None]:
# Access the dataset manager for advanced operations
dm = pad.get_dataset_manager()
# Only the object's type is printed; no manager methods are exercised in this cell.
print(f"DatasetManager instance: {type(dm)}")
print("Provides access to advanced dataset operations")

### Performance Tips

1. **Batch Processing**: Use `apply_predictions_to_dataframe()` for multiple predictions
   - v0.2.1 optimizes this with single model loading
   - 50-80% faster than individual predictions

2. **Data Filtering**: Exclude problematic cards using `get_card_issues()`

3. **Caching** (Coming Soon): Phase 1 caching implementation in PR #13
   - Will enable offline analysis
   - Will reduce redundant downloads

## Common Workflows

### Workflow 1: Analyze a specific drug across concentrations

In [None]:
# Select every card whose sample name mentions rifampicin
# (case-insensitive; missing names count as non-matches).
is_rifampicin = cards['sample_name'].str.contains('rifampicin', case=False, na=False)
rifampicin = cards[is_rifampicin]
rifampicin_total = len(rifampicin)
print(f"Found {rifampicin_total} rifampicin samples")

# Count how many of those samples fall under each 'quantity' value.
conc_groups = rifampicin.groupby('quantity').size()
print("\nSamples by concentration:")
print(conc_groups)

### Workflow 2: Quality control - exclude problematic cards

In [None]:
# Collect the IDs of cards with known issues.
issues = pad.get_card_issues()
issue_ids = set(issues['card_id'])

# Keep only the cards whose ID is not in the issue set.
is_problematic = cards['card_id'].isin(issue_ids)
clean_cards = cards[~is_problematic]

# Report the before/after sizes and how many cards were dropped.
original_total = len(cards)
clean_total = len(clean_cards)
print(f"Original dataset: {original_total} cards")
print(f"After filtering: {clean_total} cards")
print(f"Removed: {original_total - clean_total} problematic cards")

### Workflow 3: Model comparison

In [None]:
# Compare concentration predictions from different models
test_card = 47918

# Model 18 (PLS)
actual, pred_18 = pad.predict(card_id=test_card, model_id=18)

# Model 19 (NN v2) - if available.
# Only the prediction call sits inside the `try`, so a formatting error in the
# report below can no longer produce partial output followed by the fallback.
# `except Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
# SystemExit propagate, and the reason for the fallback is reported instead of
# being swallowed silently.
try:
    _, pred_19 = pad.predict(card_id=test_card, model_id=19)
except Exception as exc:
    pred_19 = None
    print(f"Model 19 prediction unavailable: {exc}")

# A missing Model 19 result falls back to the single-model report.
if pred_19 is not None:
    print(f"Card {test_card} concentration predictions:")
else:
    print(f"Card {test_card} concentration:")
print(f"  Actual: {actual:.1f}%")
print(f"  Model 18 (PLS): {pred_18:.1f}%")
if pred_19 is not None:
    print(f"  Model 19 (NN): {pred_19:.1f}%")

## Summary

PAD Analytics v0.2.1 provides comprehensive functions for:

1. **Dataset Management**: Load and explore ML datasets
2. **Card Management**: Retrieve PAD test results
3. **Project Organization**: Access data by research project
4. **Model Access**: Use trained ML models
5. **Visualization**: Interactive card displays
6. **Prediction**: Drug identification and quantification
7. **Batch Processing**: Optimized batch predictions with single model loading

For more information:
- GitHub: https://github.com/PaperAnalyticalDeviceND/pad-analytics
- PyPI: https://pypi.org/project/pad-analytics/