# Test Prep Function
This notebook tests `prep_ip()` - the data loading and preparation step.

**Output:** Saves `results/data_after_prep.pkl` for use in subsequent notebooks.

In [None]:
# Cell 1: Imports
import sys
# Extend the module search path with the parent directory BEFORE the package
# import below. Presumably the `ipms` package lives one level above the
# folder this notebook is run from — confirm against the repository layout.
sys.path.append('..')

from ipms.analysis import prep_ip
import pandas as pd  # not referenced by name in the cells visible here

# Reaching this line means every import above succeeded.
print("✓ All imports successful!")



# Cell 2: Run prep_ip()
# Runs the preparation step against the example configuration. Per the
# notebook's own notes, prep_ip() loads the data, filters it, and writes
# data_after_prep.pkl by itself, so no explicit save call appears here.

data = prep_ip('../config/example_config.yaml')

# A single print() emits all three status lines; sep="\n" reproduces the
# line breaks that separate print() calls would have produced.
print(
    "\n✓ prep_ip() complete!",
    "\nData has been saved to: results/data_after_prep.pkl",
    "Next step: Run 02_test_qc.ipynb",
    sep="\n",
)



# Cell 3: Quick Inspection
# Prints a short summary of the dictionary returned by prep_ip() in the
# previous cell: three metadata entries, the number of intensity columns
# recorded for each condition, and the first rows of the protein table.
print("=" * 60)
print("DATA SUMMARY")
print("=" * 60)

# Look the metadata mapping up once instead of once per printed line.
metadata = data['metadata']
print(f"\nProteins: {metadata['n_proteins']}")
print(f"Samples: {metadata['n_samples']}")
print(f"Conditions: {metadata['conditions']}")

# Plain string literal: nothing is interpolated, so no f-prefix is needed.
print("\nIntensity columns per condition:")
# Each value is reported by its length; presumably it is the list of
# intensity column names for that condition — confirm against prep_ip().
for condition, cols in data['intensity_cols'].items():
    print(f"  {condition}: {len(cols)} replicates")

print("\nFirst few proteins:")
df = data['df']
# Selecting by a list of labels raises KeyError if any of the three columns
# is absent, which makes a malformed table obvious in this test notebook.
print(df[['Accession', 'Gene_Symbol', '# Peptides']].head())




# Cell 4: Verify Saved File
# Confirms that the pickle expected from the prep step is present on disk
# (path given relative to the notebook's working directory) and reports
# its size in MiB-scaled units.
import os

save_path = '../results/data_after_prep.pkl'

# isfile() rather than exists(): exists() is also True for a directory, and
# a directory with this name must not be reported as a saved data file.
if os.path.isfile(save_path):
    size_mb = os.path.getsize(save_path) / (1024 * 1024)
    print("✓ Data saved successfully!")
    print(f"  Location: {save_path}")
    print(f"  Size: {size_mb:.1f} MB")
    print("\n✓ Ready for next step: 02_test_qc.ipynb")
else:
    print(f"✗ Save file not found: {save_path}")

## Next Steps

Data is now saved and ready for:
- **02_test_qc.ipynb** - Quality control plots
- **03_test_norm.ipynb** - Normalization

You can close this notebook - the data is saved!