In [2]:
"""
01_ingestion_tests.ipynb
Purpose: verify file loading, inspect structure, and test a mock data ingestion pipeline.
"""

# --- Imports ---
import os
import pandas as pd
from pathlib import Path

In [7]:
from pathlib import Path
import yaml, datetime

PROJECT_DIR = Path.home() / "Projects" / "SOMA"
# Path.write_text() does not create missing parent directories, so make sure
# the project directory exists before anything is written into it.
PROJECT_DIR.mkdir(parents=True, exist_ok=True)

# === 1. Create README.md ===
# The template is an f-string only for the "Last updated" date at the bottom.
# Trailing double spaces on some lines are intentional Markdown line breaks.
readme_text = f"""# SOMA  
### System for Observing and Modeling Alignment  

SOMA is a personal data-science project that integrates wearable, physiological, and contextual data — from heart rate and glucose to mood, sleep, and daily rhythms — to discover patterns of alignment between body, mind, and environment.

---

## 📁 Project Structure
SOMA/
├── data/ # Raw & cleaned input data (Garmin, Strava, CGM, manual logs)
├── notebooks/ # Exploratory analysis and pipeline development
├── feature_engineering/ # Feature extraction scripts
├── modeling/ # ML models, clustering, predictive analysis
├── narrative/ # Generative + interpretive summaries (Abacus AI)
├── utils/ # Helper functions, constants, config handling
├── outputs/ # Results, reports, visualizations
└── config.yaml # Project configuration file

---

## 🧩 Core Objectives
1. **Integrate** data from multiple sources (Garmin/Strava, Dexcom/Stelo, manual logs)  
2. **Engineer** features for physiological state detection (stress, recovery, flow)  
3. **Cluster & Classify** behavioral and physiological patterns  
4. **Generate** natural-language interpretations using Abacus AI  

---

## 🤖 Abacus AI Integration
SOMA provides structured data, configuration prompts, and contextual metadata to Abacus AI for automated code generation and analysis workflows.

**Primary instruction for Abacus AI:**
> “Given this README and config.yaml, write modular, well-commented Python scripts for ingestion, feature engineering, clustering, and natural-language generation.”

---

_Last updated: {datetime.date.today().isoformat()}_
"""

readme_path = PROJECT_DIR / "README.md"
# The README contains emoji, box-drawing characters and typographic quotes.
# Without an explicit encoding, write_text() uses the locale default, which
# can raise UnicodeEncodeError or produce a mis-encoded file on non-UTF-8
# systems.
readme_path.write_text(readme_text, encoding="utf-8")
print(f"✅ Created {readme_path}")

# === 2. Create config.yaml ===
# Project metadata, directory layout and the status of each data source.
# Paths are stored as plain strings so the YAML file contains no
# Python-specific object tags.
config = {
    "project": {
        "name": "SOMA",
        "description": "System for Observing and Modeling Alignment",
        "version": "0.1.0",
        "created": datetime.date.today().isoformat(),
        "author": "TJohn",
    },
    "paths": {
        "project_dir": str(PROJECT_DIR),
        "data_dir": str(PROJECT_DIR / "data"),
        "notebooks_dir": str(PROJECT_DIR / "notebooks"),
        "feature_engineering_dir": str(PROJECT_DIR / "feature_engineering"),
        "modeling_dir": str(PROJECT_DIR / "modeling"),
        "narrative_dir": str(PROJECT_DIR / "narrative"),
        "outputs_dir": str(PROJECT_DIR / "outputs"),
    },
    "data_sources": {
        "garmin_strava": {
            "api": "Strava API",
            "data_type": "activity + biometric",
            "status": "planned",
        },
        "stelo_dexcom": {
            "api": "Dexcom / Stelo",
            "data_type": "glucose",
            "status": "to be explored",
        },
        "manual_log": {
            "file": "SOMA_Manual_Log.xlsx",
            "data_type": "self-reported mood, stress, cycle phase",
            "status": "active",
        },
    },
}

config_path = PROJECT_DIR / "config.yaml"
# open(..., "w") fails if the parent directory is missing, so create it first.
config_path.parent.mkdir(parents=True, exist_ok=True)
# Explicit UTF-8 plus allow_unicode keeps any future non-ASCII values readable
# in the file instead of being escaped or depending on the locale encoding.
with open(config_path, "w", encoding="utf-8") as f:
    # safe_dump emits only standard YAML tags; sort_keys=False preserves the
    # insertion order of the dict above.
    yaml.safe_dump(config, f, sort_keys=False, allow_unicode=True)
print(f"✅ Created {config_path}")


✅ Created /Users/TJohn/Projects/SOMA/README.md
✅ Created /Users/TJohn/Projects/SOMA/config.yaml


In [8]:


# --- Project paths ---
# Re-declared here so this cell does not depend on the setup cell having run.
PROJECT_DIR = Path.home() / "Projects" / "SOMA"
DATA_DIR = PROJECT_DIR / "data"

print("Project directory:", PROJECT_DIR)
print("Data directory:", DATA_DIR)

# --- Create data folder if missing ---
# parents=True also creates ~/Projects/SOMA when it does not exist yet;
# without it mkdir() raises FileNotFoundError on a fresh machine.
DATA_DIR.mkdir(parents=True, exist_ok=True)

# --- Example: read a CSV if it exists ---
example_csv = DATA_DIR / "example.csv"

if example_csv.exists():
    # Load whatever is already on disk and preview the first rows.
    df = pd.read_csv(example_csv)
    print(f"\n✅ Loaded {len(df):,} rows from example.csv")
    display(df.head())
else:
    # No input yet: write a five-row sample file so later cells have
    # something to read.
    print("\n⚠️ No CSV found yet. Creating a tiny sample file for testing...")
    sample = pd.DataFrame({
        "timestamp": pd.date_range("2025-01-01", periods=5, freq="D"),
        "heart_rate": [72, 80, 75, 78, 70],
        "glucose": [95, 100, 92, 98, 94],
        "notes": ["baseline", "run day", "rest", "stress", "rest"]
    })
    # index=False keeps the positional index out of the file.
    sample.to_csv(example_csv, index=False)
    print(f"✅ Created sample file: {example_csv}")
    display(sample)


Project directory: /Users/TJohn/Projects/SOMA
Data directory: /Users/TJohn/Projects/SOMA/data

⚠️ No CSV found yet. Creating a tiny sample file for testing...
✅ Created sample file: /Users/TJohn/Projects/SOMA/data/example.csv


Unnamed: 0,timestamp,heart_rate,glucose,notes
0,2025-01-01,72,95,baseline
1,2025-01-02,80,100,run day
2,2025-01-03,75,92,rest
3,2025-01-04,78,98,stress
4,2025-01-05,70,94,rest
