diff --git a/docs/api/index.rst b/docs/api/index.rst index 7faa905c..936350d6 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -24,4 +24,11 @@ This page provides a high-level overview of all public mesa-frames objects, func .. toctree:: :maxdepth: 2 - reference/space/index \ No newline at end of file + reference/space/index + + .. grid-item-card:: + + .. toctree:: + :maxdepth: 2 + + reference/datacollector \ No newline at end of file diff --git a/docs/api/reference/datacollector.rst b/docs/api/reference/datacollector.rst new file mode 100644 index 00000000..bdf38cfd --- /dev/null +++ b/docs/api/reference/datacollector.rst @@ -0,0 +1,10 @@ +Data Collection +=============== + +.. currentmodule:: mesa_frames + +.. autoclass:: DataCollector + :members: + :inherited-members: + :autosummary: + :autosummary-nosignatures: \ No newline at end of file diff --git a/docs/general/user-guide/1_classes.md b/docs/general/user-guide/1_classes.md index 7c65c941..ac696731 100644 --- a/docs/general/user-guide/1_classes.md +++ b/docs/general/user-guide/1_classes.md @@ -64,3 +64,30 @@ class GridWorld(ModelDF): ``` A continuous GeoSpace, NetworkSpace, and a collection to have multiple spaces in the models are in the works! 🚧 + +## DataCollector 🗂️ + +`DataCollector` records model- and agent-level data during simulation. +You configure what to collect, how to store it, and when to trigger collection. 
+ +Example: + +```python +class ExampleModel(ModelDF): + def __init__(self): + super().__init__() + self.agents = MoneyAgent(self) + self.datacollector = DataCollector( + model=self, + model_reporters={"total_wealth": lambda m: m.agents["wealth"].sum()}, + agent_reporters={"wealth": "wealth"}, + storage="csv", + storage_uri="./data", + trigger=lambda m: m._steps % 2 == 0 + ) + + def step(self): + self.agents.step() + self.datacollector.conditional_collect() + self.datacollector.flush() +``` diff --git a/docs/general/user-guide/2_introductory-tutorial.ipynb b/docs/general/user-guide/2_introductory-tutorial.ipynb index 0c3d795e..24742f80 100644 --- a/docs/general/user-guide/2_introductory-tutorial.ipynb +++ b/docs/general/user-guide/2_introductory-tutorial.ipynb @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "from mesa_frames import ModelDF, AgentSetPolars\n", + "from mesa_frames import ModelDF, AgentSetPolars, DataCollector\n", "\n", "\n", "class MoneyModelDF(ModelDF):\n", @@ -57,6 +57,14 @@ " super().__init__()\n", " self.n_agents = N\n", " self.agents += agents_cls(N, self)\n", + " self.datacollector = DataCollector(\n", + " model=self,\n", + " model_reporters={\"total_wealth\": lambda m: m.agents[\"wealth\"].sum()},\n", + " agent_reporters={\"wealth\": \"wealth\"},\n", + " storage=\"csv\",\n", + " storage_uri=\"./data\",\n", + " trigger=lambda m: m._steps % 2 == 0,\n", + " )\n", "\n", " def step(self):\n", " # Executes the step method for every agentset in self.agents\n", @@ -64,7 +72,9 @@ "\n", " def run_model(self, n):\n", " for _ in range(n):\n", - " self.step()" + " self.step()\n", + " self.datacollector.conditional_collect()\n", + " self.datacollector.flush()" ] }, { diff --git a/docs/general/user-guide/4_datacollector.ipynb b/docs/general/user-guide/4_datacollector.ipynb new file mode 100644 index 00000000..247dbf70 --- /dev/null +++ b/docs/general/user-guide/4_datacollector.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "id": "7fb27b941602401d91542211134fc71a", + "metadata": {}, + "source": [ + "# Data Collector Tutorial\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/projectmesa/mesa-frames/blob/main/docs/general/user-guide/4_datacollector.ipynb)\n", + "\n", + "This notebook walks you through using the concrete `DataCollector` in `mesa-frames` to collect model- and agent-level data and write it to different storage backends: **memory, CSV, Parquet, S3, and PostgreSQL**.\n", + "\n", + "It also shows how to use **conditional triggers** and how the **schema validation** behaves for PostgreSQL.\n" + ] + }, + { + "cell_type": "markdown", + "id": "acae54e37e7d407bbb7b55eff062a284", + "metadata": {}, + "source": [ + "## Installation (Colab or fresh env)\n", + "\n", + "Uncomment and run the next cell if you're in Colab or a clean environment.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a63283cbaf04dbcab1f6479b197f3a8", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "# !pip install git+https://github.com/projectmesa/mesa-frames mesa" + ] + }, + { + "cell_type": "markdown", + "id": "8dd0d8092fe74a7c96281538738b07e2", + "metadata": {}, + "source": [ + "## Minimal Example Model\n", + "\n", + "We create a tiny model using the `ModelDF` and an `AgentSetPolars`-style agent container. 
This is just to demonstrate collection APIs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72eea5119410473aa328ad9291626812", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "from mesa_frames import ModelDF, AgentSetPolars, DataCollector\n", + "import polars as pl\n", + "\n", + "\n", + "class MoneyAgents(AgentSetPolars):\n", + " def __init__(self, n: int, model: ModelDF):\n", + " super().__init__(model)\n", + " # one column, one unit of wealth each\n", + " self += pl.DataFrame({\"wealth\": pl.ones(n, eager=True)})\n", + "\n", + " def step(self) -> None:\n", + " self.select(self.wealth > 0)\n", + " receivers = self.df.sample(n=len(self.active_agents), with_replacement=True)\n", + " self[\"active\", \"wealth\"] -= 1\n", + " income = receivers.group_by(\"unique_id\").len()\n", + " self[income[\"unique_id\"], \"wealth\"] += income[\"len\"]\n", + "\n", + "\n", + "class MoneyModel(ModelDF):\n", + " def __init__(self, n: int):\n", + " super().__init__()\n", + " self.agents = MoneyAgents(n, self)\n", + " self.dc = DataCollector(\n", + " model=self,\n", + " model_reporters={\n", + " \"total_wealth\": lambda m: m.agents[\"wealth\"].sum(),\n", + " \"n_agents\": lambda m: len(m.agents),\n", + " },\n", + " agent_reporters={\n", + " \"wealth\": \"wealth\", # pull existing column\n", + " },\n", + " storage=\"memory\", # we'll switch this per example\n", + " storage_uri=None,\n", + " trigger=lambda m: m._steps % 2\n", + " == 0, # collect every 2 steps via conditional_collect\n", + " reset_memory=True,\n", + " )\n", + "\n", + " def step(self):\n", + " self.agents.do(\"step\")\n", + "\n", + " def run(self, steps: int, conditional: bool = True):\n", + " for _ in range(steps):\n", + " self.step()\n", + " self.dc.conditional_collect() # or .collect if you want to collect every step regardless of trigger\n", + "\n", + "\n", + "model = MoneyModel(1000)\n", + "model.run(10)\n", + "model.dc.data # peek in-memory dataframes" + ] + }, + { 
+ "cell_type": "markdown", + "id": "3d3ca41d", + "metadata": {}, + "source": [ + "## Saving the data for later use \n", + "\n", + "`DataCollector` supports multiple storage backends. \n", + "Files are saved with **step number** and **batch number** (e.g., `model_step10_batch2.csv`) so multiple collects at the same step don’t overwrite. \n", + " \n", + "- **CSV:** `storage=\"csv\"` → writes `model_step{n}_batch{k}.csv`, easy to open anywhere. \n", + "- **Parquet:** `storage=\"parquet\"` → compressed, efficient for large datasets. \n", + "- **S3:** `storage=\"S3-csv\"`/`storage=\"S3-parquet\"` → saves CSV/Parquet directly to Amazon S3. \n", + "- **PostgreSQL:** `storage=\"postgresql\"` → inserts results into `model_data` and `agent_data` tables for querying. \n" + ] + }, + { + "cell_type": "markdown", + "id": "8edb47106e1a46a883d545849b8ab81b", + "metadata": {}, + "source": [ + "## Writing to Local CSV\n", + "\n", + "Switch the storage to `csv` and provide a folder path. Files are written as `model_step{n}_batch{k}.csv` and `agent_step{n}_batch{k}.csv`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f14f38c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.makedirs(\"./data_csv\", exist_ok=True)\n", + "model_csv = MoneyModel(1000)\n", + "model_csv.dc = DataCollector(\n", + " model=model_csv,\n", + " model_reporters={\n", + " \"total_wealth\": lambda m: m.agents[\"wealth\"].sum(),\n", + " \"n_agents\": lambda m: len(m.agents),\n", + " },\n", + " agent_reporters={\n", + " \"wealth\": \"wealth\",\n", + " },\n", + " storage=\"csv\", # saving as csv\n", + " storage_uri=\"./data_csv\",\n", + " trigger=lambda m: m._steps % 2 == 0,\n", + " reset_memory=True,\n", + ")\n", + "model_csv.run(10)\n", + "model_csv.dc.flush()\n", + "os.listdir(\"./data_csv\")" + ] + }, + { + "cell_type": "markdown", + "id": "10185d26023b46108eb7d9f57d49d2b3", + "metadata": {}, + "source": [ + "## Writing to Local Parquet\n", + "\n", + "Use 
`parquet` for columnar output.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8763a12b2bbd4a93a75aff182afb95dc", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "os.makedirs(\"./data_parquet\", exist_ok=True)\n", + "model_parq = MoneyModel(1000)\n", + "model_parq.dc = DataCollector(\n", + " model=model_parq,\n", + " model_reporters={\n", + " \"total_wealth\": lambda m: m.agents[\"wealth\"].sum(),\n", + " \"n_agents\": lambda m: len(m.agents),\n", + " },\n", + " agent_reporters={\n", + " \"wealth\": \"wealth\",\n", + " },\n", + " storage=\"parquet\", # save as parquet\n", + " storage_uri=\"data_parquet\",\n", + " trigger=lambda m: m._steps % 2 == 0,\n", + " reset_memory=True,\n", + ")\n", + "model_parq.run(10)\n", + "model_parq.dc.flush()\n", + "os.listdir(\"./data_parquet\")" + ] + }, + { + "cell_type": "markdown", + "id": "7623eae2785240b9bd12b16a66d81610", + "metadata": {}, + "source": [ + "## Writing to Amazon S3 (CSV or Parquet)\n", + "\n", + "Set AWS credentials via environment variables or your usual config. 
Then choose `S3-csv` or `S3-parquet` and pass an S3 URI (e.g., `s3://my-bucket/experiments/run-1`).\n", + "\n", + "> **Note:** This cell requires network access & credentials when actually run.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cdc8c89c7104fffa095e18ddfef8986", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "model_s3 = MoneyModel(1000)\n", + "model_s3.dc = DataCollector(\n", + " model=model_s3,\n", + " model_reporters={\n", + " \"total_wealth\": lambda m: m.agents[\"wealth\"].sum(),\n", + " \"n_agents\": lambda m: len(m.agents),\n", + " },\n", + " agent_reporters={\n", + " \"wealth\": \"wealth\",\n", + " },\n", + " storage=\"S3-csv\", # save as csv in S3\n", + " storage_uri=\"s3://my-bucket/experiments/run-1\", # change it to required path\n", + " trigger=lambda m: m._steps % 2 == 0,\n", + " reset_memory=True,\n", + ")\n", + "model_s3.run(10)\n", + "model_s3.dc.flush()" + ] + }, + { + "cell_type": "markdown", + "id": "b118ea5561624da68c537baed56e602f", + "metadata": {}, + "source": [ + "## Writing to PostgreSQL\n", + "\n", + "PostgreSQL requires that the target tables exist and that the expected reporter columns are present. The collector will validate tables/columns up front and raise descriptive errors if something is missing.\n", + "\n", + "Below is a minimal schema example. 
Adjust columns to your configured reporters.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "938c804e27f84196a10c8828c723f798", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "DDL_MODEL = r\"\"\"\n", + "CREATE SCHEMA IF NOT EXISTS public;\n", + "CREATE TABLE IF NOT EXISTS public.model_data (\n", + " step INTEGER,\n", + " seed VARCHAR,\n", + " total_wealth BIGINT,\n", + " n_agents INTEGER\n", + ");\n", + "\"\"\"\n", + "DDL_AGENT = r\"\"\"\n", + "CREATE TABLE IF NOT EXISTS public.agent_data (\n", + " step INTEGER,\n", + " seed VARCHAR,\n", + " unique_id BIGINT,\n", + " wealth BIGINT\n", + ");\n", + "\"\"\"\n", + "print(DDL_MODEL)\n", + "print(DDL_AGENT)" + ] + }, + { + "cell_type": "markdown", + "id": "504fb2a444614c0babb325280ed9130a", + "metadata": {}, + "source": [ + "After creating the tables (outside this notebook or via a DB connection cell), configure and flush:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59bbdb311c014d738909a11f9e486628", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "POSTGRES_URI = \"postgresql://user:pass@localhost:5432/mydb\"\n", + "m_pg = MoneyModel(300)\n", + "m_pg.dc._storage = \"postgresql\"\n", + "m_pg.dc._storage_uri = POSTGRES_URI\n", + "m_pg.run(6)\n", + "m_pg.dc.flush()" + ] + }, + { + "cell_type": "markdown", + "id": "b43b363d81ae4b689946ece5c682cd59", + "metadata": {}, + "source": [ + "## Triggers & Conditional Collection\n", + "\n", + "The collector accepts a `trigger: Callable[[Model], bool]`. 
When using `conditional_collect()`, the collector checks the trigger and collects only if it returns `True`.\n", + "\n", + "You can always call `collect()` to gather data unconditionally.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a65eabff63a45729fe45fb5ade58bdc", + "metadata": { + "editable": true + }, + "outputs": [], + "source": [ + "m = MoneyModel(100)\n", + "m.dc.trigger = lambda model: model._steps % 3 == 0 # every 3rd step\n", + "m.run(10, conditional=True)\n", + "m.dc.data[\"model\"].head()" + ] + }, + { + "cell_type": "markdown", + "id": "c3933fab20d04ec698c2621248eb3be0", + "metadata": {}, + "source": [ + "## Troubleshooting\n", + "\n", + "- **ValueError: Please define a storage_uri** — for non-memory backends you must set `_storage_uri`.\n", + "- **Missing columns in table** — check the PostgreSQL error text; create/alter the table to include the columns for your configured `model_reporters` and `agent_reporters`, plus required `step` and `seed`.\n", + "- **Permissions/credentials errors** (S3/PostgreSQL) — ensure correct IAM/credentials or database permissions.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4dd4641cc4064e0191573fe9c69df29b", + "metadata": {}, + "source": [ + "---\n", + "*Generated on 2025-08-30.*\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.x" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/general/user-guide/4_benchmarks.md b/docs/general/user-guide/5_benchmarks.md similarity index 100% rename from docs/general/user-guide/4_benchmarks.md rename to docs/general/user-guide/5_benchmarks.md diff --git a/mesa_frames/__init__.py b/mesa_frames/__init__.py index 1a3ef48b..d47087d1 100644 --- a/mesa_frames/__init__.py +++ b/mesa_frames/__init__.py @@ -64,12 +64,8 @@ def __init__(self, width, height): from mesa_frames.concrete.agentset import 
AgentSetPolars from mesa_frames.concrete.model import ModelDF from mesa_frames.concrete.space import GridPolars +from mesa_frames.concrete.datacollector import DataCollector -__all__ = [ - "AgentsDF", - "AgentSetPolars", - "ModelDF", - "GridPolars", -] +__all__ = ["AgentsDF", "AgentSetPolars", "ModelDF", "GridPolars", "DataCollector"] __version__ = "0.1.1.dev0" diff --git a/mesa_frames/abstract/datacollector.py b/mesa_frames/abstract/datacollector.py index 6744eb8a..d93f661d 100644 --- a/mesa_frames/abstract/datacollector.py +++ b/mesa_frames/abstract/datacollector.py @@ -127,7 +127,7 @@ def conditional_collect(self) -> None: """ Trigger data collection if condition is met. - This method caslls _collect() to perform actual data collection + This method calls _collect() to perform actual data collection only if trigger returns True Example ------- diff --git a/mkdocs.yml b/mkdocs.yml index f2bbf494..8a462881 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,7 +45,7 @@ theme: plugins: - search - mkdocs-jupyter: - execute: true # Ensures the notebooks run and generate output + execute: true # Ensures the notebooks run and generate output - git-revision-date-localized: enable_creation_date: true - minify: @@ -108,13 +108,14 @@ extra: nav: - Home: index.md - User Guide: - - Getting Started: user-guide/0_getting-started.md - - Classes: user-guide/1_classes.md - - Introductory Tutorial: user-guide/2_introductory-tutorial.ipynb - - Advanced Tutorial: user-guide/3_advanced-tutorial.md - - Benchmarks: user-guide/4_benchmarks.md + - Getting Started: user-guide/0_getting-started.md + - Classes: user-guide/1_classes.md + - Introductory Tutorial: user-guide/2_introductory-tutorial.ipynb + - Data Collector Tutorial: user-guide/4_datacollector.ipynb + - Advanced Tutorial: user-guide/3_advanced-tutorial.md + - Benchmarks: user-guide/5_benchmarks.md - API Reference: api/index.html - Contributing: - - Contribution Guide: contributing.md - - Development Guidelines: development/index.md 
- - Roadmap: roadmap.md \ No newline at end of file + - Contribution Guide: contributing.md + - Development Guidelines: development/index.md + - Roadmap: roadmap.md