project-name/
├── .github/workflows # CI/CD with GitHub Actions
├── data/
│ ├── raw/ # original data dump
│ ├── interim/ # intermediate data that has been transformed
│ ├── processed/ # final canonical datasets for modeling
│ └── external/ # data from third party sources
│
├── docs/ # mkdocs
│
├── notebooks/ # Jupyter notebooks. Naming convention is a number (for ordering),
│ │ # followed by a short `_`-delimited description.
│ ├── 01_exploration.ipynb # Exploratory data analysis
│ ├── 02_preprocess.ipynb # Cleaning, merging, etc. needed to produce the processed dataset
│ ├── 03_model01.ipynb
│ ├── 04_model02.ipynb
│ └── 05_validate.ipynb # accuracy scores, interpret results, compare models
│
├── reports/ # generated analysis as HTML, PDF, LaTeX, etc.
│ └── figures/ # generated graphics and figures to be used in reporting
│
├── src/ # source code to use in this project
│ ├── __init__.py
│ ├── config.py # store useful variables and configuration
│ ├── dataset.py # extract, clean and validate data
│ ├── models/ # ML model engineering (a folder for each model)
│ │ ├── __init__.py
│ │ ├── model01/
│ │ │ ├── dataloader.py
│ │ │ ├── model01.py
│ │ │ ├── predict.py
│ │ │ └── train.py
│
├── models/ # trained and serialized models, model predictions...
│ └── model01.pkl
│
├── tests/
│ ├── conftest.py # configuration for pytest
│ ├── test_data.py
│ ├── test_features.py
│ └── test_models.py
│
├── .gitignore
├── AGENTS.md # instructions for your agentic IDE to follow standard rules and
│ # guidelines for your project
├── CONTRIBUTING.md # let the community know how to open a PR or otherwise contribute to the project
├── LICENSE
├── Makefile # convenience commands like `make data` or `make train`
├── README.md
└── requirements.txt # e.g. generated with `pip freeze > requirements.txt`