# kedro Test

    codiet_kedro_project/
    ├── conf/
    │   └── base/
    │       ├── catalog.yml
    │       └── parameters.yml
    ├── data/
    │   ├── REDcap_data_combined_nov2024.xlsx
    │   ├── DBN_test_data.csv
    │   └── CoDiet_Sociodemographics_abbre.csv
    ├── src/
    │   └── codiet/
    │       ├── pipelines/
    │       │   └── main/
    │       │       ├── data_cleaning.py
    │       │       ├── data_cleaning_nodes.py
    │       │       ├── feature_selection.py
    │       │       ├── pipeline.py
    │       │       └── pipeline_registry.py
    │       ├── run.py
    │       └── __init__.py
    ├── demo/
    │   └── notebook.py
    ├── tests/
    │   └── test_nodes.py
    └── README.md


In [None]:
# Mount Google Drive inside the Colab VM so the project files stored there
# become reachable under /content/drive.
from google.colab import drive

drive.mount("/content/drive")

# Switch the working directory to the Kedro project root: later cells build
# paths relative to the current directory (Path.cwd(), "src", "pyproject.toml").
import os

os.chdir(
    "/content/drive/My Drive/Colab Notebooks/Causality/Methods/DNN/codiet_kedro_project/"
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Test the Kedro project in Colab.
# Earlier single-command install with exact version pins, kept for reference:
# !pip install kedro==0.18.12 pandas==2.0.3 numpy==1.24.3 scikit-learn==1.3.0 torch==2.0.1 seaborn==0.12.2 matplotlib==3.7.2 openpyxl==3.1.2
!pip install scikit-learn==1.3.0 torch==2.0.1 seaborn==0.12.2 matplotlib==3.7.2 openpyxl==3.1.2
!pip install wheel setuptools cython
# NOTE(review): numpy/pandas are installed last, with version ranges that differ
# from the exact pins in the commented-out command above — presumably to work
# around a dependency conflict on the Colab runtime; confirm.
!pip install "numpy>=1.26,<2.0" "pandas>=2.1,<2.2" "kedro==0.18.12"




In [None]:
import numpy as np
import pandas as pd
from src.codiet.pipelines.main import pipeline, data_cleaning_nodes, feature_selection



## 用 KedroContext 加载项目配置并尝试读取 DataCatalog 的数据集

In [None]:
from kedro.framework.context import KedroContext
from kedro.framework.hooks import _create_hook_manager
from pathlib import Path
from kedro.config import ConfigLoader

# Describe the project relative to the current working directory; this cell
# expects the notebook to already be running from the Kedro project root.
project_path = Path.cwd()
package_name = "codiet"
conf_path = project_path / "conf"
env = "base"
config_loader = ConfigLoader(conf_source=str(conf_path), env=env)

# Step 1: build the context by hand. A failure here makes everything below
# meaningless, so report it and re-raise.
try:
    context = KedroContext(
        package_name=package_name,
        project_path=project_path,
        config_loader=config_loader,
        hook_manager=_create_hook_manager(),
    )
except Exception as e:
    print("KedroContext creation failed:", e)
    raise
else:
    print("KedroContext created OK")

# Step 2: materialise the DataCatalog once. The load loop below cannot run
# without it, so a failure is reported and re-raised here (same policy as
# step 1) instead of being printed and then hit again by a second, unguarded
# access to `context.catalog`.
try:
    catalog = context.catalog
    dataset_names = catalog.list()  # already a list; no extra list() needed
except Exception as e:
    print("Catalog load error:", e)
    raise
print("Catalog datasets:", dataset_names)

# Step 3: best-effort load of the first five catalog entries. A single
# failing dataset is reported but must not stop the remaining checks, hence
# the deliberately broad handler inside the loop.
for key in dataset_names[:5]:
    try:
        ds = catalog.load(key)
    except Exception as e:
        print(f"Failed to load {key}: {e}")
    else:
        print(f"Loaded {key} -> type: {type(ds)}, shape(if dataframe): {getattr(ds,'shape',None)}")


KedroContext created OK


Catalog datasets: ['sociodemographics_data', 'exdbn_data', 'abbreviation_data', 'cleaned_data', 'codiet_dic_data', 'correlation_heatmap', 'parameters', 'params:feature_selection', 'params:feature_selection.threshold', 'params:feature_selection.method', 'params:data_cleaning', 'params:data_cleaning.remove_nulls', 'params:data_cleaning.fill_method']


Loaded sociodemographics_data -> type: <class 'pandas.core.frame.DataFrame'>, shape(if dataframe): (19, 94)


Loaded exdbn_data -> type: <class 'pandas.core.frame.DataFrame'>, shape(if dataframe): (329, 1)


Loaded abbreviation_data -> type: <class 'pandas.core.frame.DataFrame'>, shape(if dataframe): (334, 3)


Loaded cleaned_data -> type: <class 'pandas.core.frame.DataFrame'>, shape(if dataframe): (304, 1)


Loaded codiet_dic_data -> type: <class 'pandas.core.frame.DataFrame'>, shape(if dataframe): (440, 7)


## 验证 Kedro CLI 能读取配置、运行 pipeline、执行所有节点并写入 outputs（catalog 指定的位置）

In [None]:
import subprocess
import sys

# Assemble the command piece by piece: the current interpreter, a warning
# filter, then `-m kedro run`.
# NOTE(review): the -W filter matches Kedro's "not yet fully compatible"
# Python-version warning; presumably it is needed for Kedro 0.18 to start on
# this Python version — confirm against the Kedro release notes.
cmd = [sys.executable]
cmd += ["-W", "default:Kedro is not yet fully compatible"]
# The trailing "--" marks the end of options for the CLI parser; nothing
# follows it here.
cmd += ["-m", "kedro", "run", "--"]

# Capture both output streams as text so they can be printed below.
result = subprocess.run(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

print("stdout:", result.stdout, sep="\n")
print("\nstderr:", result.stderr, sep="\n")

# The return code is only reported, not enforced (no check=True).
print("\n返回码:", result.returncode)


stdout:
[10/22/25 14:30:47] INFO     Kedro project codiet_kedro_project   session.py:364
                             ages/kedro/framework/session/sessio                
                             ConfigLoader will be deprecated in                 
                             Kedro 0.19. Please use the                         
                             OmegaConfigLoader instead. To                      
                             consult the documentation for                      
                             OmegaConfigLoader, see here:                       
                             https://docs.kedro.org/en/stable/co                
                             nfiguration/advanced_configuration.                
                             html#omegaconfigloader                             
                                                                                
[10/22/25 14:30:55] INFO     Loading data from               data_catalog.py:475
                    

## 如果 pyproject.toml 不存在，则写入

In [None]:
from pathlib import Path

# Create a minimal pyproject.toml with a [tool.kedro] section, but only when
# the file is missing; an existing file is never overwritten.
p = Path("pyproject.toml")
if not p.exists():
    toml_lines = [
        '[tool.kedro]',
        'package_name = "codiet"',
        'project_name = "codiet_kedro_project"',
        'kedro_init_version = "0.18.12"',
        'source_dir = "src"',
    ]
    # Every line, including the last, is newline-terminated.
    p.write_text("".join(f"{line}\n" for line in toml_lines), encoding="utf-8")
print("pyproject.toml exists:", p.exists())


pyproject.toml exists: True


In [None]:
from kedro.framework.cli import main
import subprocess
import sys

def run_kedro():
    """Run ``kedro run`` in a child Python process.

    The child is started with the same interpreter as the caller
    (``sys.executable``) and inherits its working directory and output
    streams; nothing is captured or returned.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status (``check=True``).
    """
    interpreter_flags = ["-W", "default:Kedro is not yet fully compatible"]
    # Note: the trailing "--" tells Click to stop parsing options, so any
    # Jupyter arguments that might follow are not interpreted.
    kedro_args = ["-m", "kedro", "run", "--"]
    subprocess.run([sys.executable, *interpreter_flags, *kedro_args], check=True)

# Trigger the run only when this code executes as the main module (a script,
# or typically a notebook cell), not when it is imported from elsewhere.
if __name__ == "__main__":
    run_kedro()


In [None]:
from pathlib import Path

# Locations of the package skeleton, relative to the current directory.
package_name = "codiet"
src_dir = Path("src")
package_dir = src_dir / package_name
settings_file = package_dir / "settings.py"

# Create src/<package>/ (and any missing parents) and make it importable.
# Both calls are no-ops when the directory or file already exists.
package_dir.mkdir(parents=True, exist_ok=True)
package_dir.joinpath("__init__.py").touch(exist_ok=True)

# Write a placeholder settings.py only when none exists, so an existing
# settings module is never overwritten.
if not settings_file.exists():
    settings_file.write_text("# Kedro settings placeholder\n", encoding="utf-8")

# Report the resulting tree under src/ for a visual check.
print("项目结构已创建/检查完成：")
print([*src_dir.rglob("*")])


项目结构已创建/检查完成：
[PosixPath('src/.DS_Store'), PosixPath('src/codiet'), PosixPath('src/codiet/pipelines'), PosixPath('src/codiet/__init__.py'), PosixPath('src/codiet/.DS_Store'), PosixPath('src/codiet/run.py'), PosixPath('src/codiet/__pycache__'), PosixPath('src/codiet/settings.py'), PosixPath('src/codiet/pipeline_registry.py'), PosixPath('src/codiet/pipelines/__init__.py'), PosixPath('src/codiet/pipelines/main'), PosixPath('src/codiet/pipelines/.DS_Store'), PosixPath('src/codiet/pipelines/__pycache__'), PosixPath('src/codiet/pipelines/main/__init__.py'), PosixPath('src/codiet/pipelines/main/.DS_Store'), PosixPath('src/codiet/pipelines/main/__pycache__'), PosixPath('src/codiet/pipelines/main/data_cleaning_nodes.py'), PosixPath('src/codiet/pipelines/main/feature_selection.py'), PosixPath('src/codiet/pipelines/main/pipeline.py'), PosixPath('src/codiet/pipelines/main/__pycache__/pipeline.cpython-311.pyc'), PosixPath('src/codiet/pipelines/main/__pycache__/data_cleaning_nodes.cpython-311.pyc'),