In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from pgmpy.models import BayesianNetwork
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.inference import VariableElimination


def read_excel_data(file_path: str, sheet_name=0):
    """
    Load one Excel sheet into a DataFrame whose columns are all categorical.

    Args:
        file_path: Path of the Excel workbook, forwarded to ``pd.read_excel``.
        sheet_name: Sheet selector, forwarded to ``pd.read_excel``.

    Returns:
        The sheet with every row that contains a missing value removed and
        every column converted to the pandas ``category`` dtype.
    """
    raw = pd.read_excel(file_path, sheet_name=sheet_name)
    # Simplest possible missing-value policy: discard incomplete rows.
    complete_rows = raw.dropna()
    # Whole-frame conversion; pandas applies it column by column.
    return complete_rows.astype("category")


def visualize_network(bayes_model: BayesianNetwork, title="Bayesian Network"):
    """
    Draw the directed graph of a Bayesian network with networkx/matplotlib.

    Args:
        bayes_model: Network to draw. Only its ``nodes()`` and ``edges()``
            are read.
        title: Text placed above the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Copy the structure into a plain DiGraph instead of calling
    # ``bayes_model.to_digraph()``.
    # NOTE(review): ``to_digraph`` does not appear in the documented pgmpy
    # model API or in networkx, so the original call is expected to raise
    # AttributeError -- confirm against the installed pgmpy version.
    graph = nx.DiGraph()
    graph.add_nodes_from(bayes_model.nodes())
    graph.add_edges_from(bayes_model.edges())

    # A fixed seed keeps the layout identical between runs.
    pos = nx.spring_layout(graph, seed=42)

    plt.figure(figsize=(6, 4))
    nx.draw_networkx_nodes(graph, pos)
    nx.draw_networkx_edges(graph, pos, arrows=True)
    nx.draw_networkx_labels(graph, pos)
    plt.title(title)
    plt.axis("off")
    plt.show()


def create_empty_network(nodes):
    """
    Build a network that holds the given nodes and no edges.

    Used to show what the structure looks like "before training".

    Args:
        nodes: Iterable of node names to add.

    Returns:
        A ``BayesianNetwork`` containing only the nodes.
    """
    edgeless = BayesianNetwork()
    edgeless.add_nodes_from(nodes)
    return edgeless


def run_bayesian_network_demo(excel_file_path):
    """
    Run an end-to-end Bayesian-network demo on an Excel data set.

    Steps:
    1. Load the data with ``read_excel_data``.
    2. Build and draw an edge-less network (the "before training" picture).
    3. Learn the structure with hill climbing scored by BIC, then fit the
       parameters with maximum likelihood.
    4. Print and draw the learned network.
    5. Run one example query, P(A | B=first category of B), when the data
       has columns named 'A' and 'B'.

    Args:
        excel_file_path: Path of the Excel workbook to load.

    Returns:
        None. Results are printed and plotted.
    """
    # ================ 1. Load the data ================
    df = read_excel_data(excel_file_path)
    print("数据样例:")
    print(df.head(), "\n")

    # ================ 2. Empty network (before training) ================
    # Every column is treated as a discrete variable.
    nodes = list(df.columns)
    empty_model = create_empty_network(nodes)

    print("训练前(手动创建)的空网络结构: ")
    print(empty_model.edges())  # no edges yet
    visualize_network(empty_model, title="Empty Network (Before Training)")

    # ================ 3. Structure and parameter learning ================
    # 3.1 Hill-climbing structure search. The scoring method is an argument
    # of ``estimate()``; the constructor only takes the data (extra keyword
    # arguments are forwarded to the base estimator, which rejects
    # ``scoring_method``).
    # NOTE(review): the traceback recorded under this cell shows that
    # ``BicScore`` cannot be imported from the installed pgmpy -- the
    # file-level import has to be adapted to that version before this runs.
    hc = HillClimbSearch(df)
    best_model_structure = hc.estimate(scoring_method=BicScore(df))

    # 3.2 Turn the learned structure into a BayesianNetwork. Building it from
    # the edge list alone would silently drop variables that ended up with no
    # edges, so the node set is copied over explicitly as well.
    trained_model = BayesianNetwork(best_model_structure.edges())
    trained_model.add_nodes_from(best_model_structure.nodes())

    # 3.3 Parameter learning with MLE. For Bayesian estimation use instead:
    # trained_model.fit(df, estimator=BayesianEstimator, prior_type="BDeu")
    trained_model.fit(df, estimator=MaximumLikelihoodEstimator)

    print("训练后(结构学习)得到的网络结构:")
    print(trained_model.edges())
    visualize_network(trained_model, title="Trained Network (After Structure Learning)")

    # ================ 4. Inference ================
    inference = VariableElimination(trained_model)
    # Example query on columns named 'A' and 'B'; replace these names with
    # real column names of the data set.
    if 'A' in nodes and 'B' in nodes:
        print("对网络进行推断: P(A | B=b0)...")
        # Use the first category of B as the observed evidence value.
        result = inference.query(['A'], evidence={'B': df['B'].cat.categories[0]})
        print(result)
    else:
        print("示例推断: 数据集里没有找到名为 'A' 或 'B' 的列，跳过演示。")


# Entry point: run the demo when this code is executed as the main module.
if __name__ == "__main__":
    # Replace "your_data.xlsx" with the actual path of the workbook.
    excel_file = "your_data.xlsx"
    run_bayesian_network_demo(excel_file)


ImportError: cannot import name 'BicScore' from 'pgmpy.estimators' (c:\Users\jxq61\AppData\Local\Programs\Python\Python313\Lib\site-packages\pgmpy\estimators\__init__.py)

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.inference import VariableElimination

In [16]:



def read_excel_data(file_path: str, sheet_name=0):
    """
    Read an Excel sheet and return it as a DataFrame of categorical columns.

    Rows with any missing value are removed. All columns are then cast to
    the pandas ``category`` dtype; the data is assumed to be discrete
    already, so continuous columns may need a separate discretisation step.

    Args:
        file_path: Path of the Excel workbook, forwarded to ``pd.read_excel``.
        sheet_name: Sheet selector, forwarded to ``pd.read_excel``.

    Returns:
        The cleaned, all-categorical DataFrame.
    """
    table = pd.read_excel(file_path, sheet_name=sheet_name)
    table.dropna(inplace=True)  # drop every row that has a missing value
    # One explicit dtype entry per column.
    category_dtypes = {column: "category" for column in table.columns}
    return table.astype(category_dtypes)


def visualize_network(bayes_model: DiscreteBayesianNetwork, title="Bayesian Network"):
    """
    Plot the structure of a Bayesian network as a directed graph.

    Args:
        bayes_model: Network whose ``nodes()`` and ``edges()`` are drawn.
        title: Title shown on the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Rebuild the structure as a plain networkx DiGraph rather than calling
    # ``bayes_model.to_digraph()``.
    # NOTE(review): ``to_digraph`` is not listed in the documented pgmpy
    # model API or in networkx, so the original call is expected to fail
    # with AttributeError -- confirm against the installed pgmpy version.
    graph = nx.DiGraph()
    graph.add_nodes_from(bayes_model.nodes())
    graph.add_edges_from(bayes_model.edges())

    # Fixed seed so the layout is the same on every run.
    pos = nx.spring_layout(graph, seed=42)

    plt.figure(figsize=(6, 4))
    nx.draw_networkx_nodes(graph, pos)
    nx.draw_networkx_edges(graph, pos, arrows=True)
    nx.draw_networkx_labels(graph, pos)
    plt.title(title)
    plt.axis("off")
    plt.show()


def main():
    """
    Fit a Bayesian network with a hand-specified structure and draw it.

    Loads "data(imputed).xlsx" through ``read_excel_data``, builds a
    ``DiscreteBayesianNetwork`` from a fixed edge list, estimates its
    parameters by maximum likelihood, then prints and plots the structure.
    """
    # ================ 1. Load the data ================
    # Replace the file name with your own Excel workbook.
    data = read_excel_data("data(imputed).xlsx")
    print("数据样例：")
    print(data.head(), "\n")

    # ================ 2. Fixed structure ================
    # The structure is written as child -> list of parents and expanded into
    # (parent, child) edges below. Adjust the names to match the columns of
    # the data set in use.
    parents_of = {
        "粮食产量": ["粮食面积", "节水灌溉面积", "化肥合计"],
        "农业机械总动力": ["农用中大型拖拉机数量"],
        "粮食面积": ["农业机械总动力", "耕地占用税"],
    }
    structure = [
        (parent, child)
        for child, parents in parents_of.items()
        for parent in parents
    ]
    network = DiscreteBayesianNetwork(structure)

    # ================ 3. Parameter learning ================
    # Maximum likelihood estimation. For Bayesian estimation use instead:
    # network.fit(data, estimator=BayesianEstimator, prior_type="BDeu")
    network.fit(data, estimator=MaximumLikelihoodEstimator)

    # ================ 4. Visualise the network ================
    # The structure is unchanged by fitting; only the parameters were learned.
    print("给定的网络结构：", network.edges())
    visualize_network(network, title="Bayesian Network (Given Structure)")

    # ================ 5. Inference (disabled sketch) ================
    # Example of querying P(C | A=?, B=?) with the first category of each
    # evidence column. 'A', 'B' and 'C' are placeholder column names.
    #
    # inference = VariableElimination(network)
    # if 'A' in data.columns and 'B' in data.columns and 'C' in data.columns:
    #     val_a = data['A'].cat.categories[0]
    #     val_b = data['B'].cat.categories[0]
    #     evidence = {'A': val_a, 'B': val_b}
    #     result = inference.query(['C'], evidence=evidence)
    #     print(result)

In [14]:
# Load the workbook at notebook level so the DataFrame can be inspected in later cells.
df = read_excel_data("data(imputed).xlsx")

In [15]:
# Display the column names of the loaded DataFrame.
df.columns

Index(['耕地占用税', '粮食面积', '粮食产量', '人均可支配收入', '农业机械总动力', '农用中大型拖拉机数量', '化肥合计',
       '节水灌溉面积', '农药使用量'],
      dtype='object')