In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from rpy2 import robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

# Turn on automatic conversion between pandas DataFrames and R data.frames.
pandas2ri.activate()

# Attach the INLA package inside the embedded R session.
ro.r('library(INLA)')

def run_inla_model(formula, data, model_name):
    """Fit a Gaussian INLA model in R and report its marginal log-likelihood.

    The frame is converted to an R data.frame and bound to the name ``r_df``
    in R's global environment, a lagged ``HeatCount`` column is created for
    every lag term found in ``formula``, and ``inla()`` is run on the result.

    Parameters
    ----------
    formula : str
        R model formula, e.g. ``"SentimentScore ~ HeatCount + HeatCount_lag1"``.
        Each right-hand-side term containing ``lag`` is treated as a lag of
        ``HeatCount``; the lag length is read from the digits after the last
        underscore of the term.
    data : pandas.DataFrame
        Input table. Lag columns are built from its ``HeatCount`` column.
    model_name : str
        Label used in the printed report.

    Returns
    -------
    tuple
        ``(mlik, coefs)``: ``mlik`` is the first entry of ``result$mlik`` as a
        Python float; ``coefs`` is the ``fixed`` element of ``summary(result)``.
    """
    with localconverter(ro.default_converter + pandas2ri.converter):
        r_df = ro.conversion.py2rpy(data)
    # Every R snippet below refers to `r_df` by name, so the converted frame
    # must exist in R's global environment, not only as a Python local.
    ro.globalenv['r_df'] = r_df

    # Collect the lag lengths requested on the right-hand side of the formula.
    rhs = formula.split('~')[1]
    lags = [
        int(''.join(filter(str.isdigit, term.strip().split('_')[-1])))  # 'lag1' -> 1
        for term in rhs.split('+')
        if 'lag' in term
    ]
    # Shift HeatCount down by `lag` rows, padding the top with NA.
    for lag in lags:
        ro.r(f'r_df$HeatCount_lag{lag} <- c({",".join(["NA"]*lag)}, head(r_df$HeatCount, -{lag}))')

    inla_call = f"""
    result <- inla(formula = {formula}, data = r_df, family = 'gaussian',
                   control.predictor = list(compute = TRUE))
    """
    ro.r(inla_call)
    # Print the model summary; a bare `summary` on the R side would name the
    # R function `summary`, not the Python variable holding the result.
    ro.r('print(summary(result), digits = 3)')
    summary = ro.r('summary(result)')
    # result$mlik is a matrix; select one cell in R and coerce to float so the
    # value is a plain scalar whichever rpy2 conversion is active.
    mlik = float(ro.r('result$mlik[1, 1]')[0])
    print(f"模型: {model_name}")
    print(f"边际对数似然: {mlik}")
    print()

    # Extract and print the fixed-effect summary table.
    coefs = summary.rx2('fixed')
    print(coefs)

    return mlik, coefs

# Load the data.
df = pd.read_csv('/content/TestData_California.csv')

# One (model name, marginal log-likelihood, coefficients) tuple per fitted model.
results = []

# Baseline model without lagged predictors.
base_formula = "SentimentScore ~ HeatCount"
print("运行不包含滞后变量的模型")
base_mlik, base_coefs = run_inla_model(base_formula, df, "基础模型")
results.append(("基础模型", base_mlik, base_coefs))

# Models that cumulatively add HeatCount_lag1 .. HeatCount_lag{lag_weeks}.
for lag_weeks in range(1, 4):
    lagged_formula = "SentimentScore ~ HeatCount"
    for i in range(1, lag_weeks + 1):
        lagged_formula += f" + HeatCount_lag{i}"  # assumes lag1 stands for a one-week lag
    model_name = f"滞后{lag_weeks}周模型"
    print(f"运行包含滞后{lag_weeks}周的模型")
    # The original line was cut off after `run_inla`; complete the call and
    # record the fit alongside the baseline.
    mlik, coefs = run_inla_model(lagged_formula, df, model_name)
    results.append((model_name, mlik, coefs))


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from rpy2 import robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

# Turn on automatic conversion between pandas DataFrames and R data.frames.
pandas2ri.activate()

# Attach the INLA package inside the embedded R session.
ro.r('library(INLA)')

def run_inla_model(formula, data, model_name):
    """Fit a Gaussian INLA model in R and return its marginal log-likelihood.

    The frame is converted to an R data.frame and bound to the name ``r_df``
    in R's global environment, a lagged ``HeatCount`` column is created for
    every lag term found in ``formula``, and ``inla()`` is run on the result.

    Parameters
    ----------
    formula : str
        R model formula, e.g. ``"SentimentScore ~ HeatCount + HeatCount_lag7"``.
        Each right-hand-side term containing ``lag`` is treated as a lag of
        ``HeatCount``; the lag length is read from the digits after the last
        underscore of the term.
    data : pandas.DataFrame
        Input table. Lag columns are built from its ``HeatCount`` column.
    model_name : str
        Label used in the printed report.

    Returns
    -------
    float
        The first entry of ``result$mlik``.
    """
    with localconverter(ro.default_converter + pandas2ri.converter):
        r_df = ro.conversion.py2rpy(data)
    # Every R snippet below refers to `r_df` by name, so the converted frame
    # must exist in R's global environment, not only as a Python local.
    ro.globalenv['r_df'] = r_df

    # Lag lengths requested in the formula (empty when there are no lag terms).
    lags = [int(''.join(filter(str.isdigit, term.split('_')[-1])))
            for term in formula.split('~')[1].split('+') if 'lag' in term]
    # Shift HeatCount down by `lag` rows, padding the top with NA.
    for lag in lags:
        ro.r(f'r_df$HeatCount_lag{lag} <- c(rep(NA, {lag}), head(r_df$HeatCount, -{lag}))')

    inla_call = f"""
        result <- inla(formula = {formula}, data = r_df, family = 'gaussian', control.predictor = list(compute = TRUE))
        """
    ro.r(inla_call)
    ro.r('print(summary(result), digits = 3)')
    # result$mlik is a matrix; select one cell in R and coerce to float so the
    # value is a plain scalar (callers format it with ':.2f').
    mlik = float(ro.r('result$mlik[1, 1]')[0])
    print(f"模型: {model_name}")
    print(f"边际对数似然: {mlik}")
    print()
    return mlik

# Load the data.
df = pd.read_csv('/content/TestData_California.csv')

# Marginal log-likelihoods and the matching model labels, in fit order.
mliks = []
models = []

# Baseline model without lagged predictors.
base_formula = "SentimentScore ~ HeatCount"
print("运行不包含滞后变量的模型")
base_mlik = run_inla_model(base_formula, df, "基础模型")
mliks.append(base_mlik)
models.append("基础模型")

# Models with 1..4 cumulative lag terms.
for lag_weeks in range(1, 5):
    # Lag columns are 7, 14, ... rows back — presumably the rows are daily so
    # that 7 rows equal one week; TODO confirm against the data's frequency.
    lagged_formula = "SentimentScore ~ HeatCount + " + " + ".join([f"HeatCount_lag{i*7}" for i in range(1, lag_weeks+1)])
    model_name = f"滞后{lag_weeks}周模型"
    print(f"运行包含滞后{lag_weeks}周的模型")
    mlik = run_inla_model(lagged_formula, df, model_name)
    mliks.append(mlik)
    models.append(model_name)

# Plot how the marginal log-likelihood changes across models.
fig, ax = plt.subplots(figsize=(12, 6))
# The line carries a label so the legend below has an entry to show; a legend
# requested with no labelled artists is empty and triggers a warning.
ax.plot(models, mliks, marker='o', label='边际对数似然值')
ax.set_xticks(range(len(models)))
ax.set_xticklabels(models, rotation=45, ha='right')
ax.set_ylabel('边际对数似然值')
ax.set_title('模型边际对数似然值变化趋势')

# Annotate every point with its value.
for i, mlik in enumerate(mliks):
    ax.annotate(f"{mlik:.2f}", xy=(i, mlik), xytext=(5, 5), textcoords='offset points')

# Place the legend just outside the top-right corner of the axes.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')

plt.show()

In [None]:
#可视化2

import pandas as pd
import matplotlib.pyplot as plt
from rpy2 import robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

# Turn on automatic conversion between pandas DataFrames and R data.frames.
pandas2ri.activate()

# Attach the INLA package inside the embedded R session.
ro.r('library(INLA)')

def run_inla_model(formula, data, model_name):
    """Fit a Gaussian INLA model in R; return its marginal log-likelihood and
    the posterior means of the fixed effects.

    The frame is converted to an R data.frame and bound to the name ``r_df``
    in R's global environment, a lagged ``HeatCount`` column is created for
    every lag term found in ``formula``, and ``inla()`` is run on the result.

    Parameters
    ----------
    formula : str
        R model formula, e.g. ``"SentimentScore ~ HeatCount + HeatCount_lag1"``.
        Each right-hand-side term containing ``lag`` is treated as a lag of
        ``HeatCount``; the lag length is read from the digits after the last
        underscore of the term.
    data : pandas.DataFrame
        Input table. Lag columns are built from its ``HeatCount`` column.
    model_name : str
        Accepted for symmetry with the callers; not used inside the function.

    Returns
    -------
    tuple
        ``(mlik, coefs)``: ``mlik`` is the first entry of ``result$mlik`` as a
        Python float; ``coefs`` is the ``mean`` column of
        ``result$summary.fixed``.
    """
    with localconverter(ro.default_converter + pandas2ri.converter):
        r_df = ro.conversion.py2rpy(data)
    # Every R snippet below refers to `r_df` by name, so the converted frame
    # must exist in R's global environment, not only as a Python local.
    ro.globalenv['r_df'] = r_df

    # Collect the lag lengths requested on the right-hand side of the formula.
    rhs = formula.split('~')[1]
    lags = [
        int(''.join(filter(str.isdigit, term.strip().split('_')[-1])))
        for term in rhs.split('+')
        if 'lag' in term
    ]
    # Shift HeatCount down by `lag` rows, padding the top with NA.
    for lag in lags:
        ro.r(f'r_df$HeatCount_lag{lag} <- c({",".join(["NA"]*lag)}, head(r_df$HeatCount, -{lag}))')

    inla_call = f"""
    result <- inla(formula = {formula}, data = r_df, family = 'gaussian',
                   control.predictor = list(compute = TRUE))
    """
    ro.r(inla_call)
    # Print the model summary; a bare `summary` on the R side would name the
    # R function `summary`, not a Python variable.
    ro.r('print(summary(result), digits = 3)')
    # result$mlik is a matrix; select one cell in R and coerce to float so the
    # value is a plain scalar whichever rpy2 conversion is active.
    mlik = float(ro.r('result$mlik[1, 1]')[0])
    # Fetch the fixed-effects table under the pandas converter so the 'mean'
    # column keeps the term names as its index, which the plotting code reads
    # via `coefs.index`.
    # NOTE(review): assumes result$summary.fixed is an R data.frame with a
    # 'mean' column — confirm against the installed INLA version.
    with localconverter(ro.default_converter + pandas2ri.converter):
        fixed_effects = ro.r('result$summary.fixed')
    coefs = fixed_effects['mean']

    return mlik, coefs

# Load the data.
df = pd.read_csv('/content/TestData_California.csv')

# One (model name, marginal log-likelihood, coefficients) tuple per fitted model.
results = []

# Baseline model without lagged predictors.
base_formula = "SentimentScore ~ HeatCount"
base_mlik, base_coefs = run_inla_model(base_formula, df, "基础模型")
results.append(("基础模型", base_mlik, base_coefs))

# Models that cumulatively add HeatCount_lag1 .. HeatCount_lag{lag_weeks}.
for lag_weeks in range(1, 4):
    lagged_formula = "SentimentScore ~ HeatCount"
    for i in range(1, lag_weeks + 1):
        lagged_formula += f" + HeatCount_lag{i}"
    model_name = f"滞后{lag_weeks}周模型"
    mlik, coefs = run_inla_model(lagged_formula, df, model_name)
    results.append((model_name, mlik, coefs))

# Plot how the marginal log-likelihood changes across models.
plt.figure(figsize=(12, 6))
model_names, mliks, _ = zip(*results)
plt.plot(model_names, mliks, marker='o')
plt.xticks(rotation=45)
plt.ylabel('边际对数似然值')
plt.title('模型边际对数似然值变化趋势')
plt.show()

# Plot the coefficient values of each model.
plt.figure(figsize=(12, 6))
for model_name, _, coefs in results:
    # Relies on `coefs` exposing `.index` (e.g. a pandas Series keyed by term).
    plt.plot(coefs.index, coefs, marker='o', label=model_name)
plt.xticks(rotation=45)
plt.ylabel('系数值')
plt.title('各模型滞后变量的效应大小')
plt.legend()
# `plt.show` without parentheses only names the function; call it.
plt.show()
