In [None]:
# 安装Python库rpy2
!pip install rpy2

# 安装R包INLA及其依赖包
!R -e "install.packages(c('sp', 'fmesher'), repos=c('http://cran.r-project.org'))"
!R -e "install.packages('INLA', repos='https://inla.r-inla-download.org/R/stable')"


R version 4.3.3 (2024-02-29) -- "Angel Food Cake"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> install.packages(c('sp', 'fmesher'), repos=c('http://cran.r-project.org'))
Installing packages into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'http://cran.r-project.org/src/contrib/sp_2.1-3.tar.gz'
Content type 'application/x-gzip' length 1244605 bytes (1.2 MB)
downloaded 1.2 MB


In [None]:
import geopandas as gpd
import pandas as pd

# Load the shapefile into a GeoDataFrame.
gdf = gpd.read_file('/content/County.shp')


def _touch_flags(shape):
    """Return, for one geometry, a boolean Series of which geometries in `gdf` touch it."""
    return gdf.geometry.touches(shape)


# Compute the adjacency relation: one row of touch flags per geometry,
# cast from booleans to 0/1 integers.
adjacency_matrix = gdf.geometry.apply(_touch_flags).astype(int)

# Label both axes with the GeoDataFrame index.
adjacency_df = pd.DataFrame(data=adjacency_matrix, index=gdf.index, columns=gdf.index)

In [None]:
# Show the first 5 rows and first 5 columns of the adjacency matrix.
corner_preview = adjacency_df.iloc[0:5, 0:5]
print(corner_preview)

   0  1  2  3  4
0  0  0  0  0  0
1  0  0  0  0  0
2  0  0  0  0  0
3  0  0  0  0  0
4  0  0  0  0  0


In [None]:
import geopandas as gpd

# Load the shapefile into a GeoDataFrame.
gdf = gpd.read_file('/content/County.shp')

# Print the column labels to confirm that a GEOID column is present.
column_labels = gdf.columns
print(column_labels)

Index(['STATEFP', 'COUNTYFP', 'COUNTYNS', 'GEOID', 'NAME', 'NAMELSAD', 'LSAD',
       'CLASSFP', 'MTFCC', 'CSAFP', 'CBSAFP', 'METDIVFP', 'FUNCSTAT', 'ALAND',
       'AWATER', 'INTPTLAT', 'INTPTLON', 'geoid_j', 'geometry'],
      dtype='object')


In [None]:
import geopandas as gpd
import pandas as pd

# Load the shapefile and use the GEOID column as the row index.
gdf = gpd.read_file('/content/County.shp').set_index('GEOID')

# Compute the adjacency matrix; because the geometries are indexed by GEOID,
# the rows and columns of the result are labelled by GEOID as well.
geoid_labels = gdf.index
touch_table = gdf.geometry.apply(lambda shape: gdf.geometry.touches(shape))
adjacency_matrix = touch_table.astype(int)
adjacency_df = pd.DataFrame(data=adjacency_matrix, index=geoid_labels, columns=geoid_labels)

# Show the first 5 rows and first 5 columns of the adjacency matrix.
print(adjacency_df.iloc[0:5, 0:5])

GEOID  31039  53069  35011  31109  31129
GEOID                                   
31039      0      0      0      0      0
53069      0      0      0      0      0
35011      0      0      0      0      0
31109      0      0      0      0      0
31129      0      0      0      0      0


In [None]:
pip install --upgrade pandas rpy2



In [None]:
pip install --upgrade rpy2



In [None]:
import pandas as pd
import rpy2.robjects.pandas2ri as pandas2ri
from rpy2 import robjects as ro
from rpy2.rinterface_lib.embedded import RRuntimeError
from rpy2.robjects.conversion import localconverter

# Load the dataset.
data_path = "/content/TestData_California.csv"
df = pd.read_csv(data_path)

# Try to attach the INLA library in the embedded R session. An R-side failure
# is reported instead of raised. The call runs inside an explicit conversion
# context so it does not depend on conversion rules being active in the
# current execution context.
try:
    with localconverter(ro.default_converter):
        inla = ro.r('library(INLA)')
except RRuntimeError as e:
    print(f"Failed to load INLA library in R: {e}")

# Convert the pandas DataFrame into an R data.frame and publish it in R's
# global environment as `r_df`. `pandas2ri` has no `ri2py_dataframe`
# attribute (and that name would be the R -> Python direction); the
# Python -> R conversion is `py2rpy`, run with the pandas rules active.
with localconverter(ro.default_converter + pandas2ri.converter) as cv:
    r_df = cv.py2rpy(df)
    ro.globalenv['r_df'] = r_df

def run_inla_model(formula, data):
    """
    Fit a Gaussian INLA model in the embedded R session and print its summary.

    The dataset passed in `data` is converted to an R data.frame on every call
    and published in R's global environment as ``r_df``, so columns added to
    the DataFrame after the notebook's initial conversion are visible to the
    model. The formula text is handed to R as a character value and turned
    into a formula with ``as.formula``, so it is never spliced into R source
    code and may contain quote characters.

    :param formula: R-style model formula, as a string.
    :param data: Dataset, as a pandas DataFrame.
    :return: None. The model summary is printed; an ``RRuntimeError`` raised
        by R is caught and reported with a printed message.
    """
    try:
        # pandas -> R conversion needs the pandas rules in addition to the defaults.
        with localconverter(ro.default_converter + pandas2ri.converter) as cv:
            r_data = cv.py2rpy(data)

        # Fit and summarise under the default rules only, inside an explicit
        # context, so the fitted object stays an R object when it is passed
        # back to R's `summary`.
        with localconverter(ro.default_converter):
            ro.globalenv['r_df'] = r_data
            ro.globalenv['inla_formula_text'] = ro.StrVector([formula])
            model_fit = ro.r("""
                inla(formula = as.formula(inla_formula_text), data = r_df, family = 'gaussian', control.predictor = list(compute = TRUE))
            """)
            # Print the model summary.
            print(ro.r('summary')(model_fit))
    except RRuntimeError as e:
        print(f"Error running INLA model: {e}")

# Assemble the base-model formula: each control below is wrapped in
# as.factor(), and VulnerabilityIndex is appended as-is.
factor_controls = ["STATEcode", "Year", "Month", "Week", "Weekend", "Holiday"]
rhs_terms = [f"as.factor({name})" for name in factor_controls] + ["VulnerabilityIndex"]
base_model_formula = "SentimentScore ~ " + " + ".join(rhs_terms)

# Run the base model.
print("Running Base Model")
run_inla_model(base_model_formula, df)

AttributeError: module 'rpy2.robjects.pandas2ri' has no attribute 'ri2py_dataframe'

In [None]:
# Baseline terms shared by every candidate model. The response is
# SentimentScore, the name used by the other model formulas in this notebook
# (the previous `Sentiment` did not match them).
# NOTE(review): confirm the response column name against the CSV header.
base_formula = "SentimentScore ~ STATEcode + COUNTYFP + Year + Month + Week + Weekend + Holiday + VulnerabilityIndex"

# Candidate formulas: the baseline plus each combination of the environmental factors.
model_formulas = {
    "Base + Heatwave": base_formula + " + Heatwave",
    "Base + Air Pollution": base_formula + " + AirPollutionInterpolate",
    "Base + Rainfall": base_formula + " + PrecipitationAnomaly",
    "Base + Heatwave + Air Pollution": base_formula + " + Heatwave + AirPollutionInterpolate",
    "Base + Heatwave + Rainfall": base_formula + " + Heatwave + PrecipitationAnomaly",
    "Base + Air Pollution + Rainfall": base_formula + " + AirPollutionInterpolate + PrecipitationAnomaly",
    "Base + All Environmental Factors": base_formula + " + Heatwave + AirPollutionInterpolate + PrecipitationAnomaly"
}

# Fit every candidate with run_inla_model; the printed summaries are what the
# models are compared on.
for model_name, formula in model_formulas.items():
    print(f"Running {model_name}")
    run_inla_model(formula, df)
    print("\n" + "="*50 + "\n")


Running Base + Heatwave


    Conversion rules for `rpy2.robjects` appear to be missing. Those
    rules are in a Python `contextvars.ContextVar`. This could be caused
    by multithreading code not passing context to the thread.
    Check rpy2's documentation about conversions.
    


NotImplementedError: 
    Conversion rules for `rpy2.robjects` appear to be missing. Those
    rules are in a Python `contextvars.ContextVar`. This could be caused
    by multithreading code not passing context to the thread.
    Check rpy2's documentation about conversions.
    

In [None]:
# Assumes df already contains "SentimentScore", "Heatwave",
# "AirPollutionInterpolate" and "PrecipitationAnomaly".
lagged_variables = ["Heatwave", "AirPollutionInterpolate", "PrecipitationAnomaly"]
lag_days = range(1, 15)  # lags 1..14

# Create one lagged copy of each variable for every lag.
# NOTE(review): `shift` moves values by row position over the whole frame; if
# df stacks several counties or is not sorted by date, the lags should
# presumably be computed per group on sorted data -- confirm against the data.
for lag in lag_days:
    for variable in lagged_variables:
        df[f"{variable}_lag{lag}"] = df[variable].shift(lag)

# Terms carried over from the selected best model. The original formula ended
# with the literal text "other variables from the best model", which is not
# valid R; these are the base-model controls used elsewhere in this notebook.
# TODO: replace with the terms of whichever model is actually selected.
best_model_terms = [
    "as.factor(STATEcode)",
    "as.factor(Year)",
    "as.factor(Month)",
    "as.factor(Week)",
    "as.factor(Weekend)",
    "as.factor(Holiday)",
    "VulnerabilityIndex",
]

# All lags of the first variable, then the second, then the third, followed
# by the carried-over terms.
lag_terms = [f"{variable}_lag{lag}" for variable in lagged_variables for lag in lag_days]
dlnm_formula = "SentimentScore ~ " + " + ".join(lag_terms + best_model_terms)

# Run the DLNM model.
print("Running DLNM Model with Lags 1-14")
run_inla_model(dlnm_formula, df)