In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import warnings
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# Silence every warning for the rest of the session and configure matplotlib
# so that CJK text and minus signs render in the saved figures.
warnings.filterwarnings("ignore")
plt.rcParams.update({
    "font.family": ["Microsoft YaHei"],
    "axes.unicode_minus": False,
})

# Input data is read from ./data and figures are written to ./results,
# both resolved against the current working directory.
CURRENT_DIR = os.getcwd()
DATA_DIR, RESULT_DIR = (
    os.path.join(CURRENT_DIR, folder) for folder in ("data", "results")
)
os.makedirs(RESULT_DIR, exist_ok=True)

# Load the cleaned dataset and keep only the 2010-2020 rows (both ends inclusive).
final_df = pd.read_csv(os.path.join(DATA_DIR, "final_cleaned_data.csv"))
model_df = final_df[(final_df["Year"] >= 2010) & (final_df["Year"] <= 2020)].copy()

# Rescale both per-capita columns by 1e6.
# NOTE(review): presumably this converts the stored magnitude into the
# "tons per person" scale used in the printed/plotted labels - confirm
# against the data dictionary.
model_df[["Per_Capita_GHG", "Per_Capita_Energy"]] = (
    model_df[["Per_Capita_GHG", "Per_Capita_Energy"]] * 1e6
)

# Keep the modelling columns, drop incomplete rows, then keep only rows whose
# per-capita emissions lie strictly between 0 and 50.
model_df = (
    model_df[["Country", "Year", "Per_Capita_GDP", "Per_Capita_Energy", "Per_Capita_GHG"]]
    .dropna()
    .loc[lambda frame: (frame["Per_Capita_GHG"] > 0) & (frame["Per_Capita_GHG"] < 50)]
)

print(f"样本量：{len(model_df)} | 人均排放范围：{model_df['Per_Capita_GHG'].min():.1f}~{model_df['Per_Capita_GHG'].max():.1f}吨/人")

忽略所有警告信息。
创建对应的结果文件夹。
加载数据，并将数值量级换算为与图表显示单位一致。
对核心特征进行筛选。

In [None]:

# Standardise both predictors so the fitted coefficients are comparable with
# each other: each coefficient is the change in per-capita emissions for a
# one-standard-deviation change of that feature.
scaler = StandardScaler()
X = scaler.fit_transform(model_df[["Per_Capita_GDP", "Per_Capita_Energy"]])
y = model_df["Per_Capita_GHG"].values

# Multiple linear regression of per-capita emissions on the two features.
lr_model = LinearRegression()
lr_model.fit(X, y)

# Coefficient table, strongest effect (by absolute value) first.
feature_names = ["人均GDP", "人均能源消耗"]
coef_df = pd.DataFrame({
    "影响因素": feature_names,
    "权重系数": lr_model.coef_,
    "绝对权重": np.abs(lr_model.coef_)
}).sort_values("绝对权重", ascending=False)

print("\n===== 人均CO₂排放影响因素权重 =====")
print(coef_df.round(2))
# R² is evaluated on the same rows the model was fitted on (in-sample fit).
print(f"模型解释力（R²）：{r2_score(y, lr_model.predict(X)):.3f}")

# Bar chart of the coefficients: one colour for positive, another for negative.
plt.figure(figsize=(7, 5), dpi=150)
colors = ["#2A9D8F" if c > 0 else "#E76F51" for c in coef_df["权重系数"]]
bars = plt.bar(coef_df["影响因素"], coef_df["权重系数"], color=colors, width=0.6)

# Place each value label just outside its bar. The gap is proportional to the
# largest coefficient so it stays sensible whatever the coefficient scale is
# (a fixed offset of 1 data unit is far too large for small coefficients and
# negligible for large ones).
max_abs_coef = float(coef_df["绝对权重"].max())
label_offset = 0.03 * max_abs_coef if max_abs_coef > 0 else 0.03
for bar in bars:
    height = bar.get_height()
    points_up = height >= 0
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        height + (label_offset if points_up else -label_offset),
        f"{height:.1f}",
        ha="center",
        # Anchor the text edge nearest the bar so the label never overlaps it.
        va="bottom" if points_up else "top",
        fontsize=11,
    )
# Text does not take part in autoscaling, so reserve headroom for the labels.
plt.margins(y=0.15)

plt.title("人均CO₂排放影响因素权重（2010-2020）", fontsize=12)
plt.ylabel("权重系数（吨/人）", fontsize=11)
plt.axhline(y=0, color="#666", linestyle="--")
plt.grid(axis="y", alpha=0.2)
plt.tight_layout()

# Save the figure with a white background and report where it was written.
save_path = os.path.join(RESULT_DIR, "impact_factors_final.png")
plt.savefig(save_path, dpi=150, facecolor="white")
plt.close()
print(f"{save_path}")

保留人均GDP和人均能源消耗两个特征
多元线性回归建模
可视化权重，并标注权重数值

In [None]:

def _yearly_mean_ghg(frame, first_year, last_year):
    """Return mean Per_Capita_GHG per year for rows with Year in [first_year, last_year]."""
    in_period = frame[frame["Year"].between(first_year, last_year)]
    return in_period.groupby("Year")["Per_Capita_GHG"].mean()


def _annual_change_rate(yearly_mean):
    """Return the average annual percentage change of a year-indexed series.

    The relative change between the first and the last year is divided by the
    number of elapsed years (last year minus first year), so a 2016-2020
    series is divided by 4 and a 2010-2015 series by 5.

    Returns NaN when the rate is undefined: fewer than two years of data,
    or a first-year value of zero.
    """
    if len(yearly_mean) < 2:
        return float("nan")
    first_value, last_value = yearly_mean.iloc[0], yearly_mean.iloc[-1]
    elapsed_years = yearly_mean.index[-1] - yearly_mean.index[0]
    if elapsed_years == 0 or first_value == 0:
        return float("nan")
    return (last_value - first_value) / elapsed_years / first_value * 100


def evaluate_paris_agreement():
    """Compare per-capita emission trends before and after the Paris Agreement.

    Reads the module-level ``model_df``, prints the average annual change rate
    for 2010-2015 and 2016-2020, saves a line chart of both periods to
    ``RESULT_DIR`` and prints a per-continent comparison for a small,
    hard-coded set of countries.

    Side effect: adds a ``Continent`` column to the module-level ``model_df``.
    """
    pre_agreement = _yearly_mean_ghg(model_df, 2010, 2015)
    post_agreement = _yearly_mean_ghg(model_df, 2016, 2020)

    # Average annual change rate of each period, in percent per year.
    pre_rate = _annual_change_rate(pre_agreement)
    post_rate = _annual_change_rate(post_agreement)

    print(f"2010-2015协定前：年均{pre_rate:.1f}%")
    print(f"2016-2020协定后：年均{post_rate:.1f}%")

    plt.figure(figsize=(9, 5), dpi=150)

    # Trend before the agreement.
    plt.plot(
        pre_agreement.index, pre_agreement.values,
        color="#E76F51", linewidth=3, marker="o", label=f"协定前（{pre_rate:.1f}%/年）"
    )

    # Trend after the agreement.
    plt.plot(
        post_agreement.index, post_agreement.values,
        color="#2A9D8F", linewidth=3, marker="s", label=f"协定后（{post_rate:.1f}%/年）"
    )

    # Vertical reference line at the signing year.
    plt.axvline(x=2015, color="#666", linestyle="--", label="巴黎协定签署（2015）")

    plt.title("2010-2020 巴黎协定前后人均CO₂排放变化", fontsize=12)
    plt.xlabel("年份", fontsize=11)
    plt.ylabel("人均CO₂排放（吨/人）", fontsize=11)
    plt.xticks(list(pre_agreement.index) + list(post_agreement.index), fontsize=9)
    plt.grid(alpha=0.2)
    plt.legend(fontsize=10)
    plt.tight_layout()

    save_path = os.path.join(RESULT_DIR, "paris_agreement_final.png")
    plt.savefig(save_path, dpi=150, facecolor="white")
    plt.close()
    print(f"{save_path}")

    print("\n===== 分地区减排效果 =====")
    continent_mapping = {
        "China":"Asia", "India":"Asia", "United States":"North America", "Germany":"Europe",
        "Brazil":"South America", "South Africa":"Africa", "Australia":"Oceania"
    }
    # Kept as an in-place column assignment on the shared frame so that any
    # code relying on model_df["Continent"] after this call keeps working.
    model_df["Continent"] = model_df["Country"].map(continent_mapping)
    valid_continent = model_df[model_df["Continent"].notna()]

    for continent in valid_continent["Continent"].unique():
        cont_data = valid_continent[valid_continent["Continent"] == continent]
        # Skip regions with too few rows to give a meaningful trend.
        if len(cont_data) <= 10:
            continue

        c_pre_rate = _annual_change_rate(_yearly_mean_ghg(cont_data, 2010, 2015))
        c_post_rate = _annual_change_rate(_yearly_mean_ghg(cont_data, 2016, 2020))

        print(f"{continent}：协定前{ c_pre_rate:.1f}% → 协定后{c_post_rate:.1f}%")


evaluate_paris_agreement()

巴黎协定减排效果评估
对比数据：2010到2015年（协定前）与2016到2020年（协定后）
# 协定前趋势（红色）# 协定后趋势（绿色）
# 巴黎协定签署年份参考线（2015）
分地区评估减排效果

In [None]:

from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler


# One row per country-year observation; rows with missing values are dropped.
cluster_df = model_df[["Country", "Per_Capita_Energy", "Per_Capita_GHG"]].dropna()

# Rescale both features to [0, 1] so neither dominates the distance metric.
mm_scaler = MinMaxScaler()
cluster_features = mm_scaler.fit_transform(cluster_df[["Per_Capita_Energy", "Per_Capita_GHG"]])

# n_init is pinned because its default differs between scikit-learn versions,
# which would otherwise change the clustering from one environment to another.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
raw_labels = kmeans.fit_predict(cluster_features)

# KMeans numbers its clusters arbitrarily, so raw label 0 is not necessarily
# the "low" group. Rank the clusters by centroid magnitude (sum of the two
# normalised features) and relabel them 0 = lowest ... 2 = highest so that the
# names below are attached to the right groups.
# Note: kmeans.labels_ / kmeans.cluster_centers_ keep the raw numbering.
center_order = np.argsort(kmeans.cluster_centers_.sum(axis=1))
rank_of_raw_label = np.empty_like(center_order)
rank_of_raw_label[center_order] = np.arange(len(center_order))
cluster_df["Cluster"] = rank_of_raw_label[raw_labels]

# Human-readable names for the ranked clusters.
cluster_labels = {
    0: "低排放-低能耗组",
    1: "中排放-中能耗组",
    2: "高排放-高能耗组"
}
cluster_df["Cluster_Name"] = cluster_df["Cluster"].map(cluster_labels)

# Print up to five example countries per cluster. Because each row is a
# country-year, one country can appear in more than one cluster.
print("\n国家聚类结果")
for cluster in range(3):
    cluster_countries = cluster_df[cluster_df["Cluster"] == cluster]["Country"].unique()[:5]
    print(f"{cluster_labels[cluster]}：{', '.join(cluster_countries)}")

# Scatter plot of the original (unscaled) features, one colour/marker per cluster.
plt.figure(figsize=(10, 6), dpi=150)
colors = ["#2A9D8F", "#E9C46A", "#E76F51"]
markers = ["o", "s", "^"]

for cluster in range(3):
    subset = cluster_df[cluster_df["Cluster"] == cluster]
    plt.scatter(
        subset["Per_Capita_Energy"], subset["Per_Capita_GHG"],
        c=colors[cluster], marker=markers[cluster], s=60, alpha=0.7,
        label=cluster_labels[cluster]
    )

plt.title("全球国家人均能源消耗-CO₂排放聚类分析（2010-2020）", fontsize=12)
plt.xlabel("人均能源消耗（吨标准煤/人）", fontsize=11)
plt.ylabel("人均CO₂排放（吨/人）", fontsize=11)
plt.grid(alpha=0.2)
plt.legend(fontsize=10)
plt.tight_layout()

# Save the figure with a white background and report where it was written.
save_path = os.path.join(RESULT_DIR, "country_clustering_final.png")
plt.savefig(save_path, dpi=150, facecolor="white")
plt.close()
print(f"聚类图已保存 → {save_path}")

# 归一化特征（0-1区间，提升聚类效果）
K-Means聚类（最优k值=3，分为低/中/高排放组）
聚类结果标签映射（更易理解），输出各聚类代表国家

In [None]:

# Cross-country yearly means; groupby returns the years in ascending order.
time_df = model_df[["Year", "Per_Capita_GDP", "Per_Capita_Energy", "Per_Capita_GHG"]].groupby("Year").mean().reset_index()

# Fit emissions on the standardised yearly means. This re-fits the shared
# `scaler`, so from here on it holds the statistics of the yearly series.
X_time = scaler.fit_transform(time_df[["Per_Capita_GDP", "Per_Capita_Energy"]])
y_time = time_df["Per_Capita_GHG"].values
time_model = LinearRegression()
time_model.fit(X_time, y_time)

future_years = np.arange(2021, 2031)

# Compound annual growth rate over the observed span. The number of periods
# is taken from the data (last year minus first year) rather than hard-coded.
last_year = time_df["Year"].iloc[-1]
n_periods = last_year - time_df["Year"].iloc[0]
if n_periods <= 0:
    raise ValueError("at least two distinct years are required to estimate growth rates")

gdp_growth = (time_df["Per_Capita_GDP"].iloc[-1] / time_df["Per_Capita_GDP"].iloc[0]) ** (1 / n_periods) - 1
energy_growth = (time_df["Per_Capita_Energy"].iloc[-1] / time_df["Per_Capita_Energy"].iloc[0]) ** (1 / n_periods) - 1

# Years elapsed since the last observation: 1 for the first forecast year.
# Starting at 1 (not 0) matters - an exponent of 0 would make the first
# forecast reuse the last observed inputs and shift the projection by a year.
steps_ahead = future_years - last_year
future_gdp = time_df["Per_Capita_GDP"].iloc[-1] * (1 + gdp_growth) ** steps_ahead
future_energy = time_df["Per_Capita_Energy"].iloc[-1] * (1 + energy_growth) ** steps_ahead

# Scale the extrapolated inputs with the fitted scaler and predict emissions.
future_X = scaler.transform(np.column_stack([future_gdp, future_energy]))
future_ghg = time_model.predict(future_X)

plt.figure(figsize=(10, 5), dpi=150)

# Historical series.
plt.plot(time_df["Year"], time_df["Per_Capita_GHG"], color="#2A9D8F", linewidth=3, marker="o", label="2010-2020 实际值")

# Forecast series.
plt.plot(future_years, future_ghg, color="#E76F51", linewidth=3, linestyle="--", marker="s", label="2021-2030 预测值")

plt.title("2010-2030 全球人均CO₂排放历史+预测趋势", fontsize=12)
plt.xlabel("年份", fontsize=11)
plt.ylabel("人均CO₂排放（吨/人）", fontsize=11)
plt.grid(alpha=0.2)
plt.legend(fontsize=10)
plt.tight_layout()

save_path = os.path.join(RESULT_DIR, "ghg_prediction_2030.png")
plt.savefig(save_path, dpi=150, facecolor="white")
plt.close()

# Print the forecast year by year, then the location of the saved figure.
for year, ghg in zip(future_years, future_ghg):
    print(f"{year}年：{ghg:.1f}吨/人")
print(f"{save_path}")

# 分段5：2021-2030年排放预测
1. 构建时间特征
2. 用2010-2020数据预测未来
3. 生成2021-2030年预测输入（基于历史趋势外推，假设GDP/能源消耗按历史年均增速变化）
4. 预测2021-2030年人均排放
5. 可视化历史+预测趋势
历史数据（2010-2020）
预测数据（2021-2030）