In [None]:
from utils import *
from matplotlib.lines import Line2D
from scipy.stats import ttest_rel

In [2]:
# Short display labels for the long region names stored in `regi_pnas`.
# NOTE(review): "Southern Asia" and "Southeastern Asia" both map to "S. Asia";
# confirm that merging the two regions is intended.
dic_region = {
    "Northern Africa and Western Asia": "N. Africa W. Asia",
    "Sub-Saharan Africa": "Sub-Saharan Africa",
    "Central Asia and Russian Federation": "C. Asia",
    "Eastern Asia": "E. Asia",
    "Southern Asia": "S. Asia",
    "Southeastern Asia": "S. Asia",
    "Northern America": "N. America",
    "Latin America and the Caribbean": "Latin America",
    "Western Europe": "W. Europe",
    "Eastern and South-Eastern Europe": "E. Europe",
    "Oceania and Australia": "Oceania",
}

# Renumber the rows 0..n-1 and expose that numbering as an explicit `idx` column.
gdf_countries = gdf_world.reset_index(drop=True)
gdf_countries = gdf_countries.reset_index().rename(columns={"index": "idx"})

# Attach the short region label to every country row.
gdf_countries["regi_short"] = gdf_countries["regi_pnas"].map(dic_region)

# Lookup table: long country name -> short region label.
country_to_region = gdf_countries.set_index("name_long")["regi_short"].to_dict()

In [43]:
def load_df_diff_matched(radius):
    """Load the yearly PSM result tables for one radius and stack them.

    For every year from 2002 to 2022 (inclusive) two CSV files are read from
    `path_data`, using their first column as the index:

    * ``PSM_results/matched/{radius}km_{year}.csv``
    * ``PSM_results/diff/{radius}km_{year}.csv``

    Parameters
    ----------
    radius
        Value interpolated into the file names (``{radius}km_{year}.csv``).

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(matched, diff)``: the row-wise concatenation of all yearly
        "matched" tables and of all yearly "diff" tables, in that order.
    """
    matched_frames, diff_frames = [], []
    for year in range(2002, 2023):
        matched_csv = path_data / f"PSM_results/matched/{radius}km_{year}.csv"
        diff_csv = path_data / f"PSM_results/diff/{radius}km_{year}.csv"
        matched_frames.append(pd.read_csv(matched_csv, index_col=0))
        diff_frames.append(pd.read_csv(diff_csv, index_col=0))
    return pd.concat(matched_frames), pd.concat(diff_frames)

# Paired t-test function

In [15]:
def _paired_stats(pairs, col_a, col_b, min_pairs):
    """Return (t_stat, t_p, diff_mean, diff_std) for `col_a` vs `col_b`, or NaNs if too few rows."""
    if pairs.shape[0] < min_pairs:
        return np.nan, np.nan, np.nan, np.nan
    values_a = pairs[col_a].values
    values_b = pairs[col_b].values
    t_stat, t_p = ttest_rel(values_a, values_b)
    diff = values_a - values_b
    # np.std uses its default ddof=0 here, exactly as the original code did.
    return t_stat, t_p, np.mean(diff), np.std(diff)


def _paired_ttest_by_year_country(df, col_a, col_b, min_pairs=5, report_warnings=False):
    """Run `_paired_stats` on every (year, country) group of `df` and tabulate the results."""
    records = []
    for (year, country), df_gp in df.groupby(["year", "country"]):
        if report_warnings:
            # Capture warnings instead of displaying them, and print the
            # (year, country) of the offending group once per captured warning.
            with warnings.catch_warnings(record=True) as caught:
                pairs = df_gp[[col_a, col_b]].dropna()
                stats = _paired_stats(pairs, col_a, col_b, min_pairs)
                for _ in caught:
                    print(year, country)
        else:
            pairs = df_gp[[col_a, col_b]].dropna()
            stats = _paired_stats(pairs, col_a, col_b, min_pairs)
        records.append([year, country, *stats])
    return pd.DataFrame(
        records,
        columns=["year", "country", "t_stat", "t_p", "diff_mean", "diff_std"],
    )


def ttest_1luc(df_matched_m_all, luc="crop"):
    """Paired t-tests per (year, country) for one land-use class.

    Two comparisons are made with `scipy.stats.ttest_rel`:

    * ``{luc}_change`` against ``{luc}_change_n``
    * ``{luc}_npp_change`` against ``{luc}_npp_change_n``

    Before testing, missing values of ``{luc}_npp_change_n`` are replaced by
    the value for the same country and year read from
    ``npp/{luc}_npp_diff.csv`` under `path_data`. That file is expected to
    have a ``name_long`` column plus one column per year.

    Groups with fewer than 5 complete pairs get NaN for every statistic.

    This function uses only its arguments and `path_data`. Earlier versions
    also read a notebook-level `df_diff_m_all` and `country_to_region` to
    build values that were never used, which raised ``NameError`` on a fresh
    kernel when `df_diff_m_all` was not defined globally.

    Parameters
    ----------
    df_matched_m_all : DataFrame
        Matched table containing at least the columns ``country``, ``year``,
        ``{luc}_change``, ``{luc}_change_n``, ``{luc}_npp_change`` and
        ``{luc}_npp_change_n``. The ``_n`` columns are presumably the matched
        counterpart of each observation; confirm against the PSM step.
    luc : str, default "crop"
        Land-use class prefix used to build the column and file names.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(change_tests, npp_change_tests)``, each with columns
        ``year, country, t_stat, t_p, diff_mean, diff_std``.
    """
    df_match_m_1luc = df_matched_m_all.reset_index(drop=True)

    # Country-level NPP difference: wide (one column per year) -> long format.
    country_col = f"{luc}_npp_diff_country"
    df_npp_diff_country = (
        pd.read_csv(path_data / f"npp/{luc}_npp_diff.csv")
        .rename(columns={"name_long": "country"})
        .melt(id_vars="country", var_name="year", value_name=country_col)
        .assign(year=lambda _df: _df["year"].astype(int))
    )
    df_match_m_1luc = df_match_m_1luc.merge(
        df_npp_diff_country, on=["country", "year"], how="left"
    )

    # Fall back to the country-level value wherever the matched one is missing.
    npp_n_col = f"{luc}_npp_change_n"
    df_match_m_1luc[npp_n_col] = df_match_m_1luc[npp_n_col].fillna(
        df_match_m_1luc[country_col]
    )

    df_1luc_change_tt_test = _paired_ttest_by_year_country(
        df_match_m_1luc, f"{luc}_change", f"{luc}_change_n"
    )
    # Only the NPP comparison captures warnings and reports the affected group.
    df_1luc_npp_change_tt_test = _paired_ttest_by_year_country(
        df_match_m_1luc, f"{luc}_npp_change", npp_n_col, report_warnings=True
    )
    return df_1luc_change_tt_test, df_1luc_npp_change_tt_test

# Combine t-test results

In [45]:
def combine_ttest_1lucs(tt_test_results, tt_test_col):
    """Reclassify several t-test tables to -1/0/+1 and merge them side by side.

    Each input table is reduced to a single column holding

    * ``+1``: significant (``t_p < 0.05``) with ``t_stat > 0``
    * ``-1``: significant with ``t_stat <= 0``
    * ``0``: not significant (this includes NaN ``t_p``)

    The columns are outer-joined on the ``(year, country)`` index. Cells that
    have no counterpart in a given table are filled with 0.

    Parameters
    ----------
    tt_test_results : sequence of DataFrame
        Tables with columns ``year``, ``country``, ``t_stat`` and ``t_p``.
    tt_test_col : sequence of str
        Output column name for each table, paired positionally with
        `tt_test_results`.

    Returns
    -------
    DataFrame
        Indexed by ``(year, country)`` with one column per input table.
        An empty DataFrame is returned when no tables are given.
    """
    tt_test_reclass_lst = []
    for tt_test_df, col_ in zip(tt_test_results, tt_test_col):
        sig = (tt_test_df["t_p"] < 0.05).astype(int)
        # Maps t_stat > 0 to +1 and everything else (including NaN) to -1.
        direction = (tt_test_df["t_stat"] > 0).astype(int) * 2 - 1
        tt_test_reclass = (
            tt_test_df[["year", "country"]]
            .assign(type_=sig * direction)
            .rename(columns={"type_": f"{col_}"})
            .set_index(["year", "country"])
        )
        tt_test_reclass_lst.append(tt_test_reclass)

    if not tt_test_reclass_lst:
        return pd.DataFrame()

    df_ttest_merge = pd.DataFrame(index=tt_test_reclass_lst[-1].index)
    for tt_test_reclass in tt_test_reclass_lst:
        df_ttest_merge = df_ttest_merge.join(tt_test_reclass, how="outer")

    # fillna returns a new frame; the result has to be kept, not discarded.
    return df_ttest_merge.fillna(0)

# Pipeline

In [46]:
def ttest_pipline(radius, out_file):
    """Run the t-tests for crop, forest and grass and write the merged table to CSV.

    Parameters
    ----------
    radius
        Passed to `load_df_diff_matched` to select the PSM result files.
    out_file
        Destination passed to ``DataFrame.to_csv``.

    The output columns are, in order: ``crop_change``, ``crop_npp_change``,
    ``forest_change``, ``forest_npp_change``, ``grass_change``,
    ``grass_npp_change``.
    """
    # The loader also returns the diff table; this function does not pass it on.
    df_matched_m_all, df_diff_m_all = load_df_diff_matched(radius)

    # Land-use class -> (label for the change test, label for the NPP-change test).
    labels_by_luc = {
        "crop": ("crop_change", "crop_npp_change"),
        "forest": ("forest_change", "forest_npp_change"),
        "grass": ("grass_change", "grass_npp_change"),
    }

    tt_test_results = []
    tt_test_col = []
    for luc, labels in labels_by_luc.items():
        change_tt_test, npp_change_tt_test = ttest_1luc(df_matched_m_all, luc=luc)
        tt_test_results.extend([change_tt_test, npp_change_tt_test])
        tt_test_col.extend(labels)

    combine_ttest_1lucs(tt_test_results, tt_test_col).to_csv(out_file)
    

In [49]:
# Run the pipeline for this radius only if its output CSV does not exist yet.
radius = 10
out_file = path_data / f"PSM_results/ttest/{radius}km.csv"
if not out_file.exists():
    ttest_pipline(radius, out_file)