In [1]:
import copy
import pickle

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import r2_score

In [2]:
from analysis_utils.CombinedModel import CombinedModel
from analysis_utils.flagged_uids import flagged_uids
from analysis_utils.spatial_CV import *
from analysis_utils.variable_names import *

In [3]:
# Global file paths: everything is resolved relative to the data root.
root_data_dir = "../../Data"

# Location of the processed LSMS label file.
lsms_pth = f"{root_data_dir}/lsms/processed/labels_cluster_v1.csv"

# Read the labels, drop every row whose unique_id is flagged, and expose
# unique_id under the additional column name delta_id.
# reset_index() is called without drop=True, so the pre-filter row index is
# kept as an 'index' column.
lsms_df = (
    pd.read_csv(lsms_pth)
    .loc[lambda frame: ~frame.unique_id.isin(flagged_uids)]
    .reset_index()
    .assign(delta_id=lambda frame: frame.unique_id)
)

# Copy of lsms_df whose consumption target is standardised (mean subtracted,
# divided by the standard deviation). Only this one column is rescaled; every
# other column is carried over unchanged.
lsms_df_norm = lsms_df.assign(
    log_mean_pc_cons_usd_2017=lambda frame: (
        frame['log_mean_pc_cons_usd_2017'] - frame['log_mean_pc_cons_usd_2017'].mean()
    ) / frame['log_mean_pc_cons_usd_2017'].std()
)

In [4]:
def print_results_dict(dictionary):
    """Print per-fold results as a tab-separated table.

    Parameters
    ----------
    dictionary : dict
        Maps a column name (e.g. 'overall', 'between', 'within') to an
        indexable sequence of numeric per-fold values. The number of rows is
        taken from the first entry, so every other entry must hold at least
        that many values.

    The header row lists "Fold" followed by every key of the dictionary; each
    following row holds the 1-based fold number and the values formatted with
    four decimals. Nothing is printed for an empty dictionary.
    """
    keys = list(dictionary.keys())
    if not keys:
        # No columns at all: there is nothing meaningful to print.
        return

    # The first column determines how many folds (rows) are printed.
    num_entries = len(dictionary[keys[0]])

    # Build the header from *all* keys so it always matches the row layout,
    # regardless of how many columns the dictionary has.
    print(" \t ".join(["Fold"] + [str(key) for key in keys]))

    for i in range(num_entries):
        cells = ["{:.4f}".format(dictionary[key][i]) for key in keys]
        print("\t\t".join([f"{i+1}"] + cells))

## Consumption expenditure -- Full model

In [5]:
# Full consumption model: load the pickled between and within results,
# combine them and print R2, a LaTeX table row, MSE and the per-fold R2.

# load the within and between model results
results_dir = "results"
between_cons_pth = f"{results_dir}/model_objects/between_cons.pkl"
within_cons_pth = f"{results_dir}/model_objects/within_cons.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_cons_pth, 'rb') as f:
    between_cons_cv = pickle.load(f)
    
with open(within_cons_pth, 'rb') as f:
    within_cons_cv = pickle.load(f)

# The full model is combined with lsms_df, i.e. the frame whose consumption
# target has NOT been standardised.
combined_cons = CombinedModel(lsms_df, between_cons_cv, within_cons_cv)
# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_cons.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_cons_combined = combined_cons.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_cons_combined['r2']}")

print('\nTex output:')
combined_cons.print_tex('r2')

print(f"\nMSE: {results_cons_combined['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_cons.res_r2)

R2: {'overall': 0.37354795387299544, 'between': 0.44844445387722087, 'within': 0.008093481748325183}

Tex output:
& 0.4484 & 0.0081 & 0.3735

MSE: {'overall': 0.2189345822075508, 'between': 0.16118160603365803, 'within': 0.055572761879272826}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.5215		0.5764		-0.0039
2		0.3694		0.4192		-0.0032
3		0.4082		0.4810		0.0059
4		0.2888		0.3707		0.0208
5		0.2737		0.3913		0.0208


## Consumption expenditure -- RGB Landsat

In [6]:
# Consumption model on RGB Landsat images: load the pickled between and
# within results, combine them and print R2, a LaTeX table row, MSE and the
# per-fold R2.

# load the within and between model results
results_dir = "results"
between_cons_ls_pth = f"{results_dir}/model_objects/between_cons_LS_cv.pkl"
within_cons_ls_pth = f"{results_dir}/model_objects/within_cons_LS_cv.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_cons_ls_pth, 'rb') as f:
    between_cons_ls = pickle.load(f)
    
with open(within_cons_ls_pth, 'rb') as f:
    within_cons_ls = pickle.load(f)

# Unlike the full model, this model is combined with lsms_df_norm, the frame
# with the standardised consumption target.
combined_cons_ls = CombinedModel(lsms_df_norm, between_cons_ls, within_cons_ls)

# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_cons_ls.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_cons_combined_ls = combined_cons_ls.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_cons_combined_ls['r2']}")

print('\nTex output:')
combined_cons_ls.print_tex('r2')

print(f"\nMSE: {results_cons_combined_ls['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_cons_ls.res_r2)

R2: {'overall': 0.15151180675754178, 'between': 0.2380614097501642, 'within': -0.012435943199214839}

Tex output:
& 0.2381 & -0.0124 & 0.1515

MSE: {'overall': 0.7553152315945647, 'between': 0.6708351454340425, 'within': 1.012957}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.4152		0.4392		-0.0193
2		0.0710		0.1447		-0.0000
3		0.2432		0.3553		-0.0368
4		-0.0330		0.0970		-0.0048
5		0.0500		0.1474		-0.0013


## Consumption expenditure -- MS images

In [7]:
# Consumption model on MS images: load the pickled between and within
# results, combine them and print R2, a LaTeX table row, MSE and the
# per-fold R2.

# load the within and between model results
results_dir = "results"
between_cons_ms_pth = f"{results_dir}/model_objects/between_cons_MS_cv.pkl"
within_cons_ms_pth = f"{results_dir}/model_objects/within_cons_MS_cv.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_cons_ms_pth, 'rb') as f:
    between_cons_ms = pickle.load(f)
    
with open(within_cons_ms_pth, 'rb') as f:
    within_cons_ms = pickle.load(f)

# Combined with lsms_df_norm, the frame with the standardised consumption target.
combined_cons_ms = CombinedModel(lsms_df_norm, between_cons_ms, within_cons_ms)

# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_cons_ms.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_cons_combined_ms = combined_cons_ms.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_cons_combined_ms['r2']}")

print('\nTex output:')
combined_cons_ms.print_tex('r2')

print(f"\nMSE: {results_cons_combined_ms['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_cons_ms.res_r2)

R2: {'overall': 0.24085925581985226, 'between': 0.32938913554918553, 'within': 0.0019305886908217219}

Tex output:
& 0.3294 & 0.0019 & 0.2409

MSE: {'overall': 0.6731676971974084, 'between': 0.589787778801712, 'within': 0.99882174}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.4871		0.5286		-0.0043
2		0.1954		0.2527		-0.0002
3		0.1638		0.2464		0.0002
4		0.1917		0.3335		0.0004
5		0.1630		0.2855		0.0136


## Consumption expenditure -- RS images

In [8]:
# Consumption model on RS images: load the pickled between and within
# results, combine them and print R2, a LaTeX table row, MSE and the
# per-fold R2.

# load the within and between model results
results_dir = "results"
between_cons_rs_pth = f"{results_dir}/model_objects/between_cons_RS_cv.pkl"
within_cons_rs_pth = f"{results_dir}/model_objects/within_cons_RS_cv.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_cons_rs_pth, 'rb') as f:
    between_cons_rs = pickle.load(f)
    
with open(within_cons_rs_pth, 'rb') as f:
    within_cons_rs = pickle.load(f)
# NOTE(review): the next line is a disabled assignment that is not executed;
# remove it if it is no longer needed.
#within_cons_rs.predictions['delta_id'] = lsms_df.unique_id

# Combined with lsms_df_norm, the frame with the standardised consumption target.
combined_cons_rs = CombinedModel(lsms_df_norm, between_cons_rs, within_cons_rs)

# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_cons_rs.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_cons_combined_rs = combined_cons_rs.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_cons_combined_rs['r2']}")
print('\nTex output:')
combined_cons_rs.print_tex('r2')

print(f"\nMSE: {results_cons_combined_rs['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_cons_rs.res_r2)

R2: {'overall': 0.23692466618596075, 'between': 0.3566787434956019, 'within': -0.014295974233413843}

Tex output:
& 0.3567 & -0.0143 & 0.2369

MSE: {'overall': 0.6720954078337877, 'between': 0.558671315795833, 'within': 1.0143683}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.5220		0.5874		-0.0202
2		0.1542		0.2591		-0.0120
3		0.2386		0.4512		-0.0493
4		0.1466		0.2392		0.0125
5		0.1142		0.2386		-0.0026


## Asset index -- Full model

In [9]:
# Full asset-index model: load the pickled between and within results,
# combine them and print R2, a LaTeX table row, MSE and the per-fold R2.

# load the within and between model results
results_dir = "results"
between_asset_pth = f"{results_dir}/model_objects/between_asset.pkl"
within_asset_pth = f"{results_dir}/model_objects/within_asset.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_asset_pth, 'rb') as f:
    between_asset_cv = pickle.load(f)
    
with open(within_asset_pth, 'rb') as f:
    within_asset_cv = pickle.load(f)

# The full model is combined with lsms_df (the frame without the
# standardised consumption column).
combined_asset= CombinedModel(lsms_df, between_asset_cv, within_asset_cv)
# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_asset.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_asset_combined = combined_asset.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_asset_combined['r2']}")

print('\nTex output:')
combined_asset.print_tex('r2')

print(f"\nMSE: {results_asset_combined['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_asset.res_r2)

R2: {'overall': 0.5287710011382827, 'between': 0.5640425172460577, 'within': 0.032482750018734505}

Tex output:
& 0.5640 & 0.0325 & 0.5288

MSE: {'overall': 1.0486109882435282, 'between': 1.060152961844711, 'within': 0.07321724432322547}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.6026		0.6594		0.0148
2		0.6075		0.6112		0.0464
3		0.5249		0.5539		0.0489
4		0.5088		0.5353		0.0368
5		0.3971		0.4543		0.0156


## Asset index -- RGB images

In [10]:
# Asset-index model on RGB Landsat images: load the pickled between and
# within results, combine them and print R2, a LaTeX table row, MSE and the
# per-fold R2.

# load the within and between model results
results_dir = "results"
between_asset_ls_pth = f"{results_dir}/model_objects/between_asset_LS_cv.pkl"
within_asset_ls_pth = f"{results_dir}/model_objects/within_asset_LS_cv.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_asset_ls_pth, 'rb') as f:
    between_asset_ls = pickle.load(f)
    
with open(within_asset_ls_pth, 'rb') as f:
    within_asset_ls = pickle.load(f)

# NOTE(review): lsms_df_norm standardises only 'log_mean_pc_cons_usd_2017';
# no asset column is standardised in this notebook. Confirm that the asset
# target CombinedModel reads from this frame is on the same scale as the
# predictions stored in the loaded models.
combined_asset_ls = CombinedModel(lsms_df_norm, between_asset_ls, within_asset_ls)

# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_asset_ls.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_asset_combined_ls = combined_asset_ls.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_asset_combined_ls['r2']}")

print('\nTex output:')
combined_asset_ls.print_tex('r2')

print(f"\nMSE: {results_asset_combined_ls['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_asset_ls.res_r2)

R2: {'overall': 0.3237998095240234, 'between': 0.4619510149523673, 'within': -0.05570842192597003}

Tex output:
& 0.4620 & -0.0557 & 0.3238

MSE: {'overall': 1.5146265488449118, 'between': 0.49836870506966024, 'within': 1.0461318}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.4408		0.5943		0.0039
2		0.3819		0.5542		-0.0165
3		0.1932		0.3949		-0.2828
4		0.3615		0.4247		0.0168
5		0.2440		0.3356		0.0000


## Asset index -- RS images

In [11]:
# Asset-index model on RS images: load the pickled between and within
# results, combine them and print R2, a LaTeX table row, MSE and the
# per-fold R2.

# load the within and between model results
results_dir = "results"
between_asset_rs_pth = f"{results_dir}/model_objects/between_asset_RS_cv.pkl"
within_asset_rs_pth = f"{results_dir}/model_objects/within_asset_RS_cv.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_asset_rs_pth, 'rb') as f:
    between_asset_rs = pickle.load(f)
    
with open(within_asset_rs_pth, 'rb') as f:
    within_asset_rs = pickle.load(f)

# NOTE(review): lsms_df_norm standardises only 'log_mean_pc_cons_usd_2017';
# no asset column is standardised in this notebook. Confirm that the asset
# target CombinedModel reads from this frame is on the same scale as the
# predictions stored in the loaded models.
combined_asset_rs = CombinedModel(lsms_df_norm, between_asset_rs, within_asset_rs)

# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_asset_rs.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_asset_combined_rs = combined_asset_rs.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_asset_combined_rs['r2']}")

print('\nTex output:')
combined_asset_rs.print_tex('r2')

print(f"\nMSE: {results_asset_combined_rs['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_asset_rs.res_r2)

R2: {'overall': 0.3267188252317363, 'between': 0.34636288760358247, 'within': -0.0010206133378735326}

Tex output:
& 0.3464 & -0.0010 & 0.3267

MSE: {'overall': 1.505437637766029, 'between': 0.6052104550924965, 'within': 0.9994925}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.4667		0.4450		-0.0023
2		0.4096		0.5175		0.0116
3		0.3632		0.3878		-0.0179
4		0.1445		0.0974		0.0145
5		0.2465		0.2793		-0.0111


## Asset index -- MS images

In [12]:
# Asset-index model on MS images: load the pickled between and within
# results, combine them and print R2, a LaTeX table row, MSE and the
# per-fold R2.

# load the within and between model results
results_dir = "results"
between_asset_ms_pth = f"{results_dir}/model_objects/between_asset_MS_cv.pkl"
within_asset_ms_pth = f"{results_dir}/model_objects/within_asset_MS_cv.pkl"

# NOTE: pickle.load can execute arbitrary code - only load result files you trust.
with open(between_asset_ms_pth, 'rb') as f:
    between_asset_ms = pickle.load(f)
    
with open(within_asset_ms_pth, 'rb') as f:
    within_asset_ms = pickle.load(f)

# NOTE(review): lsms_df_norm standardises only 'log_mean_pc_cons_usd_2017';
# no asset column is standardised in this notebook. Confirm that the asset
# target CombinedModel reads from this frame is on the same scale as the
# predictions stored in the loaded models.
combined_asset_ms = CombinedModel(lsms_df_norm, between_asset_ms, within_asset_ms)

# evaluate() is called before any metric is read; presumably it populates
# res_r2 used below - confirm in CombinedModel.
combined_asset_ms.evaluate()
# The returned object is indexed by 'r2' and 'mse' below; the 'r2' entry is
# also what the save cell at the end of the notebook stores.
results_asset_combined_ms = combined_asset_ms.compute_overall_performance(use_fold_weights = True)
print(f"R2: {results_asset_combined_ms['r2']}")

print('\nTex output:')
combined_asset_ms.print_tex('r2')

print(f"\nMSE: {results_asset_combined_ms['mse']}")

# Per-fold table printed with the notebook's print_results_dict helper.
print('\nFold Performance (R2):')
print_results_dict(combined_asset_ms.res_r2)

R2: {'overall': 0.36037754850405057, 'between': 0.4549363977852485, 'within': -0.021413154280719993}

Tex output:
& 0.4549 & -0.0214 & 0.3604

MSE: {'overall': 1.4198382373183949, 'between': 0.5083236585938392, 'within': 1.0136703}

Fold Performance (R2):
Fold 	 overall 	 between 	 within
1		0.5495		0.5229		-0.0000
2		0.4040		0.5293		0.0234
3		0.3163		0.4315		0.0147
4		0.3643		0.4619		0.0046
5		0.1621		0.3216		-0.1498


# Save results

In [13]:
# Collect the R2 summary of every combined model evaluated above, keyed by
# the name under which it is stored in the results file.
dl_res = {
    'cons_combined': results_cons_combined['r2'],
    'cons_rgb': results_cons_combined_ls['r2'],
    'cons_ms': results_cons_combined_ms['r2'],
    'cons_rs': results_cons_combined_rs['r2'],

    'asset_combined': results_asset_combined['r2'],
    'asset_rgb': results_asset_combined_ls['r2'],
    'asset_ms': results_asset_combined_ms['r2'],
    'asset_rs': results_asset_combined_rs['r2'],
}

# Rename the entries of each dictionary by appending "_r2" to every key.
# Fresh dictionaries holding deep copies are built, so the original result
# objects are never modified and nothing is mutated while being iterated.
# The comprehension also keeps its loop variables out of the notebook
# namespace.
dl_res = {
    model_name: {k + "_r2": copy.deepcopy(v) for k, v in res.items()}
    for model_name, res in dl_res.items()
}

# Persist the collected scores for later use outside this notebook.
with open("results/deep_learning_res.pkl", 'wb') as f:
    pickle.dump(dl_res, f)