# Supplementary Methods S1: Global albedo

Based on Tables 1-6 in:
Elena A. Tsvetsinskaya, Crystal B. Schaaf, Feng Gao, Alan H. Strahler, Robert E. Dickinson. 2006. Spatial and temporal variability in Moderate Resolution Imaging Spectroradiometer–derived surface albedo over global arid regions. https://doi.org/10.1029/2005JD006772

### Usage
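1. Create the folder `data/global_albedo/` one level above the directory containing this notebook (the code reads from `../data/global_albedo/`).
2. Go to https://doi.org/10.1029/2005JD006772 and download the supplementary information files (the tab-separated `.txt` tables) into that folder.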

In [None]:
import pandas as pd
import numpy as np
from scipy import stats

import re
import glob

from plotnine import *

In [None]:
# Root against which every relative location below is resolved
project_path = './'

# Folder holding the raw tables taken from the paper
data_path = project_path + '../data/global_albedo/'

# Name and destination folder of the merged table (standard deviations dropped)
output_fn = 'jgrd12712-combined_tables.csv'
output_path = project_path + '../data/global_albedo/'

# Folder for figures, should any be produced
graphs_path = project_path + '../graphs/'

### Functions

In [None]:
def _strip_stddev(column, drop_commas=False):
    # Reduce 'value(stddev)' text cells to the leading number.
    # A column that read_csv already parsed as numeric is passed through
    # untouched, because the .str accessor would raise on it.
    if pd.api.types.is_numeric_dtype(column):
        return column
    text = column.str.replace(r"\(.*\)", "", regex=True)
    if drop_commas:
        # Thousands separators survive in cells that read_csv kept as text
        text = text.str.replace(',', '', regex=False)
    return pd.to_numeric(text.str.strip())


def load_raw_paper_data(fn, silent=False):
    """Load one supplementary table of the paper into a numeric DataFrame.

    The file is read as tab-separated text. Its first line is the table
    title, from which the region name is extracted (the text after
    'Arid Regions of ' and before ', by FAO Soil Groupa'). The first two
    lines and the last line of the file are not part of the data and are
    skipped.

    Parameters
    ----------
    fn : str
        Path of the table file.
    silent : bool, default False
        When True nothing is printed. Otherwise the file name and the
        detected region are printed on one line.

    Returns
    -------
    pandas.DataFrame
        The table with a leading 'Region' column. In the columns
        'Pixel Number', '300-700 nm', '700-5,000 nm' and '300-5,000 nm' the
        parenthesised parts (the standard deviations) are removed and the
        remainder is converted to numbers.

    Raises
    ------
    KeyError
        If one of the four expected columns is missing.
    ValueError
        If a cell cannot be converted to a number after clean-up.
    """
    import os  # function-scope import: os is not imported at file level

    if not silent:
        # Printed before parsing so a failing file can be identified
        print('  -', os.path.basename(fn), end=': ')

    # Read the data
    df = pd.read_csv(fn, skiprows=2, skipfooter=1, engine='python',
                     sep='\t', thousands=',')

    # Read the region from the title line (same encoding read_csv defaults to)
    with open(fn, encoding='utf-8') as f:
        first_line = f.readline()
    region = re.sub(r'^.*?Arid Regions of ', '', first_line)
    region = re.sub(r', by FAO Soil Groupa\n', '', region)
    # Guard against a leftover newline when the title suffix differs
    region = region.strip()
    if not silent:
        print(region)

    # Region goes in front of the columns read from the file
    df.insert(0, 'Region', region)

    # Text cells -> numbers; only the pixel counts carry thousands separators
    df['Pixel Number'] = _strip_stddev(df['Pixel Number'], drop_commas=True)
    for band in ('300-700 nm', '700-5,000 nm', '300-5,000 nm'):
        df[band] = _strip_stddev(df[band])

    return df

def load_all_paper_data(directory, silent=False):
    """Load every '.txt' table below a directory into one DataFrame.

    The directory is searched recursively; files are processed in sorted
    path order and each one is parsed with load_raw_paper_data.

    Parameters
    ----------
    directory : str
        Folder to search. A trailing path separator is optional.
    silent : bool, default False
        Forwarded to load_raw_paper_data for every file.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all parsed tables with a fresh index.

    Raises
    ------
    ValueError
        If no '.txt' file is found below the directory.
    """
    import os  # function-scope import: os is not imported at file level

    # os.path.join keeps the pattern valid with or without a trailing slash
    pattern = os.path.join(directory, '**', '*.txt')
    file_list = sorted(glob.glob(pattern, recursive=True))
    if not file_list:
        # Same exception type pd.concat raises on an empty list, clearer text
        raise ValueError('No .txt tables found under ' + repr(directory))

    data_list = [load_raw_paper_data(filename, silent=silent)
                 for filename in file_list]
    # Combine all the read data
    return pd.concat(data_list, axis=0, ignore_index=True)

In [None]:
# Parse every table found under data_path into a single frame
print('Loading data...')
df = load_all_paper_data(data_path)

# Write the merged frame as CSV, leaving out the index column
combined_csv = output_path + output_fn
print('  - Saving complete df to:', output_fn)
df.to_csv(combined_csv, index=False)

# Report completion and show the column names of the merged frame
print('Done...')
print(df.columns.values)

In [None]:
def weighted_avg_and_std(values, weights):
    """Return the weighted mean and weighted standard deviation of values.

    The deviation is the square root of the weighted average of the squared
    distances from the weighted mean (no small-sample correction).

    Parameters
    ----------
    values : array-like
        Observations.
    weights : array-like
        Weight of each observation, passed to numpy.average.

    Returns
    -------
    tuple
        (weighted mean, weighted standard deviation)
    """
    mean = np.average(values, weights=weights)
    squared_deviation = (values - mean) ** 2
    spread = np.sqrt(np.average(squared_deviation, weights=weights))
    return (mean, spread)

# Mean of the '300-5,000 nm' column weighted by 'Pixel Number',
# printed as "mean (std)" with two decimals
print('Global mean albedo:')
mean_std = weighted_avg_and_std(df['300-5,000 nm'], weights=df['Pixel Number'])
rounded_mean = np.round(mean_std[0], 2)
rounded_std = np.round(mean_std[1], 2)
print(rounded_mean, '(' + str(rounded_std) + ')')