__Trend analysis of observed temperature anomalies, regressed against time and against forcing.__

__The code is adapted from:__ https://github.com/hausfath/OldModels/blob/master/notebooks/obs_temp_forcing_analysis.ipynb

In [6]:
import matplotlib.pyplot as plt
import numpy as np
import numpy_indexed as npi
import pandas as pd
import os
import statsmodels.formula.api as smf
import statsmodels.api as sm
import warnings
warnings.filterwarnings(action='once')

# Input: forcing ensemble stored as a pickled object inside a .npy file (loaded below with allow_pickle=True).
forcing_file = '../data/preprocessed/forcing_ensemble_base1961-1990.npy'
# Input: CSV of annual temperature anomalies, one column per observational series plus a 'year' column.
temps_file = '../data/preprocessed/Observations_TAnom.csv'
# Output directory prefix; the functions below concatenate file names onto it, so the trailing slash is required.
interim_path = "../data/TCR_interim/" # save interim data here

In [7]:
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects -- only load forcing files from a trusted source.
# .item() unwraps the 0-d object array into the stored object, which is used below as a mapping
# with the keys 'year', 'rf_anthro' and 'rf_total'.
forcings = np.load(forcing_file, allow_pickle=True).item()
# The first 10 lines of the CSV are skipped -- presumably a metadata header; confirm against the file.
annual_temps = pd.read_csv(temps_file, skiprows=10)

# Analysis windows as [start_year, end_year]; both ends are inclusive in the functions below.
timeframes = ([1970, 2000], [1971, 2000], [1972, 2000], [1975, 2010], [1977, 2017], 
              [1981, 2017], [1988, 2017], [1990, 2017], [1993, 2017], [1995, 2017], 
              [2001, 2017], [2007, 2017])

In [None]:
def coef_arma_cis(y_data, x_data):
    '''
    Fit ``y_data = const + coef * x_data`` by OLS and return the slope with its 95% CI.

    Despite the "arma" in the name, the fit is a plain ordinary-least-squares
    regression (``sm.OLS``); no autocorrelation model is applied here.

    Parameters
    ----------
    y_data : 1-D array-like (ndarray or pandas Series)
        Dependent variable.
    x_data : 1-D array-like (ndarray or pandas Series)
        Single regressor. An intercept column is prepended with
        ``sm.add_constant``, so the slope is the second fitted parameter.

    Returns
    -------
    dict
        'coef'     : fitted slope,
        'ci_lower' : lower bound of the slope's 95% confidence interval,
        'ci_upper' : upper bound of the slope's 95% confidence interval,
        'sd'       : half of the upper CI half-width, i.e. the 95% half-width
                     is treated as two standard deviations.
    '''
    design = sm.add_constant(x_data)
    smresults = sm.OLS(y_data, design).fit()

    # statsmodels returns pandas objects for pandas inputs and ndarrays for
    # ndarray inputs.  Converting to ndarrays gives one unambiguous layout
    # (row = parameter, column = lower/upper bound) and avoids integer-position
    # lookups on label-indexed pandas objects, which pandas has deprecated.
    params = np.asarray(smresults.params)
    conf_int = np.asarray(smresults.conf_int(alpha=0.05, cols=None))

    # Row 0 is the intercept added above; row 1 is the slope.
    ols_coef = params[1]
    ci_lower, ci_upper = conf_int[1]
    sd = (ci_upper - ols_coef) / 2.
    return {
            'coef' : ols_coef,
            'ci_lower' : ci_lower,
            'ci_upper' : ci_upper,
            'sd' : sd
    }

def obs_temp_time_trends(annual_temps, timeframes):
    '''
    Summarise the linear temperature-vs-year trend for each analysis window.

    For every ``[start_year, end_year]`` window (both ends inclusive) each of
    the five observational series is regressed on the year with
    ``coef_arma_cis``.  Per window the function records the mean and the
    sample standard deviation of the five slopes and the mean lower
    half-width of their confidence intervals.  The summary table, extended
    with ``coef_low``/``coef_high`` bounds, is written to
    ``interim_path + 'obs_time_trends.csv'``.  Nothing is returned.

    Parameters
    ----------
    annual_temps : pandas.DataFrame
        Must contain a 'year' column and one column per series name below.
    timeframes : iterable of [start_year, end_year]
    '''
    datasets = ['hadcrut4', 'gistemp', 'noaa', 'berkeley', 'cowtan_way']
    summary = {'coef_mean' : [], 'coef_sd' : [], 'ci_mean' : [], 'timeframe' : []}

    for times in timeframes:
        print('Analyzing the period from ', times[0], ' to ', times[1])
        first_year, last_year = times[0], times[1]
        in_window = (annual_temps['year'] >= first_year) & (annual_temps['year'] <= last_year)
        rows = np.where(in_window)[0]

        # One OLS fit per observational series, all over the same rows.
        fits = [coef_arma_cis(annual_temps[name][rows], annual_temps['year'][rows])
                for name in datasets]
        per_dataset = pd.DataFrame({
            'coef' : [fit['coef'] for fit in fits],
            'ci_lower' : [fit['ci_lower'] for fit in fits],
            'ci_upper' : [fit['ci_upper'] for fit in fits],
            'obs_series' : list(datasets),
        })
        # Distance from the slope down to its lower confidence bound.
        per_dataset['ci_val'] = per_dataset['coef'] - per_dataset['ci_lower']

        summary['coef_mean'].append(per_dataset['coef'].mean())
        summary['coef_sd'].append(per_dataset['coef'].std())
        summary['ci_mean'].append(per_dataset['ci_val'].mean())
        summary['timeframe'].append(str(times))

    trends = pd.DataFrame(summary)
    # Combine twice the between-series spread with the mean regression
    # half-width in quadrature.
    spread = ((trends['coef_sd']*2)**2 + trends['ci_mean']**2)**(0.5)
    trends['coef_low'] = trends['coef_mean'] - spread
    trends['coef_high'] = trends['coef_mean'] + spread
    trends.to_csv(interim_path+'obs_time_trends.csv')

def obs_temp_forcing_trends(forcings, annual_temps, timeframes, n_members=1000):
    '''
    Summarise the temperature-vs-forcing regression slope for each window.

    For every ``[start_year, end_year]`` window (both ends inclusive), each
    observational temperature series is regressed with ``coef_arma_cis``
    against each of the first ``n_members`` members of
    ``forcings['rf_anthro']``.  Per window the function records the mean and
    sample standard deviation of all slopes and the mean lower half-width of
    their confidence intervals.  The summary table, extended with
    ``coef_low``/``coef_high`` bounds, is written to
    ``interim_path + 'obs_forcing_trends.csv'``.  Nothing is returned.

    Parameters
    ----------
    forcings : mapping
        Needs 'year' (array of years) and 'rf_anthro' (2-D array that is
        indexed as [member][year] after its two axes are swapped).
    annual_temps : pandas.DataFrame
        Must contain a 'year' column and one column per series name below.
    timeframes : iterable of [start_year, end_year]
    n_members : int, optional
        Number of forcing ensemble members to use, taken from the start of
        the ensemble.  Defaults to 1000, the value that used to be hard-coded.
    '''
    obs_names = ['hadcrut4', 'gistemp', 'noaa', 'berkeley', 'cowtan_way']
    coef_mean, coef_sd, ci_mean, timeframe = [], [], [], []

    # Does not depend on the window: after the swap the first axis indexes
    # ensemble members and the second indexes years.
    rf_anthro = np.swapaxes(forcings['rf_anthro'],0,1)

    for times in timeframes:
        print('Analyzing the period from ', times[0], ' to ', times[1])
        coef, ci_lower, ci_upper, rf_number, obs_series = [], [], [], [], []
        start_year = times[0]
        end_year = times[1]
        rf_year_range = np.where((forcings['year'] >= start_year) & (forcings['year'] <= end_year))[0]
        temp_year_range = np.where((annual_temps['year'] >= start_year) & (annual_temps['year'] <= end_year))[0]

        # The temperature slices are the same for every ensemble member, so
        # select them once per window instead of once per fit.
        temps_by_series = [(name, annual_temps[name][temp_year_range]) for name in obs_names]

        # Loop order (member outer, series inner) is kept so the row order,
        # and therefore the floating-point sums, match earlier outputs.
        for rf_num in range(n_members):
            rf_values = rf_anthro[rf_num][rf_year_range]
            for obs_temps, temp_values in temps_by_series:
                results = coef_arma_cis(temp_values, rf_values)
                coef.append(results['coef'])
                ci_lower.append(results['ci_lower'])
                ci_upper.append(results['ci_upper'])
                rf_number.append(rf_num)
                obs_series.append(obs_temps)
        df = pd.DataFrame({'coef' : coef,
                           'ci_lower' : ci_lower,
                           'ci_upper' : ci_upper,
                           'rf_number' : rf_number,
                           'obs_series' : obs_series})
        # Distance from the slope down to its lower confidence bound.
        df['ci_val'] = df['coef'] - df['ci_lower']
        coef_mean.append(df['coef'].mean())
        coef_sd.append(df['coef'].std())
        ci_mean.append(df['ci_val'].mean())
        timeframe.append(str(times))

    df = pd.DataFrame({'coef_mean' : coef_mean,
                       'coef_sd' : coef_sd,
                       'ci_mean' : ci_mean,
                       'timeframe' : timeframe})
    # Combine twice the spread across fits with the mean regression
    # half-width in quadrature.
    uncertainty = ((df['coef_sd']*2)**2 + df['ci_mean']**2)**(0.5)
    df['coef_low'] = df['coef_mean'] - uncertainty
    df['coef_high'] = df['coef_mean'] + uncertainty
    df.to_csv(interim_path+'obs_forcing_trends.csv')

#obs_temp_time_trends(annual_temps, timeframes)
#obs_temp_forcing_trends(forcings, annual_temps, timeframes)

In [None]:
def obs_forcing_rate(forcings, annual_temps, timeframes, n_members=1000):
    '''
    Summarise the forcing-vs-year regression slope for each analysis window.

    For every ``[start_year, end_year]`` window (both ends inclusive), each of
    the first ``n_members`` members of ``forcings['rf_anthro']`` is regressed
    on the year with ``coef_arma_cis``.  Per window the function records the
    mean and sample standard deviation of the slopes and the mean lower
    half-width of their confidence intervals.  The summary table, extended
    with ``coef_low``/``coef_high`` bounds, is written to
    ``interim_path + 'obs_forcing_rate.csv'``.  Nothing is returned.

    The fit does not involve the temperature values; ``annual_temps`` only
    supplies the 'year' regressor.  Each fit result is nevertheless recorded
    once per observational series name, as before, so the row count -- and
    with it the sample standard deviation -- is unchanged from earlier
    outputs.

    Parameters
    ----------
    forcings : mapping
        Needs 'year' (array of years) and 'rf_anthro' (2-D array that is
        indexed as [member][year] after its two axes are swapped).
    annual_temps : pandas.DataFrame
        Must contain a 'year' column.
        NOTE(review): the years selected from it must be as many as the
        forcing years selected for the same window -- confirm both inputs
        cover every window completely.
    timeframes : iterable of [start_year, end_year]
    n_members : int, optional
        Number of forcing ensemble members to use, taken from the start of
        the ensemble.  Defaults to 1000, the value that used to be hard-coded.
    '''
    obs_names = ['hadcrut4', 'gistemp', 'noaa', 'berkeley', 'cowtan_way']
    coef_mean, coef_sd, ci_mean, timeframe = [], [], [], []

    # Does not depend on the window: after the swap the first axis indexes
    # ensemble members and the second indexes years.
    rf_anthro = np.swapaxes(forcings['rf_anthro'],0,1)

    for times in timeframes:
        print('Analyzing the period from ', times[0], ' to ', times[1])
        coef, ci_lower, ci_upper, rf_number, obs_series = [], [], [], [], []
        start_year = times[0]
        end_year = times[1]
        rf_year_range = np.where((forcings['year'] >= start_year) & (forcings['year'] <= end_year))[0]
        temp_year_range = np.where((annual_temps['year'] >= start_year) & (annual_temps['year'] <= end_year))[0]
        # The regressor is identical for every ensemble member in this window.
        years = annual_temps['year'][temp_year_range]

        for rf_num in range(n_members):
            rf_values = rf_anthro[rf_num][rf_year_range]
            # The fit is independent of the observational series, so run it
            # once per member rather than once per (member, series) pair.
            results = coef_arma_cis(rf_values, years)
            for obs_temps in obs_names:
                coef.append(results['coef'])
                ci_lower.append(results['ci_lower'])
                ci_upper.append(results['ci_upper'])
                rf_number.append(rf_num)
                obs_series.append(obs_temps)
        df = pd.DataFrame({'coef' : coef,
                           'ci_lower' : ci_lower,
                           'ci_upper' : ci_upper,
                           'rf_number' : rf_number,
                           'obs_series' : obs_series})
        # Distance from the slope down to its lower confidence bound.
        df['ci_val'] = df['coef'] - df['ci_lower']
        coef_mean.append(df['coef'].mean())
        coef_sd.append(df['coef'].std())
        ci_mean.append(df['ci_val'].mean())
        timeframe.append(str(times))

    df = pd.DataFrame({'coef_mean' : coef_mean,
                       'coef_sd' : coef_sd,
                       'ci_mean' : ci_mean,
                       'timeframe' : timeframe})
    # Combine twice the spread across fits with the mean regression
    # half-width in quadrature.
    uncertainty = ((df['coef_sd']*2)**2 + df['ci_mean']**2)**(0.5)
    df['coef_low'] = df['coef_mean'] - uncertainty
    df['coef_high'] = df['coef_mean'] + uncertainty
    df.to_csv(interim_path+'obs_forcing_rate.csv')

#obs_forcing_rate(forcings, annual_temps, timeframes)

In [10]:
def obs_forcing_timeseries(forcings):
    '''
    Write the per-year mean and standard deviation of the total forcing.

    ``forcings['rf_total']`` is reduced along axis 1 and paired row by row
    with ``forcings['year']``, so its first axis is expected to run over
    years.  The result is written to
    ``interim_path + 'forcing_timeseries.csv'``.  Nothing is returned.

    Parameters
    ----------
    forcings : mapping
        Needs 'year' and a 2-D 'rf_total' array.
    '''
    rf_total = forcings['rf_total']
    # np.std uses its default ddof=0 here (population standard deviation).
    std = np.std(rf_total, axis=1)
    mean = np.mean(rf_total, axis=1)

    df = pd.DataFrame({'year' : forcings['year'],
                       'forcing_mean' : mean,
                       'forcing_std' : std})
    df.to_csv(interim_path+'forcing_timeseries.csv')

#obs_forcing_timeseries(forcings)