In [1]:
from __future__ import division
import os

import numpy as np
import pandas as pd


import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def sum_of_squares(values):
    """
    Add up the squares of every entry in `values`.

    Parameters:
    -----------
    values:  iterable of numbers to square and then sum.

    Returns:
    --------
    v_1**2 + v_2**2 + ... + v_n**2  (0 when `values` is empty).
    """
    return sum(v**2 for v in values)


def root_mean_square(values):
    """
    Root-mean-square (quadratic mean) of `values`.

    Parameters:
    -----------
    values:  sized iterable of numbers to compute the rms of.

    Equation:
    ---------
    rms = sqrt( (v_1**2 + ... + v_n**2) / n ),   n = len(values)

    The sum of squares is delegated to sum_of_squares(values).
    The division is a true division; this notebook imports `division`
    from __future__ so that also holds on Python 2.
    """
    mean_square = sum_of_squares(values) / len(values)
    return np.sqrt(mean_square)


def weighted_average(values, ndatas):
    """
    Average of `values`, each weighted by the size of the sample it
    came from.

    Parameters:
    -----------
    values:  list, of values that need to be weighted by the
                number of data points in their respective samples.
    ndatas:  list, of the number of data points in the samples.
                Must have the same length as `values`.

    Returns:
    --------
    (terms, average)
    terms:    list with one entry per sample, v_i * (n_i / N).  These are
              the individual weighted contributions, not the bare weights.
    average:  sum of `terms`, which is the weighted average.

    Raises:
    -------
    ValueError if `values` and `ndatas` differ in length.  Without this
    check zip() would silently drop the unmatched entries while N still
    counted every sample, giving an inconsistent result.

    Equation:
    ---------
    WA = (1/N) * SUM(n_i * v_i),   N = SUM(n_i)
    where n_i is the number of data points in the ith sample and
    v_i is the value associated with the ith sample.
    The 1/N factor is usually written outside the sum; here it is pulled
    into every term so the per-sample contributions can be returned too.

    Example (weighted average of sigma):
    n = [13, 32, 35]
    sigma = [0.015, 0.104, 0.096]
    then N = 13+32+35 = 80 and
    WA = (13*0.015 + 32*0.104 + 35*0.096) / 80 = 0.0860375
    """
    if len(values) != len(ndatas):
        raise ValueError(
            'values and ndatas must have the same length (got %d and %d)'
            % (len(values), len(ndatas)))

    N = np.sum(ndatas)
    # v_i * (n_i / N): each sample's share of the final average.
    terms = [v * (n / N) for n, v in zip(ndatas, values)]
    return terms, np.sum(terms)

${\displaystyle{\bar {x}}={\frac {\sum \limits _{i=1}^{n}w_{i}x_{i}}{\sum \limits _{i=1}^{n}w_{i}}}}$

For the weighted mean of a list of data for which each element $x_i$ potentially comes from a different probability distribution with known variance, $\sigma_i^2$, one possible choice for the weights is given by the reciprocal of variance:

${\displaystyle w_i = \frac{1}{\sigma_i^2} = \sigma_i^{-2}}$



With these weights, the standard error of the weighted mean is

${\displaystyle \sigma _{\bar {x}}=\left({\sqrt {\sum _{i=1}^{n}{w_{i}}}}\right)^{-1}}$

Substituting these weights into the definition of $\bar{x}$ gives

${\displaystyle{\bar {x}}={\frac {\sum \limits _{i=1}^{n}w_{i}x_{i}}{\sum \limits _{i=1}^{n}w_{i}}} = \frac {\sum \limits _{i=1}^{n}\sigma_i^{-2}x_{i}}{\sum \limits _{i=1}^{n}\sigma_i^{-2}}}$

Here $\sigma_i$ is the uncertainty on the measurement $x_i$.

In [3]:
def weighted_mean(values, errors):
    """
    Inverse-variance weighted mean of `values`.

    https://en.wikipedia.org/wiki/Weighted_arithmetic_mean

    Parameters:
    -----------
    values:  iterable of measurements x_i.
    errors:  iterable of the uncertainties sigma_i on those measurements.

    Returns:
    --------
    SUM(x_i * sigma_i**-2) / SUM(sigma_i**-2)

    A (value, error) pair is skipped entirely when either member is NaN,
    so the numerator and the denominator are always built from the same
    set of points.
    """
    weighted_values = []
    weights = []
    for val, err in zip(values, errors):
        # Drop the whole pair: keeping the weight of a NaN value in the
        # denominator only would bias the mean.
        if np.isnan(val) or np.isnan(err):
            continue
        # Float exponent on purpose: NumPy integers raise ValueError for
        # negative *integer* powers.
        weight = err ** -2.
        weighted_values.append(val * weight)
        weights.append(weight)
    return np.nansum(weighted_values) / np.nansum(weights)

def weighted_mean_uncertainty(errors):
    """
    Uncertainty on an inverse-variance weighted mean.

    Parameters:
    -----------
    errors:  iterable of the individual uncertainties sigma_i.

    Returns:
    --------
    ( SUM(sigma_i**-2) ) ** -0.5, with NaN terms left out of the sum.
    """
    inverse_variances = [err**-2. for err in errors]
    return np.nansum(inverse_variances) ** -0.5

In [4]:
def mu(values, errors):
    """
    Inverse-variance weighted mean of `values`.

    Computes the same estimator as weighted_mean, defined earlier in this
    notebook.

    Parameters:
    -----------
    values:  iterable of measurements x_i.
    errors:  iterable of the uncertainties sigma_i on those measurements.

    Returns:
    --------
    SUM(x_i / sigma_i**2) / SUM(sigma_i**-2)

    A (value, error) pair is skipped entirely when either member is NaN,
    so the numerator and the denominator are always built from the same
    set of points.
    """
    numerator_terms = []
    weights = []
    for val, err in zip(values, errors):
        # Drop the whole pair: a NaN value must not leave its weight
        # behind in the denominator.
        if np.isnan(val) or np.isnan(err):
            continue
        numerator_terms.append(val/(err**2))
        weights.append(err**-2.)
    return np.nansum(numerator_terms) / np.nansum(weights)

def mu_unc(errors):
    """
    Uncertainty that goes with mu(): the inverse square root of the
    summed inverse variances, (SUM(sigma_i**-2)) ** -0.5.

    NaN terms are ignored by the sum.
    """
    total_weight = np.nansum([sigma**-2. for sigma in errors])
    return total_weight**-0.5

In [38]:
# Replacement column labels, listed in column order and grouped by name prefix.
newcolnames = [
    'trigger', 'name', 'number', 'z', 't90', 'detector', 'LATburst',
    # flux / fluence columns
    'flux', 'flux_err_low', 'flux_err_up',
    'fluence', 'fluence_err_low', 'fluence_err_up',
    # DL columns
    'DL4', 'DL5', 'DL6',
    # eiso4 columns
    'eiso4', 'eiso4_err_low', 'eiso4_err_up', 'eiso4_err',
    # eiso5 columns
    'eiso5', 'eiso5_err_low', 'eiso5_err_up', 'eiso5_err',
]

In [39]:
# Relabel every column of df2 with the names in newcolnames.
# NOTE(review): df2 is not created in any cell visible here — confirm it is
# loaded before this cell when running from a fresh kernel.
df2.columns = newcolnames

In [42]:
# Combine df1 and df2 into one frame. No `on=` / `how=` is passed, so pandas
# does an inner join on every column name the two frames have in common.
# NOTE(review): df1 is not created in any cell visible here — confirm its source.
df = pd.merge(df1, df2)

In [43]:
# Preview the first rows of the merged frame.
df.head()

Unnamed: 0,trigger,name,number,z,t90,detector,LATburst,flux,flux_err_low,flux_err_up,...,DL5,DL6,eiso4,eiso4_err_low,eiso4_err_up,eiso4_err,eiso5,eiso5_err_low,eiso5_err_up,eiso5_err
0,bn080916009,080916C,5,4.35,62.977,L,Y,2e-06,1e-06,2e-06,...,1.3046679999999998e+29,1.130712e+29,3.802328e+54,3.725106e+54,3.87955e+54,7.722191e+52,2.8559710000000003e+54,2.7979680000000002e+54,2.913973e+54,5.800224e+52
1,bn090323002,90323,13,3.57,135.17,L,Y,1e-06,1e-06,1e-06,...,1.032153e+29,8.94533e+28,4.508223e+54,4.4084170000000005e+54,4.608028e+54,9.980578e+52,3.386176e+54,3.311211e+54,3.461141e+54,7.496523e+52
2,bn090328401,090328A,14,0.736,61.697,L,Y,1e-06,1e-06,1e-06,...,1.49948e+28,1.299549e+28,1.294631e+53,1.257364e+53,1.331899e+53,3.726721e+51,9.72412e+52,9.444202e+52,1.000404e+53,2.7991819999999997e+51
3,bn090510016,90510,17,0.903,0.96,L,Y,1.3e-05,1.2e-05,1.4e-05,...,1.92725e+28,1.670283e+28,3.035183e+52,2.824113e+52,3.246253e+52,2.110703e+51,2.27976e+52,2.121222e+52,2.438297e+52,1.585373e+51
4,bn090902462,090902B,20,1.822,19.328,L,Y,2e-05,2e-05,2.1e-05,...,4.576146e+28,3.965993e+28,3.688423e+54,3.6597900000000003e+54,3.7170570000000004e+54,2.863379e+52,2.7704160000000003e+54,2.748909e+54,2.791923e+54,2.150716e+52


In [44]:
# List the column labels of the merged frame.
df.columns

Index([u'trigger', u'name', u'number', u'z', u't90', u'detector', u'LATburst',
       u'flux', u'flux_err_low', u'flux_err_up', u'fluence',
       u'fluence_err_low', u'fluence_err_up', u'DL1', u'DL2', u'DL3', u'eiso1',
       u'eiso1_err_low', u'eiso1_err_up', u'eiso1_err', u'eiso2',
       u'eiso2_err_low', u'eiso2_err_up', u'eiso2_err', u'eiso3',
       u'eiso3_err_low', u'eiso3_err_up', u'eiso3_err', u'DL4', u'DL5', u'DL6',
       u'eiso4', u'eiso4_err_low', u'eiso4_err_up', u'eiso4_err', u'eiso5',
       u'eiso5_err_low', u'eiso5_err_up', u'eiso5_err'],
      dtype='object')

In [45]:
# Columns that the mean_eisos cell further down averages for each burst.
cols = ['eiso1', 'eiso2', 'eiso3', 'eiso4', 'eiso5']

In [46]:
# The 'trigger' column of df as a plain Python list.
bursts = df.trigger.tolist()

In [53]:
# Pair of df columns to compare.
col1 = 'eiso4'
col2 = 'eiso5'

# Row-wise difference of the base-10 logarithms of the two columns.
deltas = df[col1].apply(np.log10) - df[col2].apply(np.log10)
# Root-mean-square of those differences: full precision, then rounded to 3 decimals.
print(root_mean_square(deltas))
print('%.3f'%root_mean_square(deltas))

0.124295813498
0.124


In [54]:
# Start an empty list; the following cell appends RMS values to it.
kim = []

In [60]:
# Same log10-difference RMS as the earlier eiso4/eiso5 cell, but the result
# is also stored in kim.
col1 = 'eiso4'
col2 = 'eiso5'

deltas = df[col1].apply(np.log10) - df[col2].apply(np.log10)
# NOTE(review): this cell appends a single value, yet kim is displayed below
# with six entries — presumably the cell was re-run by hand with other
# col1/col2 pairs. Confirm which pairs; a fresh Run All yields one entry only.
kim.append(root_mean_square(deltas))
print('%.3f'%root_mean_square(deltas))

0.124


In [61]:
# Show every RMS value collected so far.
kim

[0.095801609043162428,
 0.18531024751783443,
 0.28033038551270434,
 0.036632674448362122,
 0.087663139049268229,
 0.12429581349763033]

In [62]:
# Smallest, largest and mean RMS over all collected entries.
min(kim), max(kim), np.mean(kim)

(0.036632674448362122, 0.28033038551270434, 0.13500564484482699)

In [65]:
# Mean of the first three kim entries, and mean of the remaining ones.
# NOTE(review): the split at index 3 presumably separates two groups of
# column pairs — confirm against the order in which kim was filled.
np.mean(kim[:3]), np.mean(kim[3:])

(0.18714741402456705, 0.082863875665086892)

In [67]:
# Min and max of the first three kim entries, rounded to 3 decimals.
print('%.3f  %.3f'%(np.min(kim[:3]) , np.max(kim[:3])))

0.096  0.280


In [68]:
# Min and max of the kim entries from index 3 onward, rounded to 3 decimals.
print('%.3f  %.3f'%(np.min(kim[3:]) , np.max(kim[3:])))

0.037  0.124


In [None]:
# Mean of the kim entries from index 3 onward (same quantity as the second
# value of the kim[:3] / kim[3:] comparison cell above).
np.mean(kim[3:])

In [None]:
# For each trigger in bursts, select the matching rows of df and average all
# values in the `cols` columns into a single number.
# `.values.mean()` is a plain NumPy mean over every selected cell.
# NOTE(review): a plain NumPy mean does not skip NaN — confirm the `cols`
# columns contain no missing values.
mean_eisos = []
for burst in bursts:
    mean_eisos.append(df[df['trigger'] == burst][cols].values.mean())

In [None]:
# Row-wise difference of the base-10 logarithms of eiso1 and eiso2.
deltas = df['eiso1'].apply(np.log10) - df['eiso2'].apply(np.log10)

In [None]:
# RMS of deltas written out with NumPy calls: sqrt(sum(deltas**2) / n).
# Same formula as root_mean_square, without going through the helper.
np.sqrt( np.sum(deltas**2)/len(deltas) )

In [None]:
# RMS of the same deltas via the helper, for comparison with the inline version above.
root_mean_square(deltas)