# Running mean diagnostics
Attempt to see if there is a more efficient way to compute running window diagnostics over array.

Initial time testing suggest that this new method is rubbish! At about 100x slower...

Removing the np.append function and accumulating variables as an interative sum is much better but still slower over larger matrices 

In [1]:
import numpy as np
import copy # For deep copying variables
from amm60_data_tools import window_strat # compute running window filtered stratification

import matplotlib.pyplot as plt  # plotting
%matplotlib inline






In [10]:
profile=np.random.rand(750,3,370,530)*1E3
[nt,nz,ny,nx] = np.shape(profile)
time_counter = np.arange(nt)
H = np.ones((nt,ny,nx))*100

winsiz = 3*24
doodbuff = 19
dt = 3
strat_3day = np.squeeze( window_strat(profile, time_counter, H ) )

var = (profile[:,0,:,:] - profile[:,2,:,:])/H # i.e. strat

In [11]:
print np.shape(profile)
print np.shape(strat_3day)
print np.shape(var)

(750, 3, 370, 530)
(213, 370, 530)
(750, 370, 530)


In [12]:
def runwin(var,dt,winsiz,doodbuff,op):
    """
    Function to compute running window operation.
    Append repeat matrices rolled in the time axis onto a new dimension at axis=0

    INPUT:
        var
        dt = 3
        winsiz  = 3*24
        doodbuff = 19
        op - operation. Either:
            op = 'mean'
            op = 'var'
            op = 'std'
    OUTPUT:
        runwin_var

    USEAGE:
        runwin_var = runwin(var,3,3*24,19,'mean')
    """


    nt = np.shape(var)[0]

#    # Compile all time slices for operation in new dimension
#    tt = var[np.newaxis,:] #Doesn't matter how many dims var has, as long as the first is time
#    for i in range(winsiz-1): # Already have one entry so range upto winsiz-1
#        tt = np.append(np.roll(tt,1,axis=1), var[np.newaxis,:], axis=0)


    # Compile all time slices for operation in new dimension
    tt = var #Doesn't matter how many dims var has, as long as the first is time
    for i in range(int(winsiz/2)): # This needs to be sorted for odd winsiz
        tt = tt + np.roll(var,1,axis=0) + np.roll(var,-1,axis=0) 



    # Collapse over new dimension
    if op == 'mean':
        runwin_tt = tt/winsiz # This denominator might be one off
    elif op == 'var':
        runwin_tt = np.var(tt, axis=0)    
    elif op == 'std':
        runwin_tt = np.std(tt, axis=0)    
    else:
        print 'not ready for that operation'

    # Remove snapshots in end buffer regions (which wrapped around in time).
    end_buff = range(nt - doodbuff-int(np.ceil(winsiz/2)),nt) # chop off end values
    start_buff = range(doodbuff+int(np.floor(winsiz/2))) # chop off start values
    #Subsample time axis
    subsample = [x for x in range(nt) if x not in range(0,nt,dt)]
    #print start_buff
    #print subsample
    #print end_buff
    
    # join the indices to be removed
    removelist = np.unique(np.append(np.append(end_buff, start_buff),subsample))
    runwin_var = np.delete(runwin_tt,removelist, axis=0)

    return runwin_var


# Time testing

In [13]:
import time

start = time.time()
runwin_var = runwin(var,3,3*24,19,'mean')
end = time.time()
print 'New method:',(end - start)
print 'file size:',np.shape(runwin_var)


New method: 111.328675032
file size: (213, 370, 530)


In [14]:

start = time.time()
strat_3day = np.squeeze( window_strat(profile, time_counter, H ) )
end = time.time()
print'Old method:',(end - start)
print 'file size:',np.shape(strat_3day)


Old method: 21.3961420059
file size: (213, 370, 530)


In [16]:
int(np.floor( (nt - winsiz - 2*doodbuff)/dt ))

21

In [18]:
print (175 - 24*3 - 2*19)/3
print 24*3/2

21
72


In [12]:
tt=runwin(var,3,3*34,19,'mean')



[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69]
[1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 79, 80, 82, 83, 85, 86, 88, 89, 91, 92, 94, 95, 97, 98, 100, 101, 103, 104, 106, 107, 109, 110, 112, 113, 115, 116, 118, 119, 121, 122, 124, 125, 127, 128, 130, 131, 133, 134, 136, 137, 139, 140, 142, 143, 145, 146, 148, 149, 151, 152, 154, 155, 157, 158, 160, 161, 163, 164, 166, 167, 169, 170, 172, 173]
[105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148,

In [7]:
print strat_3day.max()
print tt.min()

1.65996588731
-1.78516149877


In [8]:
int(np.floor(175 - 3*24 - 2*19)/3)

21

In [32]:
help(np.std)

Help on function std in module numpy.core.fromnumeric:

std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=<class numpy._globals._NoValue>)
    Compute the standard deviation along the specified axis.
    
    Returns the standard deviation, a measure of the spread of a distribution,
    of the array elements. The standard deviation is computed for the
    flattened array by default, otherwise over the specified axis.
    
    Parameters
    ----------
    a : array_like
        Calculate the standard deviation of these values.
    axis : None or int or tuple of ints, optional
        Axis or axes along which the standard deviation is computed. The
        default is to compute the standard deviation of the flattened array.
    
        .. versionadded: 1.7.0
    
        If this is a tuple of ints, a standard deviation is performed over
        multiple axes, instead of a single axis or all the axes as before.
    dtype : dtype, optional
        Type to use in computing the stan