In [1]:
import numpy as np
import dill
from lkf_tools.dataset import *

In [2]:
def get_lkfs_all(lkf_data):
    '''load the detected lkfs of every record listed in lkf_data.indexes.

    for each index `it` the file 'lkf_<name>_<it+1, zero padded to 3
    digits>.npy' is read from lkf_data.lkfpath, where <name> is the file
    name of lkf_data.netcdf_file without its directory and extension.

    returns a list with one loaded array per record
    (len(lkfs_all) = ntimesteps). the lkfs of one record are stored as
    (p_len,7) shaped arrays with p_len being their pixel length; longitude
    and latitude are stored in the [:,2] and [:,3] position, respectively.
    '''

    def record_file(it):
        # 'some/dir/name.nc' -> 'name'
        name = lkf_data.netcdf_file.split('/')[-1].split('.')[0]
        # the files on disk are numbered starting at 1, the indexes at 0
        return lkf_data.lkfpath.joinpath('lkf_%s_%03i.npy' % (name, it + 1))

    return [np.load(record_file(it), allow_pickle=True)
            for it in lkf_data.indexes]

def number_of_lkfs_daily(lkfs_all):
    '''count the detected lkfs of every record.

    lkfs_all is a sequence with one entry per record; the number of lkfs
    of a record is the length of the first axis of its entry.

    returns an integer array with len(lkfs_per_day) = ntimesteps.
    '''

    return np.array([np.shape(lkfs_of_day)[0] for lkfs_of_day in lkfs_all],
                    dtype='int')

def get_tracks_all(lkf_data):
    '''load the tracked pairs of every record listed in lkf_data.indexes.

    for each index `it` the file
    'lkf_tracked_pairs_<a>_to_<b>.npy' is read from
    lkf_data.track_output_path, where <a> and <b> are the entries `it` and
    `it+1` of lkf_data.lkf_filelist without their first 4 and last 4
    characters.

    returns a list with one loaded array per record
    (len(tracks_all) = ntimesteps). the tracks at record i are matching
    pairs, i.e. the number of an LKF in record i and the number of an
    associated LKF in record i+1.
    '''

    def pairs_file(it):
        # strip the leading 4 and the trailing 4 characters of both names
        this_record = lkf_data.lkf_filelist[it][4:-4]
        next_record = lkf_data.lkf_filelist[it + 1][4:-4]
        return lkf_data.track_output_path.joinpath(
            'lkf_tracked_pairs_%s_to_%s.npy' % (this_record, next_record))

    return [np.load(pairs_file(it), allow_pickle=True)
            for it in lkf_data.indexes]

def _first_successor(tracks, lkf_index):
    '''return the index that lkf `lkf_index` has in the following record
    according to the matching pairs `tracks` (column 0: index in this
    record, column 1: index in the following record).

    returns None if the lkf is not tracked or if `tracks` is empty. if the
    lkf has several associated features, only the first pair is used.
    '''
    tracks = np.asarray(tracks)
    if tracks.size == 0:
        # nothing was tracked at all; an empty array may not even be 2d
        return None

    hits = np.flatnonzero(tracks[:, 0] == lkf_index)
    if hits.size == 0:
        return None

    return tracks[hits[0], 1]


def get_lkf_paths(lkfs_all, tracks_all):
    '''returns the object array lkf_paths (len(lkf_paths) = len(lkfs_all)):
    lkf_paths[i] are the lkf paths that start at day i. a path contains
    the index of the lkf at every day it is tracked to. an lkf that is not
    tracked to the following day is stored as a plain integer, a tracked
    lkf as an integer array.

    e.g.
    lkf_paths[0][0] = 0              -> lkf 0 at day 1 is not tracked to day 2
    lkf_paths[0][4] = [4, 2, 5]      -> lkf 4 is tracked until day 3. in the
                                        second record, it has the index 2, in
                                        the third record the index 5

    lkfs_all   : sequence with the lkfs of every day; the length of the
                 first axis of each entry is the number of lkfs of that day
    tracks_all : sequence with the matching pairs of every day;
                 tracks_all[i] links day i to day i+1. it may be shorter
                 than lkfs_all, days without an entry are treated as
                 "nothing tracked".

    todo: in this configuration, only the first path is saved if the lkf has
    two associated feature in the following record.
    '''

    n_days = len(lkfs_all)
    n_tracks = len(tracks_all)
    lkf_paths = np.zeros(n_days, dtype='object')

    # follow every lkf of every day as far as it can be tracked
    for startday in range(n_days):
        paths = np.arange(np.shape(lkfs_all[startday])[0], dtype='object')

        for ind in range(len(paths)):
            path = paths[ind]
            last_index = paths[ind]

            for day in range(startday, n_tracks):
                successor = _first_successor(tracks_all[day], last_index)
                if successor is None:
                    break
                path = np.append(path, successor)
                last_index = successor

            paths[ind] = path

        lkf_paths[startday] = paths

    # remove already tracked paths, i.e. delete a path from day n if
    # it starts at day n-1, so only the paths that start at day n remain.
    # the successors have to be collected from the complete paths of all
    # days before anything is deleted.
    successors = [[np.array(path).flat[1]
                   for path in lkf_paths[day]
                   if np.array(path).size > 1]
                  for day in range(n_days - 1)]

    for day in range(1, n_days):
        tracked_from_previous_day = successors[day - 1]
        # without this check a day without any tracked lkf would delete
        # lkf 0 of the following day
        if tracked_from_previous_day:
            lkf_paths[day] = np.delete(lkf_paths[day],
                                       np.array(tracked_from_previous_day,
                                                dtype=int))

    return lkf_paths

def get_lifetimes(lkf_paths):
    '''compute the lifetime (number of records) of every lkf path.

    returns a tuple of
    - lifetimes: object array, lifetimes[i] contains the lifetimes of the
      paths that start at day i
    - mean_lifetime: the mean lifetime of all paths
    - mean_lifetime_tracked_lkfs: the mean lifetime of only those paths
      whose lifetime differs from one, i.e. that are tracked to at least
      one following day
    '''

    lifetimes = np.zeros_like(lkf_paths, dtype='object')

    for day, paths_of_day in enumerate(lkf_paths):
        lifetimes_of_day = np.zeros_like(paths_of_day, dtype='object')

        # the lifetime of a path is the number of records it spans
        for pos, path in enumerate(paths_of_day):
            lifetimes_of_day[pos] = np.array(path).size

        lifetimes[day] = lifetimes_of_day

    # mean over the paths of all start days
    mean_lifetime = np.mean(np.concatenate(lifetimes))

    # mean over the paths that do not end at their start day
    tracked_only = [lifetimes_of_day[lifetimes_of_day != 1]
                    for lifetimes_of_day in lifetimes]
    mean_lifetime_tracked_lkfs = np.mean(np.concatenate(tracked_only))

    return lifetimes, mean_lifetime, mean_lifetime_tracked_lkfs

In [3]:
path = "data/2015/lkf_pickle_files/lkf_data_2015_005.pickle"

# get the lkf data object. the context manager closes the file handle
# again as soon as the object is loaded.
# note: unpickling can execute arbitrary code, so only load pickle files
# from a trusted source.
with open(path, "rb") as pickle_file:
    lkf_data = dill.load(pickle_file)

In [4]:
# load the detected lkfs of every timestep into a list with one entry
# per record in lkf_data.indexes
lkfs_all = get_lkfs_all(lkf_data)

In [5]:
# get the number of lkfs at each timestep (integer array, one entry
# per record in lkfs_all)
n_lkfs = number_of_lkfs_daily(lkfs_all)

# print a two column table: record index (starting at 0) and lkf count
print('day   number of lkfs')
for ind, item in enumerate(n_lkfs):
    print(ind, '   ', item)

day   number of lkfs
0     90
1     63
2     55
3     69
4     90
5     79
6     71
7     73
8     77


In [6]:
# load the tracked pairs for every timestep; tracks_all[i] links the
# lkfs of record i to the lkfs of record i+1
tracks_all = get_tracks_all(lkf_data)

In [7]:
# show the first 15 (arbitrary number) matching pairs at each day.
# every printed row i holds the i-th pair of each record, separated by
# tabs; a record with fewer than i+1 pairs contributes an empty string.
print('\n'.join(['\t'.join([str(x[i]) if len(x)>i else '' for x in tracks_all]) for i in range(15)]))

[4 2]	[1 3]	[2 3]	[1 2]	[0 0]	[1 2]	[0 0]	[0 0]	[0 1]
[8 8]	[2 5]	[4 6]	[ 3 11]	[7 4]	[ 4 56]	[1 1]	[2 3]	[ 3 68]
[9 4]	[ 5 11]	[8 9]	[4 4]	[11  6]	[11  6]	[2 2]	[ 4 72]	[4 3]
[12 10]	[ 6 48]	[12 12]	[10 12]	[14  9]	[12 65]	[4 5]	[5 4]	[5 4]
[13  7]	[ 6 51]	[14 15]	[11  9]	[15 14]	[14  8]	[ 6 17]	[9 6]	[8 5]
[14  9]	[ 8 15]	[19 20]	[12 18]	[18 11]	[16 57]	[ 6 69]	[15 11]	[13 70]
[17  8]	[12 17]	[20 67]	[15 74]	[27 27]	[19 10]	[ 7 14]	[18 20]	[14  9]
[17 59]	[14 18]	[21 22]	[15 84]	[27 72]	[22 16]	[ 8 63]	[19 14]	[18 10]
[20 13]	[14 19]	[22 19]	[16 15]	[34 37]	[23 60]	[10 23]	[21 18]	[20 71]
[21 18]	[15 22]	[23 25]	[18 21]	[39 45]	[24 14]	[11 64]	[22 22]	[21 12]
[23 16]	[19 20]	[24 25]	[20 24]	[39 60]	[26 24]	[14 26]	[35 27]	[27 21]
[25 17]	[23 25]	[25 26]	[21 22]	[40 44]	[27 23]	[15 28]	[36 28]	[30 21]
[28 14]	[23 30]	[27 27]	[26 26]	[41 74]	[30 26]	[18 31]	[37 30]	[30 29]
[30 19]	[24 27]	[29 29]	[27 31]	[42 39]	[35 42]	[22 29]	[37 33]	[34 31]
[31 21]	[26 28]	[29 31]	[28 38]	[46 53]	[3

In [8]:
# get the lkf paths starting at each timestep; lkf_paths[i] only
# contains the paths that begin at record i
lkf_paths = get_lkf_paths(lkfs_all, tracks_all)

# get the lifetime of the lkf paths and their mean as well as
# the mean of only the paths that are tracked to at least one
# following record (lifetime different from one)
lifetimes, mean_lifetime, mean_lifetime_tracked_lkfs = get_lifetimes(lkf_paths)

In [9]:
# these are the lkf paths that start at day 2 (index 1). if a path starts
# already at day 1, it is not included here (only in lkf_paths[0]).
# plain integers are lkfs that are not tracked to the following day,
# arrays list the index of the lkf in each consecutive record.
lkf_paths[1]

array([0, 3, array([ 5, 11]), array([ 6, 48]), array([12, 17]), 22,
       array([23, 25, 26, 26]), array([26, 28]), 28, array([29, 32]),
       array([34, 29, 29]), 38, 39, 40, 41, 42, array([43, 31, 28, 38]),
       44, 45, array([48, 39]), 50, array([51, 29, 29]), 53,
       array([55, 12, 12, 18, 11,  6, 17]), 57, array([59, 15]), 60, 61],
      dtype=object)

In [10]:
mean_lifetime # mean lifetime of all paths, counted in records (days, assuming one record per day)

1.7635933806146573

In [11]:
mean_lifetime_tracked_lkfs # mean lifetime of the paths longer than one record, counted in records (days, assuming one record per day)

2.824858757062147