In [73]:
import numpy as np
import matplotlib.pyplot as plt

# Saved NumPy array to load; the path is relative to the working directory.
FILE = 'data/pills-s0.npy'
# Loaded at cell execution time. Y is not referenced again in this cell.
Y = np.load(FILE)
# Index vector 0..999 used as the windowing input below.
# NOTE(review): the length 1000 is hard-coded - presumably meant to match len(Y); confirm.
X = np.arange(1000)

# Target number of elements per window, passed to windows().
WINDOW_LENGTH = 50

def interweave(a, b):
    """Interleave two equally long sequences element by element.

    The result is a flat array ordered ``a[0], b[0], a[1], b[1], ...``.

    Rectangular inputs go through ``np.column_stack`` + ``np.ravel`` directly.
    If that raises ``ValueError`` - which NumPy >= 1.24 does for ragged input
    such as lists of unequal-length arrays - each sequence is packed into a
    1-D object array first, so every element is kept whole and the result is
    a 1-D object array.

    Args:
        a: First sequence; supplies the even positions of the result.
        b: Second sequence; supplies the odd positions of the result.

    Returns:
        1-D ``numpy.ndarray`` with the elements of ``a`` and ``b`` alternating.

    Raises:
        ValueError: if ``a`` and ``b`` cannot be stacked side by side, e.g.
            because they hold a different number of elements.
    """
    try:
        return np.ravel(np.column_stack((a, b)))
    except ValueError:
        # Either the input is ragged (handled here) or the two sequences are
        # incompatible, in which case the second column_stack raises again.
        columns = []
        for seq in (a, b):
            col = np.empty(len(seq), dtype=object)
            # Element-wise assignment stops NumPy from trying to broadcast
            # the unequal-length items into one rectangular array.
            for i, item in enumerate(seq):
                col[i] = item
            columns.append(col)
        return np.ravel(np.column_stack(tuple(columns)))

def windows(x, window_len=50):
    """Cut ``x`` into consecutive segments plus 50%-overlapping windows between them.

    ``x`` is divided along its first axis into ``int(len(x) / window_len)``
    near-equal segments (``np.array_split`` semantics, so a segment can be
    longer than ``window_len`` when the length does not divide evenly).
    Between each pair of neighbouring segments one more window is added,
    running from the midpoint of the first to the midpoint of the second.

    Args:
        x: Sequence or array to window; it is split along its first axis.
        window_len: Target number of elements per window.

    Returns:
        list of arrays ordered by start index: segment, in-between window,
        segment, ... For segments of two or more elements that is
        ``2 * segs - 1`` windows; the half-length pieces before the first
        midpoint and after the last midpoint are dropped.

    Raises:
        ValueError: if ``int(len(x) / window_len)`` is smaller than 1, i.e.
            not even one full window fits into ``x``.
    """
    n = len(x)
    # divide into possibly uneven window lengths
    segs = int(n / window_len)
    if segs < 1:
        raise ValueError(
            'Cannot cut windows of length {} from {} elements'.format(window_len, n))
    # Segment boundaries are derived from an index array so they do not
    # depend on the values stored in x.
    i_splits = np.array_split(np.arange(n), segs)
    true_window_lengths = np.array([len(s) for s in i_splits])
    i_mins = np.array([s[0] for s in i_splits])
    # get overlap 50% window indices
    i_mids = i_mins + (true_window_lengths * 0.5).astype(int)
    # get windows into original array
    mins = np.array_split(x, i_mins)
    mids = np.array_split(x, i_mids)
    # Interleave in plain Python: the pieces have different lengths, and
    # stacking them into one array raises ValueError on NumPy >= 1.24.
    W = [w for pair in zip(mins, mids) for w in pair if len(w) > 0]
    # The first and last survivors are the half-length edge pieces.
    return W[1:-1]

# Window the index vector, then report count, per-window length and the
# (first, last) value of every window.
W = windows(X, WINDOW_LENGTH)
print('# windows: {}'.format(len(W)))
print('window lengths:', np.array(list(map(len, W))))
print('window durations:', [(win[0], win[-1]) for win in W])
# Show only the first and last five windows, one item per output line.
print('windows:', W[:5], '...', W[-5:], sep='\n')

# windows: 39
window lengths: [50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50
 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50]
window durations: [(0, 49), (25, 74), (50, 99), (75, 124), (100, 149), (125, 174), (150, 199), (175, 224), (200, 249), (225, 274), (250, 299), (275, 324), (300, 349), (325, 374), (350, 399), (375, 424), (400, 449), (425, 474), (450, 499), (475, 524), (500, 549), (525, 574), (550, 599), (575, 624), (600, 649), (625, 674), (650, 699), (675, 724), (700, 749), (725, 774), (750, 799), (775, 824), (800, 849), (825, 874), (850, 899), (875, 924), (900, 949), (925, 974), (950, 999)]
windows:
[array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]), array([25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
       42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 

In [68]:
import numpy as np
import matplotlib.pyplot as plt

# Saved NumPy array to load; the path is relative to the working directory.
FILE = 'data/pills-s0.npy'
# Loaded at cell execution time. Y is not referenced again in this cell.
Y = np.load(FILE)
# Index vector 0..999 used as the windowing input below.
# NOTE(review): the length 1000 is hard-coded - presumably meant to match len(Y); confirm.
X = np.arange(1000)

# Target number of elements per window, passed to windows().
WINDOW_LENGTH = 50
# Overlap fraction passed to windows(); must be one of VALID_OVERLAPS.
OVERLAP = .5

# Overlap fractions parse_overlap() accepts; it replaces .66 with 2/3.
VALID_OVERLAPS = [.5, .66, .8, .9, .95]

def parse_overlap(overlap):
    """Validate an overlap fraction and derive the matching step values.

    Args:
        overlap: Overlap fraction; must be listed in ``VALID_OVERLAPS``.
            ``.66`` is treated as shorthand for exactly two thirds.

    Returns:
        Tuple ``(overlap, inc, incs, iters)``:
        ``overlap`` is the input with ``.66`` replaced by ``2/3``;
        ``iters`` is ``round(1 / (1 - overlap))`` as an int;
        ``inc`` is ``1 / iters``, so ``overlap + inc`` is 1 up to float error;
        ``incs`` is an ``(iters, 1)`` array holding ``inc * i`` for
        ``i = 0 .. iters - 1``.

    Raises:
        ValueError: if ``overlap`` is not in ``VALID_OVERLAPS``.
    """
    if overlap not in VALID_OVERLAPS:
        raise ValueError('Cannot handle overlap of: {}. Valid values are: {}'.format(overlap, VALID_OVERLAPS))
    if overlap == .66:
        overlap = 2/3
    # Round the step *count* and derive the step from it. Rounding the step
    # itself to two decimals turned 1/3 into 0.33, which no longer matched
    # the 2/3 overlap; for .5, .8, .9 and .95 both approaches agree.
    iters = round(1 / (1 - overlap))
    inc = 1 / iters
    incs = np.array([inc*i for i in range(iters)]).reshape(-1, 1)
    return overlap, inc, incs, iters

def interweave(a, b):
    """Interleave two equally long sequences element by element.

    The result is a flat array ordered ``a[0], b[0], a[1], b[1], ...``.

    Rectangular inputs go through ``np.column_stack`` + ``np.ravel`` directly.
    If that raises ``ValueError`` - which NumPy >= 1.24 does for ragged input
    such as lists of unequal-length arrays - each sequence is packed into a
    1-D object array first, so every element is kept whole and the result is
    a 1-D object array.

    Args:
        a: First sequence; supplies the even positions of the result.
        b: Second sequence; supplies the odd positions of the result.

    Returns:
        1-D ``numpy.ndarray`` with the elements of ``a`` and ``b`` alternating.

    Raises:
        ValueError: if ``a`` and ``b`` cannot be stacked side by side, e.g.
            because they hold a different number of elements.
    """
    try:
        return np.ravel(np.column_stack((a, b)))
    except ValueError:
        # Either the input is ragged (handled here) or the two sequences are
        # incompatible, in which case the second column_stack raises again.
        columns = []
        for seq in (a, b):
            col = np.empty(len(seq), dtype=object)
            # Element-wise assignment stops NumPy from trying to broadcast
            # the unequal-length items into one rectangular array.
            for i, item in enumerate(seq):
                col[i] = item
            columns.append(col)
        return np.ravel(np.column_stack(tuple(columns)))

def windows(x, window_len=50, overlap=0.5):
    """Cut ``x`` into consecutive segments interleaved with half-shifted windows.

    ``x`` is divided along its first axis into ``int(len(x) / window_len)``
    near-equal segments (``np.array_split`` semantics). A second set of
    pieces is cut at the segment midpoints, and the two sets are interleaved
    in start-index order. Diagnostic values are printed along the way.

    NOTE(review): ``overlap`` is validated and reported through
    ``parse_overlap``, but the windows themselves are always cut with a 50%
    shift; ``d_incs`` is only printed. The returned list also keeps the
    half-length pieces at both ends, so for ``overlap=0.5`` it holds two more
    entries than the printed 'expected data size'.

    Args:
        x: Sequence or array to window; it is split along its first axis.
        window_len: Target number of elements per window.
        overlap: Overlap fraction; must be accepted by ``parse_overlap``.

    Returns:
        list of non-empty arrays, alternating midpoint-cut pieces and
        segments, ordered by start index.

    Raises:
        ValueError: if not even one full window fits into ``x``, or if
            ``parse_overlap`` rejects ``overlap``.
    """
    # get indices array
    I = np.arange(len(x))
    # divide into possibly uneven window lengths
    segs = int(len(x) / window_len)
    if segs < 1:
        raise ValueError(
            'Cannot cut windows of length {} from {} elements'.format(window_len, len(x)))
    splits = np.array_split(I, segs)
    true_window_lengths = np.array([len(s) for s in splits])
    i_mins = np.array([s[0] for s in splits])
    # parse overlap
    overlap, inc, _incs, iters = parse_overlap(overlap)
    print('overlap:', overlap)
    print('iters:', iters)
    print('inc:', inc, _incs)
    print('expected data size:', int(segs*iters)-1)
    # get overlap 50% window indices
    incs = (true_window_lengths * 0.5).astype(int)
    # Per-segment offsets for every step of the requested overlap; shown for
    # inspection only, the cut points below still use `incs`.
    d_incs = (true_window_lengths * _incs).round().astype(int)
    print('window lengths:', true_window_lengths)
    print('dincs:', d_incs)
    print('incs:', incs)
    i_mids = i_mins + incs
    # get windows into original array
    mins = np.array_split(x, i_mins)
    mids = np.array_split(x, i_mids)
    # Interleave in plain Python: the pieces have different lengths, and
    # stacking them into one array raises ValueError on NumPy >= 1.24.
    W = [w for pair in zip(mins, mids) for w in pair if len(w) > 0]
    return W



# Window the index vector with the configured length and overlap.
W = windows(X, WINDOW_LENGTH, OVERLAP)

# Report count, per-window length and the (first, last) value of every window.
print('# windows: {}'.format(len(W)))
print('window lengths:', np.array(list(map(len, W))))
print('window durations:', [(win[0], win[-1]) for win in W])

overlap: 0.5
iters: 2
inc: 0.5 [[0. ]
 [0.5]]
expected data size: 39
window lengths: [50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50]
dincs: [[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25]]
incs: [25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25 25]
# windows: 41
window lengths: [25 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50
 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 25]
window durations: [(0, 24), (0, 49), (25, 74), (50, 99), (75, 124), (100, 149), (125, 174), (150, 199), (175, 224), (200, 249), (225, 274), (250, 299), (275, 324), (300, 349), (325, 374), (350, 399), (375, 424), (400, 449), (425, 474), (450, 499), (475, 524), (500, 549), (525, 574), (550, 599), (575, 624), (600, 649), (625, 674), (650, 699), (675, 724), (700, 749), (725, 774), (750, 799), (775, 824), (800, 849), (825, 874), (850, 899), (875, 924), (900, 949), (925, 974), (950, 999), (975, 9