## Testing the sequential segmentation algorithm with simulated data


In [3]:
%matplotlib inline
import sys
sys.path.append('/home/paulo/github')

import time
import datetime

import numpy as np
import pandas as pd

from bayeseg.SeqSeg import SeqSeg

# Folder path, presumably where results are meant to be saved; it is not
# referenced by any of the cells shown here.
savefolder = '/home/paulo/github/bayeseg/Output/'

## Computing time for various time resolutions

In [4]:
# Timing benchmark: for each signal length, draw a standard-normal signal,
# feed it to the segmenter and record the duration reported by ss.segments
# for every value of the `res` argument.
ss = SeqSeg()

sizes = [10000, 100000, 1000000]
tres = [1, 10, 100, 1000]

# The other cells of this notebook call initialize as
# (beta, alpha, mciter, mcburn, nchains); the same positional order is used here.
ss.initialize(1, 0.1, 100, 200, 1)

result = []
for size in sizes:
    signal = np.random.normal(0, 1, (size, 1))
    ss.feed_data(signal)
    for step in tres:
        # Only the reported duration (second return value) is recorded.
        t, tdur = ss.segments(minlen=1000000, res=step, verbose=False)
        # Two spaces after "t =" are intentional: they reproduce the
        # separator print() inserts between two positional arguments.
        print("Size = {}, tres = {}, t =  {!s}".format(size, step, tdur))
        result.append([size, step, tdur])

Size = 10000, tres = 1, t =  0.013075113296508789
Size = 10000, tres = 10, t =  0.01116800308227539
Size = 10000, tres = 100, t =  0.009931087493896484
Size = 10000, tres = 1000, t =  0.011090517044067383
Size = 100000, tres = 1, t =  0.0749046802520752
Size = 100000, tres = 10, t =  0.007799386978149414
Size = 100000, tres = 100, t =  0.0027260780334472656
Size = 100000, tres = 1000, t =  0.0019636154174804688
Size = 1000000, tres = 1, t =  0.45624852180480957
Size = 1000000, tres = 10, t =  0.05502486228942871
Size = 1000000, tres = 100, t =  0.014348030090332031
Size = 1000000, tres = 1000, t =  0.008195877075195312
10000 & 1 & 0.01308 \\
10000 & 10 & 0.01117 \\
10000 & 100 & 0.009931 \\
10000 & 1000 & 0.01109 \\
100000 & 1 & 0.0749 \\
100000 & 10 & 0.007799 \\
100000 & 100 & 0.002726 \\
100000 & 1000 & 0.001964 \\
1000000 & 1 & 0.4562 \\
1000000 & 10 & 0.05502 \\
1000000 & 100 & 0.01435 \\
1000000 & 1000 & 0.008196 \\


## Testing $\alpha$ and $\beta$ with many segments

In [None]:
# Sensitivity of the segment count to alpha and beta.
#
# For each value in `deltalist` a Gaussian signal is simulated in which every
# other interval between consecutive `cuts` is multiplied by sqrt(delta)
# (i.e. its variance is scaled by delta). The signal is then segmented `nruns`
# times for every (beta, alpha) pair, and the mean / min / max number of
# segments and the mean reported duration are collected into `df`.
npoints = 1000000
cuts = [10000, 110000, 200000, 500000, 750000, 1000000]
deltalist = [1, 1.1, 1.5]
stdnoise = 1

mciter = 10000
mcburn = 10000
alphalist = [0.1, 0.5, 0.9, 0.99]
betalist = [1, 1e-1, 1e-2, 1e-3, 2e-3, 5e-3, 1e-4, 5e-4, 1e-5, 1e-6]

nruns = 30

ss = SeqSeg()
res = []
for delta in deltalist:
    # Simulates signal (noise level taken from stdnoise rather than a literal)
    signal = np.random.normal(0, stdnoise, [npoints, 1])
    # Rescale the intervals [cuts[0], cuts[1]), [cuts[2], cuts[3]), ...
    # Stepping by two and stopping before the last index keeps cuts[i+1]
    # in range for any number of cut points, not only an even one.
    for i in range(0, len(cuts) - 1, 2):
        signal[cuts[i]:cuts[i+1]] = signal[cuts[i]:cuts[i+1]]*(np.sqrt(delta))
    ss.feed_data(signal)
    for beta in betalist:
        for alpha in alphalist:
            ss.initialize(beta, alpha, mciter, mcburn, nchains = 1)
            nsegs = []   # segment count of each run (returned cut points + 1)
            tdurs = []   # duration reported by each run
            for _ in range(nruns):
                t, tdur = ss.segments(minlen = 5000, res = 100, verbose = False)
                nsegs.append(len(t) + 1)
                tdurs.append(tdur)

            nsegmean = sum(nsegs) / nruns
            # True extremes over the runs; no arbitrary starting sentinel.
            minseg = min(nsegs)
            maxseg = max(nsegs)
            tmean = sum(tdurs) / nruns

            print("Delta = " + str(delta) + ", alpha = " + str(alpha) + ", beta = " + str(beta) + ", " + str(nsegmean) + " segments on average in " + "{:.2}".format(tmean) + " seconds.")

            # Store the mean duration: the column is labelled 'tmean', so the
            # duration of the last run alone would be misleading.
            res.append([delta, beta, alpha, nsegmean, minseg, maxseg, tmean])


df = pd.DataFrame(res, columns = ['delta', 'beta', 'alpha', 'nsegmean', 'minseg', 'maxseg', 'tmean'])

## Calibration for a grid of $\beta$ and $\alpha$ with varying $\delta$

In [None]:
# Calibration run: one simulated signal is segmented once for every
# (alpha, beta) pair taken from two evenly spaced grids.

# --- Simulated signal ---
npoints = 1000000
cuts = [10000, 110000, 200000, 500000, 750000, 1000000]
# One entry per interval between consecutive cuts; each interval is
# multiplied by sqrt of its entry.
delta = [1.1, 1, 1.5, 1, 1.2]
stdnoise = 1

# --- Values passed to ss.initialize as mciter / mcburn ---
mciter = mcburn = 10000

# --- Alpha grid (with nstep = 0 it holds the single value alphamin) ---
alphamin, alphamax, nstep = 0.1, 0.1, 0
adelta = (alphamax - alphamin) / max(1, nstep)
alphalist = [alphamin + k*adelta for k in range(nstep+1)]

# --- Beta grid: nstep + 1 evenly spaced values from betamin to betamax ---
betamin, betamax, nstep = 1e-3, 1e-1, 100
bdelta = (betamax - betamin) / max(1, nstep)
betalist = [betamin + k*bdelta for k in range(nstep+1)]
# To evaluate the union of two separate grids, build a second range the same
# way (new betamin / betamax / nstep / bdelta) and append it to betalist.

# Number of segmentations per parameter combination
# (defined for reference; the loop below runs each combination once).
nruns = 1

ss = SeqSeg()
res = []  # not filled in this cell; results go to `result` / `tresult`

# Simulates signal
signal = np.random.normal(0, stdnoise, [npoints, 1])
for lo, hi, ratio in zip(cuts[:-1], cuts[1:], delta):
    signal[lo:hi] = signal[lo:hi]*(np.sqrt(ratio))

ss.feed_data(signal)

result = []    # [alpha, beta, number of segments] per combination
tresult = []   # `t` exactly as returned by ss.segments (left unsorted)
ntotal = len(betalist)*len(alphalist)
cont = 1       # 1-based progress counter
for alpha in alphalist:
    for beta in betalist:
        ss.initialize(beta, alpha, mciter, mcburn, nchains=1)
        t, tdur = ss.segments(minlen=5000, res=1, verbose=False)
        nseg = len(t)+1
        result.append([alpha, beta, nseg])
        tresult.append(t)

        print('({:.2%}) Alpha = {:.3}, Beta = {:.5}, {} segments'.format(cont/ntotal, alpha, beta, nseg))
        cont += 1


