In [8]:
import StreamStatistics
import random
import time
import numpy as np

from StreamStatistics import SimpleTDigest

## Synthetic Pythonic stream ##

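A Gaussian distribution with mean $\mu = 0$ and standard deviation $\sigma = 2$ (i.e. variance $\sigma^2 = 4$) will be used. For each method we check how many samples are needed before the estimate is within $\epsilon = 0.01$ of the true value.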

In [2]:
def gaussian_stream(mu=0.0, sigma=2.0, rate=1.0):
    """
    Endless, throttled source of normally distributed random samples.

    Parameters
    ----------
    mu : float
        Mean passed to ``random.gauss``.
    sigma : float
        Standard deviation passed to ``random.gauss``.
    rate : float
        Target number of samples per second; the generator pauses for
        ``1.0 / rate`` seconds after each sample has been consumed.

    Yields
    ------
    float
        One draw from ``random.gauss(mu, sigma)`` per iteration.
    """
    # The pause length is fixed for the lifetime of the generator.
    pause_seconds = 1.0 / rate
    while True:
        yield random.gauss(mu, sigma)
        # Runs only when the consumer asks for the next sample, so the very
        # first value is delivered without any delay.
        time.sleep(pause_seconds)


In [3]:
# Ground-truth parameters the estimates are compared against, and the
# tolerance used to declare convergence.
mu = 0.0
sigma = 2.0
Epsilon = 0.01

gaussian_stream_generator = gaussian_stream()
MovingSequential = StreamStatistics.MovingStatistics()

# Feed the estimator one sample at a time and stop as soon as either running
# estimate lands within Epsilon of its true value.
for i, sample in enumerate(gaussian_stream_generator):
    MovingSequential.UpdateAll(sample)
    print("Current Mean", MovingSequential.mean)

    if abs(mu - MovingSequential.mean) <= Epsilon:
        print(f"Mean converge!! at {i}")
        break

    # The true variance is sigma squared, since sigma is a standard deviation.
    if abs(sigma ** 2 - MovingSequential.variance) <= Epsilon:
        print(f"Variance converge {i}!!")
        break

    # Only reached while neither estimate has converged yet.
    print("Current Variance", MovingSequential.variance)



Current Mean -1.2333017016713241
Current Variance 0
Current Mean -0.8826541552924162
Current Variance 0.24590740356309682
Current Mean 0.020176891383429485
Current Variance 2.568265398306958


KeyboardInterrupt: 

The mean has converged after 66 samples while the variance is still not close to the true one.

In [4]:
# Ground-truth parameters the estimates are compared against, and the
# tolerance used to declare convergence.
mu = 0.0
sigma = 2.0
Epsilon = 0.01

gaussian_stream_generator = gaussian_stream()
MovingExponential = StreamStatistics.MovingStatistics(method="exponential")

# Previous-iteration estimates used by the stability checks below. They must
# exist before the first iteration: otherwise the checks raise NameError on a
# fresh kernel, or silently reuse values left behind by another cell.
# Starting at +inf makes the "changed by less than Epsilon" test False until a
# real previous estimate has been recorded.
last_mean = float("inf")
last_variance = float("inf")

for i, sample in enumerate(gaussian_stream_generator):
    MovingExponential.UpdateAll(sample)
    print("Current Mean", MovingExponential.mean)

    # Converged = close to the true value AND stable since the previous sample.
    if (abs(mu - MovingExponential.mean) <= Epsilon
            and abs(last_mean - MovingExponential.mean) <= Epsilon):
        print(f"Mean converge!! at {i}")
        break

    print("Current Variance", MovingExponential.variance)

    # The true variance is sigma squared, since sigma is a standard deviation.
    if (abs(sigma ** 2 - MovingExponential.variance) <= Epsilon
            and abs(last_variance - MovingExponential.variance) <= Epsilon):
        print(f"Variance converge {i}!!")
        break

    # Remember this iteration's estimates for the next stability check.
    last_mean = MovingExponential.mean
    last_variance = MovingExponential.variance

    



Current Mean -0.03581577932311007
Current Variance 0
Current Mean -0.04557040502311731
Current Variance 0.004634428197058078
Current Mean -0.1908203027426411
Current Variance 0.07627443955981789


KeyboardInterrupt: 

The mean converges at sample 176, while the variance is still far from its true value.


In [5]:
# Ground-truth parameters of the stream and the convergence tolerance.
mu = 1.0
sigma = 2.0
Epsilon = 0.01

# Build the stream from the declared parameters so the values the estimates
# are compared against are the ones actually being sampled (the generator's
# own defaults use a different mean).
gaussian_stream_generator = gaussian_stream(mu=mu, sigma=sigma)
WindowStatistic = StreamStatistics.WindowSequentialStatistics(window_size=30)

# Previous-iteration estimates used by the stability checks below. They must
# exist before the first iteration: otherwise the checks raise NameError on a
# fresh kernel, or silently reuse values left behind by another cell.
# Starting at +inf makes the "changed by less than Epsilon" test False until a
# real previous estimate has been recorded.
last_mean = float("inf")
last_variance = float("inf")

for i, sample in enumerate(gaussian_stream_generator):
    WindowStatistic.UpdateAll(sample)
    print("Current Mean", WindowStatistic.mean)
    print("Current window", WindowStatistic.window)

    # Converged = close to the true value AND stable since the previous sample.
    # NOTE(review): the recorded output shows mean and variance reported as 0
    # while the window is still filling; confirm that warm-up value can never
    # satisfy these checks for the chosen mu / sigma.
    if (abs(mu - WindowStatistic.mean) <= Epsilon
            and abs(last_mean - WindowStatistic.mean) <= Epsilon):
        print(f"Mean converge!! at {i}")
        break

    print("Current Variance", WindowStatistic.variance)

    # The true variance is sigma squared, since sigma is a standard deviation.
    if (abs(sigma ** 2 - WindowStatistic.variance) <= Epsilon
            and abs(last_variance - WindowStatistic.variance) <= Epsilon):
        print(f"Variance converge {i}!!")
        break

    # Remember this iteration's estimates for the next stability check.
    last_mean = WindowStatistic.mean
    last_variance = WindowStatistic.variance

    



N is still smaller than window size
Current Mean 0
Current window [0.37046167 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.        ]
Current Variance 0
N is still smaller than window size
Current Mean 0
Current window [ 0.37046167 -2.79061565  0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.        ]
Current Variance 0
N is still smaller than window size
Current Mean 0
Current window [ 0.37046167 -2.79061565  0.08579302  0.          0.          0.
  0.          0.          0.          0.          0.         

KeyboardInterrupt: 

The windowed estimator converges only for large window sizes.

In [9]:
# Parameters of the sampled distribution; they are passed to the generator
# below so the declared values are the ones actually used.
mu = 1.0
sigma = 2.0
Epsilon = 0.01  # not used by this cell
# Upper bound on the number of samples consumed. The stream itself is
# infinite, so without a bound this cell could only be stopped by interrupting
# the kernel.
MAX_SAMPLES = 50

gaussian_stream_generator = gaussian_stream(mu=mu, sigma=sigma)
Digest = SimpleTDigest(delta=0.1)
data_list = []  # every sample seen so far, kept for the exact reference percentiles

# zip() stops once range() is exhausted and does so before requesting another
# sample, so exactly MAX_SAMPLES values are drawn from the stream.
for i, sample in zip(range(MAX_SAMPLES), gaussian_stream_generator):
    Digest.update(sample)
    data_list.append(sample)

    # Digest size and its percentile estimates ...
    print(len(Digest.centroids))
    print("predicted 75%", Digest.percentile(75))
    print("predicted 50%", Digest.percentile(50))
    print("predicted 25%", Digest.percentile(25))

    # ... next to the exact percentiles of all samples collected so far.
    q25, q50, q75 = np.percentile(data_list, [25, 50, 75])
    print(f"25%: {q25}, 50%: {q50}, 75%: {q75}")


1
predicted 75% 2.574040274328646
predicted 50% 2.574040274328646
predicted 25% 2.574040274328646
25%: 2.574040274328646, 50%: 2.574040274328646, 75%: 2.574040274328646
2
predicted 75% 2.574040274328646
predicted 50% -1.1771260195017124
predicted 25% -1.1771260195017124
25%: -0.23933444604412268, 50%: 0.6984571274134668, 75%: 1.6362487008710564
3
predicted 75% 2.574040274328646
predicted 50% -0.19482774824969448
predicted 25% -1.1771260195017124
25%: -0.6859768838757034, 50%: -0.19482774824969448, 75%: 1.189606263039476
4
predicted 75% 2.143367283990442
predicted 50% -0.19482774824969448
predicted 25% -1.1771260195017124
25%: -0.440402316062699, 50%: 0.9742697678703738, 75%: 2.251035531574993


KeyboardInterrupt: 