# Uniform distribution Generators

## Setup

### Standard library imports

In [1]:
%load_ext autoreload

In [2]:
import sys
import warnings
import logging

sys.path.append('..')
warnings.filterwarnings('ignore')
logger = logging.getLogger(__name__)

### Standard library imports

In [3]:
import os
from itertools import product

### Thrid Party libraries imports

In [4]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import chisquare
from tqdm import tqdm

### Modules imports

In [30]:
%autoreload 2

from source.generators.linear import random_distribution
from source import distributions

### Default parameters histograms 

In [31]:
generator_keys = ('uniform', 'sine', 'sawtooth', 'fibonacci')

In [32]:
plt.figure(figsize=(12, 6))

for generator in generator_keys:
    plt.hist(random_distribution(10_000, generator), bins=20)
    plt.grid()
    plt.title(generator, fontsize=16)
    plt.savefig(os.path.join(r'..\\images\\generators', 'default_' + generator + '.png'))
    plt.clf()

<Figure size 864x432 with 0 Axes>

### Test generators

Testing generators with large number of samples to check if variables are in fact uniformly distributed

In [42]:
for generator in ('uniform', 'sine', 'sawtooth', 'fibonacci'):
    chisquared_result = chisquare(random_distribution(10_000, generator))
    print(f"Chi^2 test for {generator}\nStatistic: {chisquared_result[0]:.2f}\nP Value: {chisquared_result[1]:.2f}", end="\n\n")

Chi^2 test for uniform
Statistic: 1649.61
P Value: 1.00

Chi^2 test for sine
Statistic: 2207.60
P Value: 1.00

Chi^2 test for sawtooth
Statistic: 1650.27
P Value: 1.00

Chi^2 test for fibonacci
Statistic: 1666.11
P Value: 1.00



In [67]:
for generator in ('uniform', 'sine', 'sawtooth', 'fibonacci'):
    chisquared_result = chisquare(random_distribution(10, generator))
    print(f"Chi^2 test for {generator}\nStatistic: {chisquared_result[0]:.2f}\nP Value: {chisquared_result[1]:.2f}", end="\n\n")

Chi^2 test for uniform
Statistic: 1.30
P Value: 1.00

Chi^2 test for sine
Statistic: 2.84
P Value: 0.97

Chi^2 test for sawtooth
Statistic: 1.48
P Value: 1.00

Chi^2 test for fibonacci
Statistic: 0.04
P Value: 1.00



### Test starting points for periodic generators

Test different samples in sinusoidal and sawtooth arrays to check wheater they will results in expected behaviour and return uniform distributions

In [71]:
starting_points = 0.1 * np.arange(0, 11)
frequencies = np.pi * np.arange(0, 10)
samples = np.linspace(10, 100_000, 40, dtype=np.int)

In [34]:
for starting_point, frequency in product(starting_points, frequencies):
    plt.hist(random_distribution(10_000, 'sine', omega=frequency, first_sample=starting_point), bins=20)
    plt.grid()
    plt.title(generator, fontsize=16)
    plt.savefig(os.path.join(
        r'..\\images\\generators',
        'sine_' + '_x0=' + str(round(starting_point, 2)) + '_omega=' + str(round(frequency, 2)) + '.png'))
    plt.clf()

<Figure size 432x288 with 0 Axes>

In [35]:
for starting_point, frequency in product(starting_points, frequencies):
    plt.hist(random_distribution(10_000, 'sawtooth', period=frequency, first_sample=starting_point), bins=20)
    plt.grid()
    plt.title(generator, fontsize=16)
    plt.savefig(os.path.join(
        r'..\\images\\generators',
        'sawtooth_' + '_x0=' + str(round(starting_point, 2)) + '_omega=' + str(round(frequency, 2)) + '.png')
               )
    plt.clf()

<Figure size 432x288 with 0 Axes>

### Aggretating Chi^2 for perdioc generators

In [72]:
f"Running {2 * samples.shape[0] * starting_points.shape[0] * frequencies.shape[0]} iterations"

'Running 8800 iterations'

In [None]:
results = []
for generator, (n_samples, (starting_point, frequency)) in tqdm(
    product(('sine', 'sawtooth'),
            product(samples,
                    product(starting_points, frequencies)
                   )
           )
):
    # Compute chi^2 for each distribution
    distribution = random_distribution(n_samples, generator, omega=frequency, first_sample=starting_point)
    chi_2 = chisquare(distribution)
    results.append([generator, starting_point, frequencies, n_samples, chi_2[0], chi_2[1]])

4030it [10:55,  2.79it/s] 

In [None]:
df_chi_squared_results = pd.DataFrame.from_records(results, columns=['generator', 'starting_point', 'omega', 'chi_2_statistic', 'chi_2_p_value'])

### Testing the fibonacci generator

In [73]:
pq_pairs = (
    (8, 4),
    (100, 5),
    (100, 96),
)

In [None]:
for p, q in pq_pairs:
    plt.hist(random_distribution(10_000, 'fibonacci', p=p, q=q), bins=20)
    plt.grid()
    plt.title(fibonacci, fontsize=16)
    plt.savefig(os.path.join(
        r'..\\images\\generators',
        'fibonacci' + '_p=' + str(p) + '_q=' + str(q) + '.png')
               )
    plt.clf()

### Aggregating Chi^2 stats

In [77]:
samples = np.linspace(10, 100_000, 40, dtype=np.int)

In [None]:
results = []
for generator, n_samples in tqdm(product(('sine', 'sawtooth', 'fibonacci'), samples)):
    chisquared_result = chisquare(random_distribution(n_samples, generator))
    results.append([generator, n_samples, chiaquared_result[0], chiaquared_result[1]])

114it [05:15, 14.01s/it]

In [None]:
pd.DataFrame.from_records(results, columns=['generator', 'n_samples', 'chi2_stat', 'chi2_p_value'])