# Uniform distribution Generators

## Setup

### Standard library imports

In [2]:
%load_ext autoreload

In [3]:
import sys
import warnings
import logging

# Module-level logger named after the running module.
logger = logging.getLogger(__name__)

# The 'ignore' action with no message/category/module restriction matches
# every warning, so nothing is shown in the cell outputs.
warnings.filterwarnings(action='ignore')

# Extend the import search path with the parent directory; presumably that is
# where the project's `source` package lives — confirm against the repo layout.
sys.path.append('..')

### Standard library imports

In [4]:
import os
from itertools import product

### Third-party library imports

In [5]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import chisquare
from tqdm import tqdm

### Modules imports

In [6]:
%autoreload 2

from source.generators.linear import random_uniform_distribution
from source import distributions

### Default parameters histograms 

In [None]:
# Generator names; each one is passed as the second argument to
# `random_uniform_distribution` when the default histograms are drawn.
generator_keys = (
    'uniform',
    'sine',
    'sawtooth',
    'fibonacci',
)

In [None]:
# Draw a 20-bin histogram of 10,000 samples for every generator with its
# default parameters and save one PNG per generator.
plt.figure(figsize=(12, 6))

for generator in generator_keys:
    plt.hist(random_uniform_distribution(10_000, generator), bins=20)
    plt.grid()
    plt.title(generator, fontsize=16)
    # Build the path from its components so the host OS separator is used;
    # a raw string with hard-coded (doubled) backslashes only resolves on Windows.
    plt.savefig(os.path.join('..', 'images', 'generators', f'default_{generator}.png'))
    # Clear the current figure so the next histogram starts from an empty canvas.
    plt.clf()

### Test generators

Testing the generators with a large number of samples to check whether the variables are in fact uniformly distributed.

In [None]:
# Run SciPy's chi-square test on 10,000 samples from each generator and print
# the statistic and p-value with two decimals.
# NOTE(review): `chisquare` is documented as taking observed category
# frequencies, while the raw samples are passed here — confirm this is the
# intended use (binning first, e.g. with np.histogram, may be needed).
for generator in ('uniform', 'sine', 'sawtooth', 'fibonacci'):
    large_sample = random_uniform_distribution(10_000, generator)
    statistic, p_value = chisquare(large_sample)
    print(f"Chi^2 test for {generator}\nStatistic: {statistic:.2f}\nP Value: {p_value:.2f}", end="\n\n")

In [None]:
# Same chi-square report as for the large sample, but with only 10 samples
# per generator.
# NOTE(review): `chisquare` is documented as taking observed category
# frequencies, while the raw samples are passed here — confirm this is the
# intended use.
for generator in ('uniform', 'sine', 'sawtooth', 'fibonacci'):
    small_sample = random_uniform_distribution(10, generator)
    test_outcome = chisquare(small_sample)
    print(
        f"Chi^2 test for {generator}\nStatistic: {test_outcome[0]:.2f}\nP Value: {test_outcome[1]:.2f}",
        end="\n\n",
    )

### Test starting points for periodic generators

Test different samples in sinusoidal and sawtooth arrays to check whether they result in the expected behaviour and return uniform distributions.

In [None]:
# Parameter grid for the periodic (sine / sawtooth) generators.
# Eleven starting points 0.0, 0.1, ..., 1.0 (used as `first_sample`).
starting_points = 0.1 * np.arange(0, 11)
# Ten multiples of pi: 0, pi, ..., 9*pi (used as `period`).
# NOTE(review): the first value is 0 — confirm the generators accept a zero period.
frequencies = np.pi * np.arange(0, 10)
# Ten sample counts evenly spaced from 10 to 10,000.
# `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
samples = np.linspace(10, 10_000, 10, dtype=int)

In [None]:
for starting_point, frequency in product(starting_points, frequencies):
    plt.hist(random_uniform_distribution(10_000, 'sine', period=frequency, first_sample=starting_point), bins=20)
    plt.grid()
    plt.title(generator, fontsize=16)
    plt.savefig(os.path.join(
        r'..\\images\\generators',
        'sine_' + '_x0=' + str(round(starting_point, 2)) + '_omega=' + str(round(frequency, 2)) + '.png'))
    plt.clf()

In [None]:
# Histogram 10,000 'sawtooth' samples for every (starting point, frequency) pair
# and save each plot under a file name that encodes both parameters.
for starting_point, frequency in product(starting_points, frequencies):
    plt.hist(random_uniform_distribution(10_000, 'sawtooth', period=frequency, first_sample=starting_point), bins=20)
    plt.grid()
    # Title with the generator actually plotted: `generator` is not bound by
    # this loop, so using it would show whatever value an earlier cell left behind.
    plt.title('sawtooth', fontsize=16)
    # Path built from components so it resolves on any OS; the file name
    # itself is kept unchanged.
    plt.savefig(os.path.join(
        '..', 'images', 'generators',
        'sawtooth_' + '_x0=' + str(round(starting_point, 2)) + '_omega=' + str(round(frequency, 2)) + '.png'))
    plt.clf()

### Aggregating Chi^2 for periodic generators

In [None]:
# Number of generator runs in the aggregation: two generators times every
# (sample count, starting point, frequency) combination.
f"Running {2 * len(samples) * len(starting_points) * len(frequencies)} iterations"

In [None]:
# Collect chi-square statistics for both periodic generators over the full grid
# of sample counts, starting points and frequencies.
periodic_generators = ('sine', 'sawtooth')
# `product` returns an iterator without a length, so tqdm needs the total
# passed explicitly; without it only a running count is shown, with no
# progress bar or ETA.
n_iterations = len(periodic_generators) * len(samples) * len(starting_points) * len(frequencies)

results = []
# A single flat `product` visits the combinations in the same order as the
# nested form (generator, then sample count, then starting point, then frequency).
for generator, n_samples, starting_point, frequency in tqdm(
        product(periodic_generators, samples, starting_points, frequencies),
        total=n_iterations):
    # Compute chi^2 for each distribution
    # NOTE(review): `chisquare` is documented as taking observed category
    # frequencies, while the raw samples are passed here — confirm this is intended.
    distribution = random_uniform_distribution(n_samples, generator, period=frequency, first_sample=starting_point)
    chi_2 = chisquare(distribution)
    results.append([generator, starting_point, frequency, n_samples, chi_2[0], chi_2[1]])

In [None]:
# Tabulate the collected rows; the column order mirrors the order in which
# the values were appended to each `results` entry.
df_chi_squared_results = pd.DataFrame.from_records(
    data=results,
    columns=[
        'generator',
        'starting_point',
        'omega',
        'samples',
        'chi_2_statistic',
        'chi_2_p_value',
    ],
)

In [None]:
# Persist the aggregated results as CSV. The DataFrame index is written as the
# first column because `index` is left at its default.
df_chi_squared_results.to_csv(path_or_buf='../data/chi_square_test_results.csv')

### Testing the fibonacci generator

In [1]:
# (p, q, m) triples passed as keyword arguments to the 'fibonacci' generator.
# NOTE(review): presumably p and q are the two lags and m the modulus of a
# lagged-Fibonacci recurrence — confirm against the generator implementation.
pqm_pairs = (
    # m = 100
    (8, 4, 100), (100, 5, 100), (100, 96, 100),
    # m = 1000
    (1000, 996, 1000), (1000, 100, 1000),
    # small m with p = 8, q = 4
    (8, 4, 10), (8, 4, 5),
)

In [7]:
# Histogram 10,000 'fibonacci' samples for every (p, q, m) triple and save each
# plot under a file name that encodes the three parameters.
for p, q, m in pqm_pairs:
    plt.hist(random_uniform_distribution(10_000, 'fibonacci', p=p, q=q, m=m), bins=20)
    plt.grid()
    plt.title('fibonacci', fontsize=16)
    # Path built from components so it resolves on any OS instead of relying
    # on Windows-only backslash separators; the file name itself is unchanged.
    plt.savefig(os.path.join(
        '..', 'images', 'generators',
        f'fibonacci_p={p}_q={q}_m={m}.png'))
    plt.clf()

<Figure size 432x288 with 0 Axes>

### Aggregating Chi^2 stats

In [None]:
# Forty sample counts evenly spaced from 10 to 100,000 (this rebinds `samples`).
# `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
samples = np.linspace(10, 100_000, 40, dtype=int)

In [None]:
# Collect chi-square statistics for three generators (default parameters) at
# every sample count in `samples`.
stat_generators = ('sine', 'sawtooth', 'fibonacci')

results = []
# `product` has no length, so the total is passed to tqdm explicitly to get a
# real progress bar.
for generator, n_samples in tqdm(product(stat_generators, samples),
                                 total=len(stat_generators) * len(samples)):
    # Call the imported `random_uniform_distribution`; a bare
    # `random_distribution` is not imported anywhere in this notebook.
    # NOTE(review): `chisquare` is documented as taking observed category
    # frequencies, while the raw samples are passed here — confirm this is intended.
    chisquared_result = chisquare(random_uniform_distribution(n_samples, generator))
    results.append([generator, n_samples, chisquared_result[0], chisquared_result[1]])

In [None]:
# Show the collected statistics as a table, one row per (generator, n_samples).
pd.DataFrame.from_records(
    data=results,
    columns=['generator', 'n_samples', 'chi2_stat', 'chi2_p_value'],
)