In [21]:
from numpy import random
from tabulate import tabulate
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import factorial
from PIL import Image
PATH = "img/"

In [12]:
sample_size = [10, 100, 1000]
loc, scale = 0.0, 1.0
EXP_NUM = 4

In [13]:
def norm(size):
    return random.normal(loc, scale, size)

def cauchy(size):
    return random.standard_cauchy(size)

def laplace(size):
    return random.laplace(loc, 1 / np.sqrt(2), size)

def poisson(size):
    return random.poisson(10, size)

def uniform(size):
    return  random.uniform(-np.sqrt(3), np.sqrt(3), size)

distribution_fun = { 'norm' : norm,
                     'cauchy' : cauchy,
                     'laplace' : laplace,
                     'poisson' : poisson,
                     'uniform' : uniform, }


In [14]:
def calculate_sample_mean(sample):
     return np.mean(sample)
    
def calculate_median(sample):
     return np.median(sample)

def calculate_half_sum_extreme(sample):
    return (min(sample) + max(sample)) / 2

def calculate_quantile(sample, index):
    return np.quantile(sample, index)

def calculate_half_sum_quantile(sample):
    return (calculate_quantile(sample, 0.25) + calculate_quantile(sample, 0.75)) / 2



def calculate_truncated_mean(sample):
    res = 0
    n = len(sample)
    r = int(0.25 * n)
    i = r + 1
    while i <= n - r:
        res += sample[i]
        i = i + 1
    return res / (n - 2 * r)



characteristic = {
    'sample_mean': calculate_sample_mean,
    'median':calculate_median,
    'half_sum_extreme': calculate_half_sum_extreme,
    'half_sum_quantile': calculate_half_sum_quantile,
    'truncated_mean': calculate_truncated_mean,
}

In [15]:
def D(sample):
    return np.var(sample)

def E(sample):
    return np.mean(sample)

In [16]:
def calculate_mean_value(result):
    mean_value = {}
    answer = {
        'sample_mean': [],
        'median':[],
        'half_sum_extreme': [],
        'half_sum_quantile': [],
        'truncated_mean': [], }
    for ch in characteristic:
        mean_value['e']=E(result[ch])
        mean_value['d']=D(result[ch])
        answer[ch] = mean_value.copy()
    return answer

In [17]:
answer = {}
sample_res ={}
result ={}
for dist in distribution_fun:
    for size in sample_size:
        result.clear();
        result = {
        'sample_mean': [],
        'median':[],
        'half_sum_extreme': [],
        'half_sum_quantile': [],
        'truncated_mean': [], }
        for i in range(1000):
            sample = distribution_fun[dist](size)
            for ch in characteristic:
                tmp = characteristic[ch](sample)
                result[ch].append(tmp)  
        sample_res[str(size)]=calculate_mean_value(result)
    answer[dist] = sample_res.copy()
        

from pprint import pprint 
pprint(answer)


{'cauchy': {'10': {'half_sum_extreme': {'d': 22469.715683198177,
                                        'e': 6.149399649018662},
                   'half_sum_quantile': {'d': 0.8643900541494564,
                                         'e': -0.01962410613472786},
                   'median': {'d': 0.3131766413595252,
                              'e': 0.001557767538188747},
                   'sample_mean': {'d': 907.4582855965612,
                                   'e': 1.2234768427887124},
                   'truncated_mean': {'d': 1527.8721567160385,
                                      'e': 2.543461531657853}},
            '100': {'half_sum_extreme': {'d': 15128110.800813416,
                                         'e': 97.47688781982221},
                    'half_sum_quantile': {'d': 0.055478883170312514,
                                          'e': 0.016598588334559784},
                    'median': {'d': 0.025102762539157483,
                               'e': 0.01187077

In [24]:
def show_results(dist, name):
    res_d =[]
    res_e=[]
    rows = []
    headers = [name , "x_", "med(x)", "z_R", "z_Q", "z_tr"]
    for size in dist:        
        res_d.clear()
        res_e.clear()
        res_e.append(" E(z) =  " +str(size))
        res_d.append(" D(z) =  " +str(size))
        for ch in dist[size]:
            res_d.append(np.around((dist[size])[ch]['d'],  decimals=4))
            res_e.append(np.around((dist[size])[ch]['e'],  decimals=4))
        rows.append(res_e.copy())        
        rows.append(res_d.copy())
        rows.append(["","","","","",""])
    print(tabulate(rows, headers))
        
for key in distribution_fun:
    show_results(answer[key], key)
    print("\n")

norm          x_       med(x)    z_R      z_Q      z_tr
------------  -------  --------  -------  -------  -------
E(z) =  10    0.0016   -0.0011   0.0085   -0.0049  0.0055
D(z) =  10    0.0997   0.1425    0.1875   0.1109   0.1647

E(z) =  100   -0.0053  -0.0018   -0.0083  -0.0045  -0.0068
D(z) =  100   0.0093   0.0147    0.0903   0.0114   0.0191

E(z) =  1000  0.0002   0.0004    0.0027   0.0009   0.0007
D(z) =  1000  0.001    0.0016    0.0581   0.0012   0.002



cauchy        x_         med(x)    z_R            z_Q      z_tr
------------  ---------  --------  -------------  -------  ----------
E(z) =  10    1.2235     0.0016    6.1494         -0.0196  2.5435
D(z) =  10    907.4583   0.3132    22469.7157     0.8644   1527.8722

E(z) =  100   1.9565     0.0119    97.4769        0.0166   4.4856
D(z) =  100   6060.7936  0.0251    15128110.8008  0.0555   23401.4874

E(z) =  1000  0.1619     0.0017    75.7176        0.0024   0.055
D(z) =  1000  74.9162    0.0026    16842786.6595  0.0046   1