### Standard error on the sample average

In [1]:
import numpy as np

In [2]:
### creating random numbers in NumPy

rg = np.random.default_rng(12345)

In [3]:
### random.random()

### random.gauss() - > rg.normal()

In [4]:
### uniform random number generator between 0 and 1

rg.random()

0.22733602246716966

In [5]:
rg.random()

0.31675833970975287

In [6]:
rg.random()

0.7973654573327341

In [7]:
### reset the random seed and reproduce

rg = np.random.default_rng(12345)

In [8]:
print( rg.random(), rg.random(), rg.random() )

0.22733602246716966 0.31675833970975287 0.7973654573327341


### 5000 replications of 7 independent random values

In [9]:
rg = np.random.default_rng(12345)

Ns = 7

nr = 5000

X = rg.random((nr, Ns))

In [10]:
%whos

Variable   Type         Data/Info
---------------------------------
Ns         int          7
X          ndarray      5000x7: 35000 elems, type `float64`, 280000 bytes (273.4375 kb)
np         module       <module 'numpy' from 'C:\<...>ges\\numpy\\__init__.py'>
nr         int          5000
rg         Generator    Generator(PCG64)


In [11]:
print( X.shape )

(5000, 7)


### calculate the average along each row

In [12]:
x_row_means = X.mean(axis=1)

In [13]:
x_row_means.shape

(5000,)

### Calculate the standard deviation of the sample average

Use the `.std()` HOWEVER be cautious...by default NumPy uses the biased estimate. So we have to set the `ddof` to be 1.

In [14]:
x_mean_se = x_row_means.std(ddof = 1)

In [15]:
%whos

Variable      Type         Data/Info
------------------------------------
Ns            int          7
X             ndarray      5000x7: 35000 elems, type `float64`, 280000 bytes (273.4375 kb)
np            module       <module 'numpy' from 'C:\<...>ges\\numpy\\__init__.py'>
nr            int          5000
rg            Generator    Generator(PCG64)
x_mean_se     float64      0.10833789872393827
x_row_means   ndarray      5000: 5000 elems, type `float64`, 40000 bytes


## Use a list comprehension to LOOP OVER sample sizes

One line of code to calculate the sample size.

In [16]:
rg.random((5000, Ns)).mean(axis=1).std(ddof=1)

0.11075996157160421

In [17]:
sample_size_use = 5 * (2 ** np.arange(12))

In [18]:
print( sample_size_use )

[    5    10    20    40    80   160   320   640  1280  2560  5120 10240]


In [19]:
print( type(sample_size_use) )

<class 'numpy.ndarray'>


In [20]:
se_vs_sample_size = [ rg.random((5000, nns)).mean(axis=1).std(ddof=1) for nns in list(sample_size_use)]

In [21]:
### BUT in your homework you must use rg.normal() where you must specify the
### mean and standard deviation

help(np.random.normal)

Help on built-in function normal:

normal(...) method of numpy.random.mtrand.RandomState instance
    normal(loc=0.0, scale=1.0, size=None)
    
    Draw random samples from a normal (Gaussian) distribution.
    
    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).
    
    The normal distributions occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.
    
    .. note::
        New code should use the ``normal`` method of a ``default_rng()``
        instance instead; see `random-quick-start`.
    
    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : fl

In [22]:
rg.normal(5, 1, (12, 3))

array([[4.65648751, 3.84409303, 6.01813058],
       [5.55935812, 5.92722578, 5.88324665],
       [4.47095892, 3.8614639 , 6.18931437],
       [5.28595273, 5.61090631, 4.44910322],
       [4.26813976, 4.29135711, 6.79445196],
       [2.4294618 , 3.22372612, 2.8941075 ],
       [5.48822189, 5.08956444, 4.83387817],
       [4.28279368, 3.37989358, 6.39749529],
       [6.53448063, 4.19335345, 4.35537123],
       [4.28549377, 6.62957596, 5.92067698],
       [7.21772156, 5.51885275, 4.67623065],
       [5.57082998, 6.86310007, 3.59189566]])

### look at the averaging operation

In [25]:
rg = np.random.default_rng(12345)

In [26]:
y = rg.random((12, 4))

In [27]:
print( y )

[[0.22733602 0.31675834 0.79736546 0.67625467]
 [0.39110955 0.33281393 0.59830875 0.18673419]
 [0.67275604 0.94180287 0.24824571 0.94888115]
 [0.66723745 0.09589794 0.44183967 0.88647992]
 [0.6974535  0.32647286 0.73392816 0.22013496]
 [0.08159457 0.1598956  0.34010018 0.46519315]
 [0.26642103 0.8157764  0.19329439 0.12946908]
 [0.09166475 0.59856801 0.8547419  0.60162124]
 [0.93198836 0.72478136 0.86055132 0.9293378 ]
 [0.54618601 0.93767296 0.49498794 0.27377318]
 [0.45177871 0.66503892 0.33089093 0.90345401]
 [0.25707418 0.33982834 0.2588534  0.35544648]]


In [28]:
y_row_means = y.mean(axis = 1)

In [29]:
print( y_row_means )

[0.50442862 0.3772416  0.70292144 0.52286374 0.49449737 0.26169588
 0.35124022 0.53664898 0.86166471 0.56315502 0.58779064 0.3028006 ]


In [30]:
y_row_0 = [ 0.22733602, 0.31675834, 0.79736546, 0.67625467]

In [31]:
print(y_row_0)

[0.22733602, 0.31675834, 0.79736546, 0.67625467]


In [32]:
sum(y_row_0) / len(y_row_0)

0.5044286225

In [33]:
y_row_means[0] == sum(y_row_0) / len(y_row_0)

False

In [35]:
y_row_means[0]

0.5044286225651577

In [37]:
sum(y_row_0) / len(y_row_0)

0.5044286225

In [38]:
y_row_0_b = y[0]

In [39]:
y_row_0_b.mean()

0.5044286225651577

In [41]:
print( y_row_0_b )

[0.22733602 0.31675834 0.79736546 0.67625467]


In [40]:
y_row_0_b.sum() / y_row_0_b.size

0.5044286225651577

In [42]:
y_row_0_b.size

4