In [1]:
import numpy as np
from __future__ import division
from scipy.special import erf

In [2]:
#Explanation and Use:
#This function takes the number of samples in a dataset and returns the number of standard deviations beyond which
#one would expect to see a single data point 50% of the time.  If there exist many points more than this 
#number of standard deviations away, then they may be caused by some influence other than stochastic error.

#Detailed Notes:
#The actual rate of occurrence is limited by the zStep, this is accounted for in the results by printing the acutal 
#number of samples at which one can expect to see one occurrence.  This value is rounded to the nearest whole number.
#Since it is constant in zStep is not constant in numSamples, the calculation becomes much less accurate as Z
#increases.  

In [3]:
def numSamples_zScore(numSamples, OneSided = True, zTable_min = 1, zTable_max = 6.001, zStep = 0.001):
    zStep = float(zStep)
    #Array of z values to be searched
    zTable = np.arange(zTable_min,zTable_max,zStep)
    
    #One sided z Score is calculated as half the two-sided score
    if OneSided == False:
        numSamples_table = 1/(0.5*(1-erf(zTable/np.sqrt(2))))
        print "TwoSided"
    else:
        numSamples_table = 1/(1-erf(zTable/np.sqrt(2)))
        print 'OneSided'
    
    #Find the index of the zScore with the nearest number of samples
    z_ind = np.argmin(abs(numSamples_table - numSamples))
    
    z = zTable[z_ind]
    numSamples_zScore = int(round(numSamples_table[z_ind]))
    
    print "Z = " + str(z)
    print "Expect to see one occurance per: " + str(numSamples_zScore) + " samples."
    
    return round(z,3)

In [15]:
#Example Call
z = numSamples_zScore(8606, False, 1,8,0.001)

TwoSided
Z = 3.681
Expect to see one occurance per: 8609 samples.


In [16]:
z

3.681