Skip to content
This repository has been archived by the owner on Feb 13, 2024. It is now read-only.

Commit

Permalink
Fix both windows issues with seed and rcont2 limit
Browse files Browse the repository at this point in the history
  • Loading branch information
maclandrol committed Mar 2, 2017
1 parent 4c4f910 commit b1ec158
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 33 deletions.
91 changes: 63 additions & 28 deletions FisherExact/Fisher.py
Expand Up @@ -7,15 +7,17 @@
from .statlib.asa205 import enum as rcont
import numpy as np
import logging
import os
import random


class F2PYSTOP(Exception):

def __call__(self, status, mes=""):
raise self.__class__(mes)

f.f2pystop = F2PYSTOP()


def fisher_exact(table, alternative="two-sided", hybrid=False, midP=False,
simulate_pval=False, replicate=2000, workspace=300,
attempt=3, seed=None):
Expand Down Expand Up @@ -52,10 +54,8 @@ def fisher_exact(table, alternative="two-sided", hybrid=False, midP=False,
Number of attempts to try, if the workspace size is not enough.
On each attempt, the workspace size is doubled. Default value is 3
seed : int
Random number to use as seed. If a seed isn't provided. 4 bytes will be
read from os.urandom. If this fail, getrandbits of the random module
(with 32 random bits) will be used. In the particular case where both
failed, the current time will be used
Random number to use as seed. If a seed isn't provided random.systemrandom
will be used, if this failed the current time will be used.
Returns
-------
Expand Down Expand Up @@ -106,7 +106,7 @@ def fisher_exact(table, alternative="two-sided", hybrid=False, midP=False,

if (nr == 2 and nc == 2):
# I'm not sure what the fisher_exact module of ss do.
# So use my own function if midp is asked
# So use my own implementation of fisher exact if midp is asked
if not midP:
# in this case, just use the default scipy
# could remove this in the future
Expand All @@ -127,8 +127,13 @@ def fisher_exact(table, alternative="two-sided", hybrid=False, midP=False,
'Less than 2 non-zero column or row marginal,\n %s' % c)

statistic = -np.sum(lgamma(c + 1))
tmp_res = _fisher_sim(c, replicate, seed)
tmp_res = _fisher_sim(c, replicate, seed, workspace)
almost = 1 + 64 * np.finfo(np.double).eps
# prevent value of 0, this is actually the best estimator
# alternatively, we could fit a distribution and compute pval
if np.sum(tmp_res <= statistic) < 1:
logging.warning(
"All simulated values are lower than table statistic : pval technically of 0.")
pval = (1 + np.sum(tmp_res <= statistic / almost)) / \
(replicate + 1.)
elif hybrid:
Expand All @@ -147,12 +152,12 @@ def _execute_fexact(nr, nc, c, nnr, expect, percnt, emin, workspace,
attempt=2, midP=False):
"""Execute fexact using the fortran routine"""

## find required workspace
# find required workspace
#ntot = np.sum(c)+1
#nco = max(nr, nc)
#nro = nr +nc - nco
#allocated = __iwork(0, ntot, 'double')
#allocated = __iwork(allocated, nco) *3
#allocated = __iwork(allocated, nco) *3
#allocated = __iwork(allocated, nco) *2
#k = nro + nco +1
#kk = k*nco
Expand All @@ -174,7 +179,7 @@ def _execute_fexact(nr, nc, c, nnr, expect, percnt, emin, workspace,
except Exception as error:
logging.warning(
"Workspace : %d is not enough. You should increase it.")
wk = wk << 1 #double workspace
wk = wk << 1 # double workspace
if not success:
raise ValueError('Could not execute fexact, increase workspace')
if midP:
Expand All @@ -183,7 +188,7 @@ def _execute_fexact(nr, nc, c, nnr, expect, percnt, emin, workspace,
return pval[1]


def _fisher_sim(c, replicate, seed=None):
def _fisher_sim(c, replicate, seed=None, wkslimit=5000):
"""Performs a simulation with `replicate` replicates in order to find an
alternative contingency test with the same margin.
Parameters
Expand All @@ -192,42 +197,72 @@ def _fisher_sim(c, replicate, seed=None):
A m x n contingency table. Elements should be non-negative integers.
replicate : int
Number of replicates to perform for the simulation
seed : int
A random number to be used as seed
wkslimit : int
working space limit for the size of array containing factorial
"""

DFAULT_MAX_TOT = 5000
# set default maxtot to wkslimit
if wkslimit < DFAULT_MAX_TOT:
wkslimit = 5000

if seed is None:
try:
seed = os.urandom(4)
seed = int(seed.encode('hex'), 16)
seed = random.SystemRandom().randint(1, 100000)
seed = np.array([seed], dtype=np.int32)
except:
try:
seed = int(random.getrandbits(32))
except:
import time
seed = int(time.time())
seed = np.array([seed], dtype=np.int32)
except:
seed = 12345
seed = np.array([seed], dtype=np.int32)

seed = np.array([seed], dtype='int32')
key = np.array([False], dtype=bool)
ierror = np.array([0], dtype='int32')
sr, sc = c.sum(axis=1).astype('int32'), c.sum(axis=0).astype('int32')
ierror = np.array([0], dtype=np.int32)
sr, sc = c.sum(axis=1).astype(np.int32), c.sum(axis=0).astype(np.int32)
nr, nc = len(sr), len(sc)
n = np.sum(sr)
results = np.zeros(replicate)

fact = np.zeros(n + 1)
if n < wkslimit:
# we can just set the limit to the table sum
wkslimit = n
pass
else:
# throw error immediately
raise ValueError(
"Limit of %d on the table sum exceded (%d), please increase workspace !" % (DFAULT_MAX_TOT, n))

maxtot = np.array([wkslimit], dtype=np.int32)

f = np.zeros(n + 1)
for i in xrange(2, n + 1):
fact[i] = fact[i - 1] + np.log(i)
f[i] = f[i - 1] + np.log(i)

observed = np.zeros((nr, nc), dtype=np.int32, order='F')

fact = np.zeros(wkslimit + 1, dtype=np.float, order='F')

observed = np.zeros((nr, nc), dtype="int32", order='F')
for it in xrange(replicate):
rcont2(nrow=nr, ncol=nc, nrowt=sr, ncolt=sc, key=key,
seed=seed, matrix=observed, ierror=ierror)
rcont2(nrow=nr, ncol=nc, nrowt=sr, ncolt=sc, maxtot=maxtot,
key=key, seed=seed, fact=fact, matrix=observed, ierror=ierror)
# if we do not have an error, make spcial action
ans = 0.
tmp_observed = observed.ravel()
if ierror[0] != 0:
raise ValueError("Fortran subroutine rcont2 return an error !")
if ierror[0] in [1, 2]:
raise ValueError(
"Error in rcont2 (fortran) : row or column input size is less than 2!")
elif ierror[0] in [3, 4]:
raise ValueError(
"Error in rcont2 (fortran) : Negative values in table !")
elif ierror[0] == 6:
# this shouldn't happen with the previous check
raise ValueError(
"Error in rcont2 (fortran) : Limit on the table sum (%d) exceded, please increase workspace !" % DFAULT_MAX_TOT)
for j in xrange(nc):
i = 0
ii = j * nr
Expand All @@ -241,7 +276,7 @@ def _fisher_sim(c, replicate, seed=None):

def __iwork(allocated, number, itype='int'):
"""Check if the allocated memory is enough"""

i = allocated
if itype == 'double':
allocated += (number << 1)
Expand All @@ -258,7 +293,7 @@ def _midp(c):
c : array_like of ints
A m x n contingency table. Elements should be non-negative integers.
"""
sr, sc = c.sum(axis=1).astype('int32'), c.sum(axis=0).astype('int32')
sr, sc = c.sum(axis=1).astype(np.int32), c.sum(axis=0).astype(np.int32)
nr, nc = len(sr), len(sc)
n = np.sum(sr)
global result
Expand Down
11 changes: 8 additions & 3 deletions FisherExact/statlib/asa159.f90
Expand Up @@ -301,7 +301,7 @@ function r8_uniform_01 ( seed )

return
end
subroutine rcont2 ( nrow, ncol, nrowt, ncolt, key, seed, matrix, ierror )
subroutine rcont2 ( nrow, ncol, nrowt, ncolt, maxtot, key, seed, fact, matrix, ierror )

!*****************************************************************************80
!
Expand Down Expand Up @@ -343,28 +343,33 @@ subroutine rcont2 ( nrow, ncol, nrowt, ncolt, key, seed, matrix, ierror )
! Input, integer ( kind = 4 ) NROWT(NROW), NCOLT(NCOL), the row and column
! sums. Each entry must be positive.
!
! Input, integer ( kind = 4 ) MAXTOT, the max total size for the matrix
! column sum.
!
! Input/output, logical KEY, a flag that indicates whether data has
! been initialized for this problem. Set KEY = .FALSE. before the first
! call.
!
! Input/output, integer ( kind = 4 ) SEED, a seed for the random number
! generator.
!
! Input/output, real ( kind = 4 ) fact, contains the log factorials
!
! Output, integer ( kind = 4 ) MATRIX(NROW,NCOL), the matrix.
!
! Output, integer ( kind = 4 ) IERROR, an error flag, which is returned
! as 0 if no error occurred.
!
implicit none

integer ( kind = 4 ), parameter :: maxtot = 5000

integer ( kind = 4 ) ncol
integer ( kind = 4 ) nrow
integer ( kind = 4 ) maxtot
real ( kind = 8 ), intent(inout) :: fact(maxtot+1)

logical done1
logical done2
real ( kind = 8 ), save, dimension ( maxtot+1 ) :: fact
integer ( kind = 4 ) i
integer ( kind = 4 ) ia
integer ( kind = 4 ) iap
Expand Down
4 changes: 2 additions & 2 deletions recompile.sh
@@ -1,5 +1,5 @@
cd statlib
cd FisherExact/statlib
f2py -m asa159 asa159.f90 -c
f2py -m asa205 asa205.f90 -c
f2py -c fexact.pyf FEXACT.F90 prterr.f
cd ..
cd ../..

0 comments on commit b1ec158

Please sign in to comment.