In [88]:
import pandas as pd
from scipy.stats import nbinom, poisson
import numpy as np
from joblib import Parallel, delayed

def truncNegBin_logCDF(y, n, p):
    f_zero = nbinom.pmf(0, n, p)
    if y > 0:
        return np.log((nbinom.cdf(y, n, p) - nbinom.cdf(0, n, p)) / (1 - f_zero))
    else:
        return np.log(0)

#log.p	logical; if TRUE, probabilities p are given as log(p)
def qnbinom(p, nNbinom, pNbinom, log_p=False):
    # Convert p to array if it's a single value
    if not isinstance(p, (list, np.ndarray)):
        p = np.array([p])
    
    # Set log-probabilities (lower tail)
    n = len(p)
    if log_p:
        logp = p
    else:
        logp = np.log(p)
    
    # Set output and deal with special cases (outputs NA and Inf)
    quantiles = np.full(n, np.nan)
    nna = ~np.isnan(logp)
    nlogp = logp[nna]
    if len(nlogp) == 0:
        return quantiles
    
    quantiles[nna] = np.full(len(nna), np.inf)
    if np.min(nlogp) >= 0:
        return quantiles


    # calculate mean and variance out of n and p
    mean = (nNbinom * (1 - pNbinom)) / pNbinom
    var = (nNbinom * (1 - pNbinom)) / (pNbinom**2)

    # Set log-CDF vector
    lp_max = np.max(nlogp[nlogp < 0])
    upper = int(mean + np.sqrt(var/(1-np.exp(lp_max)))) #Chebychev inequality
    logcdf = nbinom.logcdf(np.arange(upper+1), nNbinom, pNbinom)

    # Compute output
    for i in range(n):
        if nna[i]:
            if logp[i] < 0:
                quantiles[i] = np.sum(logcdf < logp[i])
    
    # Return output
    if len(quantiles) == 1:
        return quantiles[0]
    else:
        return quantiles

#log.p	logical; if TRUE, probabilities p are given as log(p)    
def qnbinom_trunc(p, nNbinom, pNbinom, log_p=False):
    # if f(0)=0 no truncation is needed
    if nbinom.pmf(0, nNbinom, pNbinom) <= 1e-6:
        print('Juhu Abkürzung')
        return nbinom.ppf(p, nNbinom, pNbinom)
    else:
        # Convert p to array if it's a single value
        if not isinstance(p, (list, np.ndarray)):
            p = np.array([p])
        
        # Set log-probabilities (lower tail)
        n = len(p)
        if log_p:
            logp = p
        else:
            logp = np.log(p)
        
        # Set output and deal with special cases (outputs NA and Inf)
        quantiles = np.full(n, np.nan)
        nna = ~np.isnan(logp)
        nlogp = logp[nna]
        if len(nlogp) == 0:
            return quantiles
        
        quantiles[nna] = np.full(len(nna), np.inf)
        if np.min(nlogp) >= 0:
            return quantiles

        # calculate mean and variance out of n and p
        mean = (nNbinom * (1 - pNbinom)) / pNbinom
        var = (nNbinom * (1 - pNbinom)) / (pNbinom**2)

        # Set log-CDF vector
        lp_max = np.max(nlogp[nlogp < 0])

        # find a adequate upper limit, starting from the extreme conservative chebychev inequality
        upper = int(mean + np.sqrt(var/(1-np.exp(lp_max)))) #Chebychev inequality
        while np.exp(truncNegBin_logCDF(upper-50, nNbinom, pNbinom)) > 0.999:
            upper = upper - 50

        logcdf = np.array([truncNegBin_logCDF(yi, nNbinom, pNbinom) for yi in range(1, int(upper)+1)]) 

        # Compute output
        for i in range(n):
            if nna[i]:
                if logp[i] < 0:
                    quantiles[i] = np.sum(logcdf < logp[i]) + 1 #+1 because 0 is truncated
        
        # Return output
        if len(quantiles) == 1:
            return quantiles[0]
        else:
            return quantiles

In [51]:
def truncNegBin_CDF(y, n, p):
    f_zero = nbinom.pmf(0, n, p)
    if y > 0:
        return (nbinom.cdf(y, n, p) - nbinom.cdf(0, n, p)) / (1 - f_zero)
    else:
        return 0

def truncNegBin_PPF(x, n, p, epsilon=1e-6, max_iterations=100):
    # if f(0)=0 no truncation is needed
    if (1 - nbinom.pmf(0, n, p)) == 1:
        return nbinom.ppf(x, n, p)
    else:
        # Define the range of y where the solution might exist
        lower_bound = 0
        upper_bound = 1000000000  # Adjust this based on the expected range of y

        # Bisection method
        for _ in range(max_iterations):
            y = (lower_bound + upper_bound) / 2
            cdf_value = truncNegBin_CDF(y, n, p)

            if abs(cdf_value - x) < epsilon:
                return np.ceil(y)  # Found a good approximation

            if cdf_value < x:
                lower_bound = y
            else:
                upper_bound = y

        # Return the best approximation if max_iterations is reached
        return np.ceil(y)

def calculate_trunc_nbinom_quantile(quantile, n, p):
    return truncNegBin_PPF(quantile, n, p)

In [82]:

def your_function_to_profile(quantiles, n, p):
    return qnbinom_trunc(quantiles, n, p)

In [94]:
# calculation of quantiles
quantiles = np.arange(0.001, 0.9999, 0.001)
quantiles = [round(q, 3) for q in quantiles] # due to binary inaccuracies

# Example usage
mean = 2303.33333333334
var = 288800800.88889

n = (mean**2) / (var - mean) # equivalent to r
p = mean / var

print(n)
print(p)

a = nbinom.ppf(quantiles, n, p)
#b = qnbinom_trunc(quantiles, n, p)
""" trunc_nbinom_quantiles = Parallel(n_jobs=-1)(delayed(calculate_trunc_nbinom_quantile)(quantile, n, p) for quantile in quantiles) #fast way
c = np.array(trunc_nbinom_quantiles) """
a

0.01837040181770293
7.975508815224853e-06


array([0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
       0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.000

In [95]:
def truncNegBin_logCDF2(y, n, p):
    is_scalar = np.isscalar(y)  # Überprüfen, ob y ein einzelner Wert ist oder ein Array

    if is_scalar:
        y = np.array([y])  # Wandle den einzelnen Wert in ein numpy Array um

    f_zero = nbinom.pmf(0, n, p)
    log_cdf_y = np.log((nbinom.cdf(y, n, p) - nbinom.cdf(0, n, p)) / (1 - f_zero))
    log_cdf_y[y <= 0] = np.log(0)  # Setzen Sie die Werte auf np.log(0), wenn y <= 0

    if is_scalar:
        return log_cdf_y[0]  # Gib den einzelnen Wert zurück, wenn y ein einzelner Wert war
    else:
        return log_cdf_y

truncNegBin_logCDF2(2,n,p)

yarray = np.arange(1, int(10000) + 1)  # Stellen Sie sicher, dass yarray ein numpy Array ist
a = result_array = truncNegBin_logCDF2(yarray, n, p)
print('Nummer 1')

b = np.array([truncNegBin_logCDF(yi, n, p) for yi in range(1, int(10000)+1)]) 
print('Nummer 2')



  log_cdf_y[y <= 0] = np.log(0)  # Setzen Sie die Werte auf np.log(0), wenn y <= 0


Nummer 1
Nummer 2


In [71]:
import cProfile
import pstats
cProfile.run('your_function_to_profile(quantiles, n, p)', 'profile_results')

# Die Profilierungsergebnisse lesen und nach der selbst verbrauchten Zeit (tottime) sortieren
stats = pstats.Stats('profile_results')
stats.sort_stats('tottime')  # Sortieren nach der selbst verbrauchten Zeit
stats.print_stats() 

Sun Jul  2 17:17:25 2023    profile_results

         5680959 function calls (5608705 primitive calls) in 10.734 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
   144508    1.437    0.000    1.972    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\stride_tricks.py:340(_broadcast_to)
    24084    1.159    0.000    7.294    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3436(cdf)
434525/362271    0.694    0.000    7.128    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
    12043    0.420    0.000    3.240    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3359(pmf)
    72254    0.408    0.000    0.818    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\core\_methods.py:93(_clip_de

<pstats.Stats at 0x2222345fd50>

In [83]:
import cProfile
import pstats
cProfile.run('your_function_to_profile(quantiles, n, p)', 'profile_results')

# Die Profilierungsergebnisse lesen und nach der selbst verbrauchten Zeit (tottime) sortieren
stats = pstats.Stats('profile_results')
stats.sort_stats('tottime')  # Sortieren nach der selbst verbrauchten Zeit
stats.print_stats() 

Sun Jul  2 17:28:08 2023    profile_results

         5680959 function calls (5608705 primitive calls) in 11.827 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
   144508    1.573    0.000    2.155    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\stride_tricks.py:340(_broadcast_to)
    24084    1.273    0.000    8.066    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3436(cdf)
434525/362271    0.767    0.000    7.862    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
    12043    0.458    0.000    3.543    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3359(pmf)
    72254    0.451    0.000    0.900    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\core\_methods.py:93(_clip_de

<pstats.Stats at 0x222205e43d0>

In [75]:
import cProfile
import pstats
cProfile.run('cdf_to_profile(20, n, p)', 'profile_results')

# Die Profilierungsergebnisse lesen und nach der selbst verbrauchten Zeit (tottime) sortieren
stats = pstats.Stats('profile_results')
stats.sort_stats('tottime')  # Sortieren nach der selbst verbrauchten Zeit
stats.print_stats() 

Sun Jul  2 17:22:05 2023    profile_results

         475 function calls (469 primitive calls) in 0.003 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       12    0.001    0.000    0.001    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\stride_tricks.py:340(_broadcast_to)
        2    0.000    0.000    0.002    0.001 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3436(cdf)
    36/30    0.000    0.000    0.002    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
        1    0.000    0.000    0.001    0.001 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3359(pmf)
        1    0.000    0.000    0.003    0.003 {built-in method builtins.exec}
        6    0.000    0.000    0.000    0.000 c:\Users\Tobias\AppData\Local\Programs\Pyth

<pstats.Stats at 0x2222372b110>

In [78]:
import cProfile
import pstats
cProfile.run('cdf_to_profile(20, n, p)', 'profile_results')

# Die Profilierungsergebnisse lesen und nach der selbst verbrauchten Zeit (tottime) sortieren
stats = pstats.Stats('profile_results')
stats.sort_stats('tottime')  # Sortieren nach der selbst verbrauchten Zeit
stats.print_stats() 

Sun Jul  2 17:26:00 2023    profile_results

         475 function calls (469 primitive calls) in 0.003 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       12    0.000    0.000    0.001    0.000 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\stride_tricks.py:340(_broadcast_to)
        2    0.000    0.000    0.002    0.001 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3436(cdf)
        1    0.000    0.000    0.001    0.001 c:\Users\Tobias\AppData\Local\Programs\Python\Python311\Lib\site-packages\scipy\stats\_distn_infrastructure.py:3359(pmf)
    36/30    0.000    0.000    0.002    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
        1    0.000    0.000    0.003    0.003 {built-in method builtins.exec}
        6    0.000    0.000    0.000    0.000 {method 'reduce' of 'numpy.ufunc' objects}


<pstats.Stats at 0x222236bbd10>

In [None]:
len(d[d == False])

In [None]:
mean = 4
var = 6

n = (mean**2) / (var - mean) # equivalent to r
p = mean / var

mean = (n * (1 - p)) / p
var = (n * (1 - p)) / (p**2)
print(mean)
print(var)