In [175]:
import pandas as pd
from scipy.stats import nbinom, poisson
import numpy as np
from joblib import Parallel, delayed

def truncNegBin_logCDF(y, n, p):
    """Log-CDF of the zero-truncated negative binomial distribution.

    Evaluates ``log((F(y) - F(0)) / (1 - f(0)))`` where ``F`` and ``f`` are
    the CDF and PMF of ``scipy.stats.nbinom(n, p)``.

    Parameters
    ----------
    y : scalar or array-like
        Evaluation point(s). Python scalars, lists, tuples, numpy arrays and
        0-d arrays are accepted.
    n, p : float
        Parameters forwarded unchanged to ``scipy.stats.nbinom``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar when ``y`` is a scalar (or 0-d array), otherwise an array
        with one entry per element of ``y``. Entries with ``y <= 0`` are
        ``-inf`` because the truncated distribution has no mass there.
    """
    y_arr = np.asarray(y)
    is_scalar = y_arr.ndim == 0
    # Work on an array internally so the boolean mask below works for
    # scalars, lists and 0-d arrays alike.
    y_arr = np.atleast_1d(y_arr)

    f_zero = nbinom.pmf(0, n, p)
    # For y <= 0 the argument of the log is zero or negative; those entries
    # are overwritten right below, so the floating-point warnings they would
    # trigger are silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        log_cdf_y = np.log(
            (nbinom.cdf(y_arr, n, p) - nbinom.cdf(0, n, p)) / (1 - f_zero)
        )
    log_cdf_y[y_arr <= 0] = -np.inf  # log(0): no probability mass at or below zero

    if is_scalar:
        return log_cdf_y[0]
    return log_cdf_y

# log_p: bool; if True, the probabilities p are given as log(p)
def qnbinom_trunc(p, nNbinom, pNbinom, log_p=False):
    """Quantile function of the zero-truncated negative binomial distribution.

    For every requested probability the smallest integer ``y >= 1`` whose
    truncated log-CDF (``truncNegBin_logCDF``) is not below the target is
    returned. The log-CDF is tabulated once on ``1..upper`` and each quantile
    is obtained by counting the tabulated entries below the target.

    Parameters
    ----------
    p : scalar or 1-D array-like
        Lower-tail probabilities, or their logarithms when ``log_p`` is True.
    nNbinom, pNbinom : float
        Parameters ``n`` and ``p`` of ``scipy.stats.nbinom``.
    log_p : bool, default False
        If True, ``p`` is interpreted as log-probabilities.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        * If ``P(X = 0) <= 1e-9`` the untruncated ``nbinom.ppf`` result is
          returned as is.
        * Otherwise one quantile per probability: ``nan`` where the
          (log-)probability is NaN, ``inf`` where the log-probability is
          ``>= 0``. A single-element result is returned as a scalar, except
          on the two early-return paths (all inputs NaN, or no
          log-probability below 0), which always return an array.
        * If ``P(X = 0) >= 1`` the truncated distribution is undefined and
          every non-NaN entry is ``nan``.
    """
    f_zero = nbinom.pmf(0, nNbinom, pNbinom)

    # If f(0) is numerically zero, truncation changes nothing: use scipy's
    # quantile function directly (on the probability scale).
    if f_zero <= 1e-9:
        prob = np.exp(p) if log_p else p
        return nbinom.ppf(prob, nNbinom, pNbinom)

    # Normalise the input to a float array of log-probabilities (lower tail).
    logp = np.atleast_1d(np.asarray(p, dtype=float))
    if not log_p:
        # log(0) -> -inf and log(negative) -> nan are handled below.
        with np.errstate(divide='ignore', invalid='ignore'):
            logp = np.log(logp)
    n = len(logp)

    # Set output and deal with special cases (NaN stays NaN, p >= 1 gives inf).
    quantiles = np.full(n, np.nan)
    nna = ~np.isnan(logp)
    nlogp = logp[nna]
    if len(nlogp) == 0:
        return quantiles

    quantiles[nna] = np.inf
    if np.min(nlogp) >= 0:
        return quantiles

    if f_zero >= 1:
        # All mass sits at zero, so conditioning on X > 0 is undefined.
        quantiles[nna] = np.nan
        return quantiles

    # Mean and variance of the untruncated distribution.
    mean = (nNbinom * (1 - pNbinom)) / pNbinom
    var = (nNbinom * (1 - pNbinom)) / (pNbinom**2)

    # Largest log-probability that still needs a finite quantile.
    lp_max = np.max(nlogp[nlogp < 0])

    # Conservative upper limit from Chebyshev's inequality. The truncated CDF
    # reaches probability q where the untruncated CDF reaches
    # f(0) + q * (1 - f(0)), i.e. the admissible upper-tail mass is
    # (1 - q) * (1 - f(0)). expm1 keeps 1 - exp(lp_max) accurate near q = 1.
    tail_mass = -np.expm1(lp_max) * (1 - f_zero)
    upper = max(int(np.ceil(mean + np.sqrt(var / tail_mass))), 1)

    # Tighten the limit in fixed steps while it still covers the largest
    # requested probability, so every finite quantile stays inside the table.
    step = 1000
    while upper > step and truncNegBin_logCDF(upper - step, nNbinom, pNbinom) >= lp_max:
        upper -= step

    yarray = np.arange(1, upper + 1)
    logcdf = truncNegBin_logCDF(yarray, nNbinom, pNbinom)

    # Quantile = number of support points whose log-CDF is below the target,
    # plus 1 because the support starts at 1 (0 is truncated).
    for i in np.flatnonzero(logp < 0):
        quantiles[i] = np.count_nonzero(logcdf < logp[i]) + 1

    if len(quantiles) == 1:
        return quantiles[0]
    return quantiles

## Bisektionsverfahren (alt)

In [151]:
def truncNegBin_CDF(y, n, p):
    """CDF of the zero-truncated negative binomial distribution at a scalar y.

    Returns ``(F(y) - F(0)) / (1 - f(0))`` for ``y > 0``, with ``F`` and ``f``
    the CDF and PMF of ``scipy.stats.nbinom(n, p)``, and the integer ``0``
    whenever ``y > 0`` does not hold.
    """
    zero_mass = nbinom.pmf(0, n, p)

    # No probability mass at or below zero once zero has been truncated away.
    if not y > 0:
        return 0

    mass_above_zero = nbinom.cdf(y, n, p) - nbinom.cdf(0, n, p)
    return mass_above_zero / (1 - zero_mass)

def truncNegBin_PPF(x, n, p, epsilon=1e-6, max_iterations=100):
    """Quantile of the zero-truncated negative binomial distribution by bisection.

    Returns the smallest integer ``k >= 1`` whose truncated CDF
    ``(F(k) - F(0)) / (1 - f(0))`` is at least ``x``. The search bisects over
    integers, so the result is exact; the earlier real-valued bisection with
    an epsilon early exit could return a value one too large when the CDF at
    the true quantile was within ``epsilon`` above ``x``.

    Parameters
    ----------
    x : float
        Scalar lower-tail probability.
    n, p : float
        Parameters of ``scipy.stats.nbinom``.
    epsilon : float, default 1e-6
        Unused; kept only so existing callers passing it keep working.
    max_iterations : int, default 100
        Upper limit on bisection steps. About 30 steps cover the full search
        range; if the limit is hit first, the current upper bound is returned.

    Returns
    -------
    numpy.float64
        The quantile. ``nan`` if ``x`` is NaN. If ``1 - f(0)`` equals 1 in
        floating point, the untruncated ``nbinom.ppf(x, n, p)`` is returned.
    """
    f_zero = nbinom.pmf(0, n, p)

    # if f(0)=0 no truncation is needed
    if (1 - f_zero) == 1:
        return nbinom.ppf(x, n, p)

    if np.isnan(x):
        return np.float64(np.nan)

    cdf_zero = nbinom.cdf(0, n, p)

    def _trunc_cdf(k):
        # Truncated CDF at an integer k >= 1.
        return (nbinom.cdf(k, n, p) - cdf_zero) / (1 - f_zero)

    # Search range for the quantile; the truncated CDF is 0 at the lower bound.
    lower_bound = 0
    upper_bound = 1000000000

    # Integer bisection: shrink (lower_bound, upper_bound] towards the first
    # integer whose truncated CDF reaches x.
    for _ in range(max_iterations):
        if upper_bound - lower_bound <= 1:
            break
        mid = (lower_bound + upper_bound) // 2
        if _trunc_cdf(mid) >= x:
            upper_bound = mid
        else:
            lower_bound = mid

    return np.float64(upper_bound)

def calculate_trunc_nbinom_quantile(quantile, n, p):
    """Return ``truncNegBin_PPF(quantile, n, p)`` with its default settings.

    Plain top-level wrapper taking one probability per call.
    """
    result = truncNegBin_PPF(quantile, n, p)
    return result

In [82]:

def your_function_to_profile(quantiles, n, p):
    """Entry point for profiling: forwards its arguments to ``qnbinom_trunc``."""
    profiled_result = qnbinom_trunc(quantiles, n, p)
    return profiled_result

## Vergleich der Funktionen

In [180]:
import time
from scipy.stats import nbinom
from joblib import Parallel, delayed
import numpy as np

# Probability grid 0.001, 0.002, ..., 0.999. It is built from integers so the
# number of points does not depend on floating-point step accumulation; the
# values equal those of the former rounded float-step arange.
quantiles = np.arange(1, 1000) / 1000

# Example usage: negative binomial specified by its mean and variance
mean = 9.5
var = 3736.5

n = (mean**2) / (var - mean) # equivalent to r
p = mean / var

# time.perf_counter() is used for all timings: it is monotonic and has the
# highest available resolution, unlike the wall clock returned by time.time().

# Measure execution time for variable 'a' (untruncated scipy quantiles)
start_time_a = time.perf_counter()
a = nbinom.ppf(quantiles, n, p)
end_time_a = time.perf_counter()
execution_time_a = end_time_a - start_time_a

# Measure execution time for variable 'b' (table-based truncated quantiles)
start_time_b = time.perf_counter()
b = qnbinom_trunc(quantiles, n, p)
end_time_b = time.perf_counter()
execution_time_b = end_time_b - start_time_b

# Measure execution time for variable 'c' (bisection, one joblib task per quantile)
start_time_c = time.perf_counter()
trunc_nbinom_quantiles = Parallel(n_jobs=-1)(delayed(calculate_trunc_nbinom_quantile)(quantile, n, p) for quantile in quantiles)
c = np.array(trunc_nbinom_quantiles)
end_time_c = time.perf_counter()
execution_time_c = end_time_c - start_time_c

print("Execution time for variable 'a':", execution_time_a, "seconds")
print("Execution time for variable 'b':", execution_time_b, "seconds")
print("Execution time for variable 'c':", execution_time_c, "seconds")

Execution time for variable 'a': 0.0049974918365478516 seconds
Execution time for variable 'b': 0.03100132942199707 seconds
Execution time for variable 'c': 13.294989347457886 seconds


In [171]:
a

array([0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
       0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00,
       1.000000e+00, 1.000000e+00, 1.000000e+00, 1.0000

In [172]:
b

array([1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00,
       1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00,
       1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00,
       1.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00,
       2.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00,
       2.000000e+00, 2.000000e+00, 3.000000e+00, 3.000000e+00,
       3.000000e+00, 3.000000e+00, 3.000000e+00, 3.000000e+00,
       4.000000e+00, 4.000000e+00, 4.000000e+00, 4.000000e+00,
       4.000000e+00, 5.000000e+00, 5.000000e+00, 5.000000e+00,
       5.000000e+00, 6.000000e+00, 6.000000e+00, 6.000000e+00,
       6.000000e+00, 7.000000e+00, 7.000000e+00, 7.000000e+00,
       8.000000e+00, 8.000000e+00, 8.000000e+00, 9.000000e+00,
       9.000000e+00, 9.000000e+00, 1.000000e+01, 1.000000e+01,
       1.100000e+01, 1.100000e+01, 1.200000e+01, 1.200000e+01,
       1.300000e+01, 1.300000e+01, 1.400000e+01, 1.400000e+01,
       1.500000e+01, 1.500000e+01, 1.600000e+01, 1.6000

In [173]:
c

array([1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00,
       1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00,
       1.000000e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00,
       1.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00,
       2.000000e+00, 2.000000e+00, 2.000000e+00, 2.000000e+00,
       2.000000e+00, 2.000000e+00, 3.000000e+00, 3.000000e+00,
       3.000000e+00, 3.000000e+00, 3.000000e+00, 3.000000e+00,
       4.000000e+00, 4.000000e+00, 4.000000e+00, 4.000000e+00,
       4.000000e+00, 5.000000e+00, 5.000000e+00, 5.000000e+00,
       5.000000e+00, 6.000000e+00, 6.000000e+00, 6.000000e+00,
       6.000000e+00, 7.000000e+00, 7.000000e+00, 7.000000e+00,
       8.000000e+00, 8.000000e+00, 8.000000e+00, 9.000000e+00,
       9.000000e+00, 9.000000e+00, 1.000000e+01, 1.000000e+01,
       1.100000e+01, 1.100000e+01, 1.200000e+01, 1.200000e+01,
       1.300000e+01, 1.300000e+01, 1.400000e+01, 1.400000e+01,
       1.500000e+01, 1.500000e+01, 1.600000e+01, 1.6000

In [174]:
b == c

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [None]:
import cProfile
import pstats
# Profile one call of the wrapper and dump the raw statistics to 'profile_results'
cProfile.run('your_function_to_profile(quantiles, n, p)', 'profile_results')

# Read the profiling results back, ordered by each function's own time (tottime)
stats = pstats.Stats('profile_results').sort_stats('tottime')
stats.print_stats()