# ggsdtpy

In [1]:
import math
import numpy as np
import pandas as pd
from scipy.stats import gennorm
from scipy.optimize import minimize, SR1

## likelihood function

In [2]:
def fit_ggsdt_ll(x, parameters):
    """Return the negative log-likelihood of the rating counts.

    Parameters
    ----------
    x : array
        Parameter vector laid out as ``[mu2, alpha2, beta, c1, ..., cK]``.
        ``mu2`` and ``alpha2`` are the location and scale of the second
        generalized-normal distribution (the first is fixed at loc 0,
        scale 1), ``beta`` is the shape shared by both, and the remaining
        entries are the criteria.
    parameters : sequence of two arrays
        ``(nR_S1, nR_S2)``: the observed counts, each with one more entry
        than there are criteria.

    Returns
    -------
    float
        ``-loglik``; ``1e+300`` when the log-likelihood is NaN or infinite.
    """
    mu2, alpha2, beta = x[0], x[1], x[2]
    criteria = x[3:]
    counts_s1, counts_s2 = parameters

    total_s1 = np.sum(counts_s1)
    total_s2 = np.sum(counts_s2)

    # Cumulative probability of responding below each criterion.
    cum_far = gennorm.cdf(criteria, beta=beta, loc=0, scale=1)
    cum_hr = gennorm.cdf(criteria - mu2, beta=beta, loc=0, scale=alpha2)

    # Expected counts per response cell: below the first criterion, between
    # successive criteria, and whatever remains above the last criterion.
    first_s1 = total_s1 * cum_far[0]
    between_s1 = total_s1 * np.diff(cum_far)
    last_s1 = total_s1 - first_s1 - np.sum(between_s1)
    expected_s1 = np.hstack([first_s1, between_s1, last_s1])

    first_s2 = total_s2 * cum_hr[0]
    between_s2 = total_s2 * np.diff(cum_hr)
    last_s2 = total_s2 - first_s2 - np.sum(between_s2)
    expected_s2 = np.hstack([first_s2, between_s2, last_s2])

    # Out-of-order criteria give negative cell probabilities, whose log is
    # NaN; silence that warning and handle the NaN explicitly below.
    with np.errstate(invalid='ignore'):
        cell_terms = (counts_s2 * np.log(expected_s2 / total_s2)
                      + counts_s1 * np.log(expected_s1 / total_s1))
        log_lik = np.sum(cell_terms)

    if not np.isfinite(log_lik):
        # A huge finite penalty: returning inf/NaN may cause
        # optimize.minimize() to fail.
        return 1e+300

    return -log_lik

## model fitting

In [3]:
def fit_ggsdt(nR_S1, nR_S2, add_constant = True):
    """Fit the generalized-normal (``scipy.stats.gennorm``) model to counts.

    Parameters
    ----------
    nR_S1, nR_S2 : array-like, 1-D, same even length
        Observed counts per response cell for the two conditions
        (presumably stimulus classes S1 and S2, ordered by rating --
        confirm against the data source). Lists, tuples and arrays are
        accepted. The length is ``2 * n_ratings``.
    add_constant : bool, default True
        If true, ``1 / len(counts)`` is added to every cell before fitting.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``mu2``, ``alpha2``, ``beta``, ``loglike``,
        ``sigma1``, ``sigma2``, ``kurtosis`` and the fitted criteria
        ``c1`` ... ``c{2 * n_ratings - 1}``.

    Raises
    ------
    ValueError
        If the inputs are not 1-D, differ in length, or do not have an
        even length of at least 2.
    """
    # Coerce up front so that plain lists/tuples work with add_constant=True
    # as well (list + float would otherwise raise TypeError).
    nR_S1 = np.asarray(nR_S1, dtype=float)
    nR_S2 = np.asarray(nR_S2, dtype=float)

    if nR_S1.ndim != 1 or nR_S2.ndim != 1:
        raise ValueError("nR_S1 and nR_S2 must be one-dimensional")
    if nR_S1.size != nR_S2.size:
        raise ValueError("nR_S1 and nR_S2 must have the same length")
    if nR_S1.size < 2 or nR_S1.size % 2 != 0:
        raise ValueError("nR_S1 and nR_S2 must have an even length >= 2")

    if add_constant:
        nR_S1 = nR_S1 + (1 / nR_S1.size)
        nR_S2 = nR_S2 + (1 / nR_S2.size)

    n_ratings = nR_S1.size // 2
    n_criteria = 2 * n_ratings - 1

    # Proportion of responses above each cell boundary.
    far = 1 - np.cumsum(nR_S1) / np.sum(nR_S1)
    hr = 1 - np.cumsum(nR_S2) / np.sum(nR_S2)

    # set up initial guess for parameter values
    alp2 = 1
    bet = 2
    mu2 = gennorm.ppf(hr[n_ratings - 1], scale = alp2, beta = bet) \
          - gennorm.ppf(far[n_ratings - 1], scale = alp2, beta = bet)
    # The last entry of far is 0 (ppf = -inf) and is not a criterion, so it
    # is dropped before the quantile transform.
    cri = -1 * gennorm.ppf(far[:n_criteria], scale = alp2, beta = bet)

    guess = np.hstack([mu2, alp2, bet, cri])

    # model fitting
    parameters = [nR_S1, nR_S2]

    # A non-tuple `args` is wrapped by minimize() into a 1-tuple, so the
    # objective receives `parameters` as its single extra argument; the
    # tuple is spelled out here to make that explicit.
    fit = minimize(fit_ggsdt_ll, guess, args = (parameters,),
                   method = 'trust-constr', jac = '2-point', hess = SR1())

    m2, alp2, bet = fit.x[0], fit.x[1], fit.x[2]
    cri = fit.x[3:]
    ll = -fit.fun  # the objective is the negative log-likelihood

    # Standard deviations and excess kurtosis implied by the fitted scale
    # and shape (the first distribution has scale fixed at 1).
    gamma_ratio = math.gamma(3 / bet) / math.gamma(1 / bet)
    sd1 = math.sqrt((1**2 * math.gamma(3 / bet)) / math.gamma(1 / bet))
    sd2 = math.sqrt((alp2**2 * math.gamma(3 / bet)) / math.gamma(1 / bet))
    kurt = (math.gamma(5 / bet) / math.gamma(3 / bet)) / gamma_ratio - 3

    columns = {
        "mu2": [m2],
        "alpha2": [alp2],
        "beta": [bet],
        "loglike": [ll],
        "sigma1": [sd1],
        "sigma2": [sd2],
        "kurtosis": [kurt],
    }
    for i, criterion in enumerate(cri, start = 1):
        columns["c" + str(i)] = [criterion]

    return pd.DataFrame(columns)

## examples

In [4]:
# Example data: six counts per condition, i.e. n_ratings = 3 under the
# len / 2 rule used by fit_ggsdt (presumably confidence-rating response
# counts for stimulus classes S1 and S2 -- confirm against the data source).
nR_S1 = np.array([170, 120, 50, 25, 45, 40])
nR_S2 = np.array([40, 50, 30, 20, 70, 240])

# Fit the raw counts as-is: add_constant = False skips the 1 / len(counts)
# padding that fit_ggsdt would otherwise add to every cell.
fit_ggsdt(nR_S1, nR_S2, add_constant = False)

Unnamed: 0,mu2,alpha2,beta,loglike,sigma1,sigma2,kurtosis,c1,c2,c3,c4,c5
0,1.090558,1.306446,1.781324,-1338.253235,0.75595,0.987609,0.257916,-0.224036,0.271486,0.500422,0.635745,1.012405
