#### Import

In [1]:
import numpy as np
from scipy.stats import mielke, norm, gamma
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import gaussian_kde

#### Functions

In [2]:
def silverman_bandwidth(data):
    if len(data) < 2:
        raise ValueError("Need at least 2 data points")
    
    hi = np.std(data)
    lo = min(hi, np.percentile(data, 75) - np.percentile(data, 25) / 1.34)
    
    if lo == 0:
        lo = hi
    elif lo == 0:
        lo = abs(data[0])
    elif lo == 0:
        lo = 1
    
    return 0.9 * lo * len(data)**(-0.2) / hi


def H(tau, y, q):
    yL = y[:tau]
    yR = y[tau:]
    
    muL = np.quantile(yL, q)
    muR = np.quantile(yR, q)
    
    kappaLsq = q * (1 - q) / (gaussian_kde(yL)(muL)**2)
    kappaRsq = q * (1 - q) / (gaussian_kde(yR)(muR)**2)
    
    return (muL - muR) / (kappaLsq / tau + kappaRsq / (n - tau)) ** (1 / 2)

#### Data

In [3]:
df = pd.read_csv('cow_allwars_data.csv', header=None)
x = df[0].to_numpy().astype('float')
z = df[1].to_numpy().astype('float')
z[np.argmin(z)] += .01
y = np.log(z)
n = len(x)

#### Plot

In [4]:
plt.scatter(x, np.log(z))
plt.xlabel('year')
plt.ylabel('log number of dead')
plt.show()

In [6]:
khat, shat, loc, scalehat = mielke.fit(z, floc=1001)

In [7]:
muhat_lognorm, sigmahat_lognorm = norm.fit(y)

In [8]:
muhat = scalehat
thetahat = shat
alphahat = khat / shat

#### Comput Hn and plot

In [9]:
def H(tau, y, q):
    counting_tau = tau + 1
    yL = y[:tau]
    yR = y[tau:]
    
    muL = np.quantile(yL, q)
    muR = np.quantile(yR, q)
    
    # Calculate Silverman's bandwidth
    #bwL = silverman_bandwidth(yL)
    #bwR = silverman_bandwidth(yR)
    

    # Create KDE using Gaussian kernel and Silverman's bandwidth
    #kde = gaussian_kde(data, bw_method=bandwidth)
    
    kappaLsq = q * (1 - q) / (gaussian_kde(yL)(muL)**2)
    kappaRsq = q * (1 - q) / (gaussian_kde(yR)(muR)**2)
    
    return (muL - muR) / (kappaLsq / counting_tau + kappaRsq / (n - counting_tau)) ** (1 / 2)

#### Simulate lots of Hn plots

In [20]:
signif50 = 2.7
signif75 = 2.7
S = 1_000

sample50 = np.zeros(S)
sample75 = np.zeros(S)

In [21]:
for s in range(S):
    if s % 1000 == 0:
        print(s)
    #z = mielke(loc=1001, scale=scalehat, k=khat, s=shat).rvs(n)
    #y = np.log(z)
    
    y = np.random.randn(n)
    
    Hs50 = np.zeros(n)
    Hs75 = np.zeros(n)

    for i in range(5, n - 5):
        tau = i
        Hs50[i] = H(tau, y, .5)
        Hs75[i] = H(tau, y, .75)
        
    sample50[s] = np.max(Hs50)
    sample75[s] = np.max(Hs75)

0


In [22]:
sim_threshold50 = np.quantile(sample50, .95)
sim_threshold75 = np.quantile(sample75, .95)

print(sim_threshold50)
print(sim_threshold75)

2.7541580587136556
3.0354153332827774


In [23]:
np.sum(sample50 > signif75) / S

0.054

In [24]:
np.sum(sample75 > signif75) / S

0.088

In [25]:
np.sum(np.logical_or(sample50 > signif75, sample75 > signif75)) / S

0.116

In [15]:
fjijf

NameError: name 'fjijf' is not defined

In [17]:
counter50 / S

0.5325

In [18]:
counter75 / S

0.132

In [19]:
counter_either / S

0.543

In [20]:
fjijf

NameError: name 'fjijf' is not defined

In [None]:
rpy2

In [None]:
n = len(x)
n

In [None]:
taus = np.arange(n)
Hs50 = np.zeros(n)
Hs75 = np.zeros(n)

for i in range(5, n - 3):
    tau = taus[i]
    Hs50[i] = H(tau, y, .5)
    Hs75[i] = H(tau, y, .75)

In [None]:
plt.plot(x[5:-4], Hs50[5:-4])
plt.plot(x[5:-4], Hs75[5:-4])
plt.show()

In [None]:
a = np.arange(100)
a

In [None]:
a[5:-5]

In [None]:
a[:3]

In [None]:
a[3:]

In [None]:
np.quantile(y, .75)

In [None]:
import numpy as np
from scipy.stats import gaussian_kde

def silverman_bandwidth(x):
    if len(x) < 2:
        raise ValueError("Need at least 2 data points")
    
    hi = np.std(x)
    lo = min(hi, np.percentile(x, 75) - np.percentile(x, 25) / 1.34)
    
    if lo == 0:
        lo = hi
    elif lo == 0:
        lo = abs(x[0])
    elif lo == 0:
        lo = 1
    
    return 0.9 * lo * len(x)**(-0.2) / hi

# Example data
filt = np.random.random(5) < .3
data = np.random.normal(loc=0, scale=1, size=5) * filt + np.random.gamma(shape=2, size=5) * (1 - filt)

# Calculate Silverman's bandwidth
bandwidth = silverman_bandwidth(data)

# Create KDE using Gaussian kernel and Silverman's bandwidth
kde = gaussian_kde(data, bw_method=bandwidth)

# Evaluate KDE at specific points
x_values = np.linspace(-3, 3, 100)
kde_values = kde.evaluate(x_values)

# Plot the KDE
import matplotlib.pyplot as plt
plt.plot(x_values, kde_values)
plt.plot(x_values, gaussian_kde(data)(x_values))
plt.show()

In [None]:
np.random.gamma()