# All necessary modules and sub modules:-

In [4]:
from sklearn.metrics import pairwise_distances as outer
from scipy.spatial.distance import cdist
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from scipy.stats import hypergeom as hg
from typing import Literal
from multiprocessing import Queue,Process,Value,Array
import ctypes
from time import sleep

In [1]:
import sys
def Bar(it, prefix="", size=60, out=sys.stdout):
    count = len(it)
    def show(j):
        x = int(size*j/count)
        print(f"{prefix}[{'#'*x}{'.'*(size-x)}] {j}/{count}",end='\r', file=out, flush=True)
    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    print("\n", flush=True, file=out)

# 2 sample 2x2 contingency method implementation:-

In [5]:
def X_deg_1(X,Y,alpha=0.05,alternative:Literal['x!=y','x<y','x>y']='x!=y'):
    n1,p=X.shape
    n2,_=Y.shape
    N=n1+n2
    assert p==_,"dimensions of x and y should be equal"
    adj=outer(np.vstack((X,Y)))
    g=nx.from_numpy_array(adj)
    mst=nx.minimum_spanning_tree(g)
    deg=[int(mst.degree[i]==1) for i in range(N)]
    dx1,dy1=map(sum,[deg[:n1],deg[n1:]])
    dx2,dy2=n1-dx1,n2-dy1
    d1,d2=dx1+dy1,dx2+dy2
    p_value=-1
    if alternative=='x!=y':p_value=2*hg.cdf(dx1,N,d1,n1)
    elif alternative=='x>y':p_value=hg.sf(dx1,N,d1,n1)
    else:p_value=1-hg.sf(dx1,N,d1,n1)
    return p_value,('accept H0' if p_value>alpha else 'reject H0')

In [7]:
def exp(p,n,m,delta,sigma,cdf=np.random.normal,trials=100):
    col=len(p)
    r=[0]*col
    for i in range(col):
        for _ in Bar(range(trials),f"Status[{i+1}/{col}]:"):
            x=cdf(0,1,(n[i],p[i]))
            y=cdf(delta[i],sigma[i],(m[i],p[i]))
            p_val,ver=X_deg_1(x,y,alternative='x!=y')
            if ver=='reject H0':r[i]+=1
    df=pd.DataFrame([r],columns=[f'p={p[i]},d={delta[i]},s={sigma[i]}'for i in range(5)],index=['X(deg 1)'])
    return r,df

# Fruitless effort towards parallelization throuth multi-threading:-

In [3]:
def process(p,n,m,delta,sigma,rep,r):
    print("here!")
    for i in range(len(p)):
        for _ in range(rep): 
            # next(it)
            x=np.random.normal(0,1,(n[i],p[i]))
            y=np.random.normal(delta[i],sigma[i],(m[i],p[i]))
            p_val,ver=X_deg_1(x,y,alternative='x!=y')
            if ver=='reject H0':r[i]+=1
    # return r
def simul(p,n,m,delta,sigma,trials,k=20):
    out=Queue()
    r=Array(ctypes.c_int,len(p))
    t=[Process(target=process,args=(p,n,m,delta,sigma,trials//k,r)) for i in range(k)]
    for i in t:i.start()
    for i in t:i.join()
    df=pd.DataFrame([list(r)],columns=[f'p={p[i]},d={delta[i]},s={sigma[i]}'for i in range(len(p))],index=['X(deg 1)'])
    return r,df

In [6]:
p=[1,2,5,10,20]
n=[100]*5
m=[100]*5
delta=[0]*5
sigma=[1.3,1.2,1.2,1.1,1.075]
trials=100
# r=Array(ctypes.c_int,5)
# t=[Process(target=process,args=(p,n,m,delta,sigma,trials//k,r)) for i in range(k)]
k=10

def dummy():
    print("hi!",flush=True)
    sys.stdout.flush()
    sleep(1)
t=[Process(target=dummy) for i in range(k)]
for i in t:i.start()
for i in t:i.join()
print(t[0])

<Process name='Process-1' pid=27348 parent=24384 stopped exitcode=1>


# Normal Distribution(Scale Shift):-

In [8]:
r,scale=exp(p=[10],n=[100],m=[100],delta=[0],sigma=[1.3],trials=2600000)
# r,scale=simul(p=[1,2,5,10,20],n=[1000]*5,m=[1000]*5,delta=[0]*5,sigma=[1.3,1.2,1.2,1.1,1.075],trials=1000)
scale

Status[1/1]:[............................................................] 13512/2600000

KeyboardInterrupt: 

# Normal Distribution(Location Shift):-

In [8]:
r,loc=exp(p=[1,2,5,10,20],n=[100]*5,m=[100]*5,delta=[0.3,0.5,0.75,1,1.2],sigma=[1]*5,trials=1000)
loc

Status[1/5]:[############################################################] 1000/1000

Status[2/5]:[############################################################] 1000/1000

Status[3/5]:[############################################################] 1000/1000

Status[4/5]:[############################################################] 1000/1000

Status[5/5]:[############################################################] 1000/1000



Unnamed: 0,"p=1,d=0.3,s=1","p=2,d=0.5,s=1","p=5,d=0.75,s=1","p=10,d=1,s=1","p=20,d=1.2,s=1"
X(deg 1),0,12,4,2,1


# Log Normal Distribution(Location Shift):-

In [5]:
r,log_norm=exp(p=[1,2,5,10,20],n=[100]*5,m=[100]*5,delta=[0.4,0.4,0.3,0.3,0.3],sigma=[1]*5,cdf=np.random.lognormal,trials=1000)
log_norm

Status[1/5]:[############################################################] 100/100

Status[2/5]:[############################################################] 100/100

Status[3/5]:[############################################################] 100/100

Status[4/5]:[############################################################] 100/100

Status[5/5]:[############################################################] 100/100



Unnamed: 0,"p=1,d=0.4,s=1","p=2,d=0.4,s=1","p=5,d=0.3,s=1","p=10,d=0.3,s=1","p=20,d=0.3,s=1"
X(deg 1),0,1,2,14,61
