In [1]:
from significance_of_mean_cuda import significance_of_mean_cuda
from utils import significance_of_mean, getNumerator, pValue
import numpy as np
import time
import multiprocessing
import concurrent.futures as cf
import matplotlib.pyplot as plt

In [2]:
import pandas as pd
import seaborn as sns

In [3]:
import matplotlib as mpl

mpl.rcParams['text.usetex'] = False  # not really needed

### Get synthetic data

In [4]:
# Number of (A, B) sample pairs generated for each benchmark configuration.
num_examples = 1

In [5]:
# Candidate sample sizes (number of observations drawn per sample).
N = [50,100, 150, 200, 250]
# Candidate bin counts handed to the significance-of-mean implementations.
s =  [10, 50, 100, 200]

#### The CPU calculations are parallelized over the samples, using all but three of the machine's cores (five cores in total on the original machine).

In [6]:
def p_value_calc(args):
    """Worker for the process pool: compute one result from a packed argument.

    Parameters
    ----------
    args : sequence of three items
        ``(a, b, bins)`` exactly as yielded by ``calibration_series_generator``.

    Returns
    -------
    The first element of what ``significance_of_mean(a, b, bins)`` returns.
    """
    sample_a, sample_b, n_bins = args
    return significance_of_mean(sample_a, sample_b, n_bins)[0]

def calibration_series_generator(A,B, S):
    """Lazily yield one ``[a, b, S]`` work item per row of ``A``.

    Parameters
    ----------
    A, B : array-like with ``.shape`` and row indexing
        Row ``i`` of each is converted to a plain list with ``.tolist()``.
        The number of items produced is ``A.shape[0]``.
    S : any
        Passed through unchanged as the third entry of every item.
    """
    for row in range(A.shape[0]):
        yield [A[row].tolist(), B[row].tolist(), S]

def calibration_test(A,B,bins):
    """Run ``p_value_calc`` over every row pair of ``A`` and ``B`` in a process pool.

    Parameters
    ----------
    A, B : array-like
        Row ``i`` of ``A`` is paired with row ``i`` of ``B``
        (see ``calibration_series_generator``).
    bins : any
        Forwarded unchanged to ``p_value_calc`` for every pair.

    Returns
    -------
    list
        One result per row pair, in row order.
    """
    # Leave three cores free, but never request fewer than one worker:
    # ProcessPoolExecutor raises ValueError for max_workers <= 0, which the
    # plain ``cpu_count() - 3`` hits on machines with three cores or fewer.
    n_workers = max(1, multiprocessing.cpu_count() - 3)
    with cf.ProcessPoolExecutor(max_workers=n_workers) as pool:
        p_list = list(pool.map(p_value_calc, calibration_series_generator(A, B, bins)))
    return p_list
    

In [7]:
# Benchmark: run time as a function of the bin count, at the fixed sample size N[1].
cpu = list()            # seconds spent in calibration_test, one entry per bin count
gpu = list()            # seconds spent in the CUDA implementation, one entry per bin count
cpu_numerator = list()  # seconds spent in a single getNumerator call, one entry per bin count
n = N[1]
for bins in s:
    # Re-seed so every bin count is timed on the same samples.
    np.random.seed(42)
    A = np.asarray([np.random.normal(0, 1, n) for _ in range(num_examples)])
    B = np.asarray([np.random.normal(0, 1, n) for _ in range(num_examples)])

    # GPU: construction, run and result retrieval are all inside the timed region.
    start = time.time()
    SGM = significance_of_mean_cuda(bins,dtype_v=np.uint16,dtype_A=np.float64)
    SGM.run(A,B)
    p = SGM.get_p_values()
    end = time.time()
    t_gpu = end - start
    gpu.append(t_gpu)
    print("GPU: ", t_gpu)

    # CPU: process-pool version over all sample pairs.
    start = time.time()
    P = calibration_test(A,B,bins)
    end = time.time()
    t_cpu = end - start
    cpu.append(t_cpu)
    print("CPU1: ",t_cpu)

    # Inputs for a single direct getNumerator call on the first sample pair.
    x = list(A[0])
    y = list(B[0])
    m = len(x)
    n = len(y)  # same value as the n chosen above: both samples have n entries
    z = x + y;z.sort()
    S = sum(z[m:])

    start = time.time()
    # The result is only timed here. It is stored under its own name so that the
    # sample-size list N, which later cells index and iterate, is not overwritten.
    numerator = getNumerator(m, n, S, list(z), np.float64)
    end = time.time()
    t_cpu_numerator = end - start
    # Kept apart from `cpu` so that `cpu` has exactly one entry per bin count
    # and can be plotted against `s`.
    cpu_numerator.append(t_cpu_numerator)
    print("CPU2: ",t_cpu_numerator)

    print(np.allclose(p,P))

GPU:  0.019036531448364258
CPU:  0.18491458892822266
True
GPU:  0.021863698959350586
CPU:  1.814922571182251
True
GPU:  0.02924346923828125


KeyboardInterrupt: 

In [None]:
# Run time against bin count, drawn through an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(s, cpu, 'r-', label='Not parallelized')
ax.plot(s, gpu, 'g-', label='Parallelized')
ax.legend(loc='upper left')
ax.set_xlabel(r"$n_{bins}$")
ax.set_ylabel("Time(s)")
fig.tight_layout()
fig.savefig("figures/normal_S")
plt.show()

In [None]:
# Display the bin count that the sample-size sweep below assigns to `bins`.
s[2]

In [None]:
# Display the third entry of N.
N[2]

In [None]:
# Benchmark: run time as a function of the sample size, with the bin count held at s[2].
cpu, gpu = [], []
bins = s[2]
for n in N:
    # Same seed for every sample size.
    np.random.seed(42)
    A = np.asarray([np.random.normal(0, 1, n) for _ in range(num_examples)])
    B = np.asarray([np.random.normal(0, 1, n) for _ in range(num_examples)])

    # GPU timing covers object construction and the run call.
    start = time.time()
    SGM = significance_of_mean_cuda(bins, dtype_v=np.uint16, dtype_A=np.float64)
    PC = SGM.run(A, B)
    end = time.time()
    t_gpu = end - start
    gpu.append(t_gpu)
    print("GPU: ", t_gpu)

    # CPU timing covers the whole process-pool call.
    start = time.time()
    P = calibration_test(A, B, bins)
    end = time.time()
    t_cpu = end - start
    cpu.append(t_cpu)
    print("CPU: ",t_cpu)

In [None]:
# Run time against sample size, drawn through an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(N, cpu, 'r-', label='Not parallelized')
ax.plot(N, gpu, 'g-', label='Parallelized')
ax.legend(loc='upper left')
ax.set_xlabel(r"$n$")
ax.set_ylabel("Time(s)")
fig.tight_layout()
fig.savefig("figures/normal_N")
plt.show()

In [None]:
# Display the sample size that the calibration cell below assigns to `n`.
N[1]

In [None]:
# Display the bin count that the calibration cell below assigns to `bins`.
s[2]

In [None]:
# Calibration run on normal data: 200 sample pairs of size N[1], evaluated with s[2] bins.
num_examples = 200
n, bins = N[1], s[2]

np.random.seed(42)
A = np.asarray([np.random.normal(0, 1, n) for _ in range(num_examples)])
B = np.asarray([np.random.normal(0, 1, n) for _ in range(num_examples)])

SGM = significance_of_mean_cuda(bins, dtype_v=np.uint16, dtype_A=np.float64)
PC = SGM.run(A, B)

# NOTE(review): getdf and my_scatter_plot are defined in later cells of this
# notebook; those cells have to be executed before this one.
pDf = getdf(PC)
my_scatter_plot(pDf, "figures/normal_calibration")

In [None]:
# Rebuild the table from the current PC and redraw the calibration plot
# (repeats the last two statements of the previous cell).
pDf = getdf(PC)
my_scatter_plot(pDf,"figures/normal_calibration")

In [None]:
# Calibration run on exponential data: 300 sample pairs of size N[0], evaluated with s[3] bins.
num_examples = 300
n, bins = N[0], s[3]

np.random.seed(42)
A = np.asarray([np.random.exponential(1, n) for _ in range(num_examples)])
B = np.asarray([np.random.exponential(1, n) for _ in range(num_examples)])

SGM = significance_of_mean_cuda(bins, dtype_v=np.uint16, dtype_A=np.float64)
PC = SGM.run(A, B)

# NOTE(review): getdf and my_scatter_plot are defined in later cells of this
# notebook; those cells have to be executed before this one.
pDf = getdf(PC)
my_scatter_plot(pDf, "hello")

In [None]:
# Benchmark over the bin counts in s, reusing the A and B already in the namespace
# (no new samples are drawn in this cell).
cpu, gpu = [], []
n = N[0]
for bins in s:
    # GPU timing covers object construction and the run call.
    start = time.time()
    SGM = significance_of_mean_cuda(bins, dtype_v=np.uint16, dtype_A=np.float64)
    PC = SGM.run(A, B)
    end = time.time()
    t_gpu = end - start
    gpu.append(t_gpu)
    print("GPU: ", t_gpu)

    # CPU timing covers the whole process-pool call.
    start = time.time()
    P = calibration_test(A, B, bins)
    end = time.time()
    t_cpu = end - start
    cpu.append(t_cpu)
    print("CPU: ",t_cpu)

In [None]:
def getdf(P):
    """Pair sorted observed p-values with an evenly spaced reference grid.

    Parameters
    ----------
    P : sequence or array of float
        Observed p-values. The input is not modified; a sorted copy is used.

    Returns
    -------
    pandas.DataFrame
        Column ``'Observed p-value'`` holds the values of ``P`` in ascending
        order. Column ``'Theoretical p-value'`` holds ``len(P)`` evenly spaced
        values from ``1/len(P)`` to ``1 - 1/len(P)``.

    Raises
    ------
    ValueError
        If ``P`` is empty.
    """
    # np.sort returns a sorted copy, so the caller's data keeps its order and
    # can still be compared element-wise with other results afterwards.
    p_arr = np.sort(np.asarray(P))
    # Size the reference grid from the data itself rather than from a global,
    # so both columns always have the same length.
    n_obs = len(p_arr)
    if n_obs == 0:
        raise ValueError("getdf needs at least one p-value")
    offset = 1.0/float(n_obs)
    ideal_arr = np.linspace(offset,1.0-offset,n_obs)
    Pdf = pd.DataFrame({'Observed p-value':p_arr,'Theoretical p-value':ideal_arr})
    return Pdf

In [None]:
def my_scatter_plot(df,save_name):
    """Scatter observed against theoretical p-values on log-log axes and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Theoretical p-value'`` and
        ``'Observed p-value'``.
    save_name : str
        Target passed to ``Figure.savefig``.

    Notes
    -----
    Calls ``sns.set`` and ``sns.set_context``, which change seaborn's global
    style settings.
    """
    sns.set(style="white")
    sns.set_context("talk")

    # End points of the reference diagonal come from the theoretical column.
    theoretical = df["Theoretical p-value"]
    low, hi = min(theoretical), max(theoretical)

    f, ax = plt.subplots(figsize=(7, 7))
    ax.set(xscale="log", yscale="log")
    g = sns.regplot(
        x='Theoretical p-value',
        y='Observed p-value',
        data=df,
        ax=ax,
        fit_reg=False,
        scatter_kws={"s": 5},
    )

    # Thin black y = x line: points on it have observed == theoretical.
    diagonal = [low, hi]
    g.plot(diagonal, diagonal, 'k-', linewidth=.5)

    sns.despine()
    f.tight_layout()
    f.savefig(save_name)

In [None]:
# `Pdf` exists only as a local inside getdf; the table built by the cells above
# is stored in the global `pDf`.
my_scatter_plot(pDf,"hello")

In [None]:
# Element-wise comparison (default np.allclose tolerances) of the calibration_test
# result P with the value PC returned by SGM.run.
np.allclose(P,PC)

### The parallelized version is faster and yields the same p-values 

In [None]:
# Ratio of two hard-coded timings, rounded to one decimal.
# NOTE(review): presumably the CPU and GPU run times (seconds) copied from an
# earlier run — confirm where these numbers came from.
round(107.3141598701477 / 14.397771835327148,1)

### The parallelized version is approximately 7.5 times faster than the non-parallelized version.

### Comparison of increasing sample sizes. Sample sizes larger than 160 yield a memory error in the non-parallelized version, so the experiment stops there.

In [None]:
# Benchmark on Beta(2, 5) data: five sample pairs per sample size, 200 bins.
listsizes = [20,60,120,160]
plain_shift = list()  # seconds for calibration_test, one entry per sample size
gpu_shift = list()    # seconds for the CUDA implementation, one entry per sample size
bins = 200
for size in listsizes:
    # Same seed for every sample size.
    np.random.seed(1)
    A = np.asarray([np.random.beta(2.0,5.0,size) for _ in range(5)])
    B = np.asarray([np.random.beta(2.0,5.0,size) for _ in range(5)])

    start = time.time()
    # calibration_test takes the bin count as a required third argument.
    P = calibration_test(A,B,bins)
    end = time.time()
    plain_shift.append(round(end - start,3))
    print("Plain")
    print(round(end - start,3))

    start = time.time()
    SGM = significance_of_mean_cuda(bins,dtype_v=np.uint32,dtype_A=np.float64)
    PC = SGM.run(A,B)
    end = time.time()
    gpu_shift.append(round(end - start,3))
    print("GPU")
    print(round(end - start,3))

    print(np.allclose(PC,P))

In [None]:
# Run time against sample size for both implementations, drawn through an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(listsizes, plain_shift, 'r-', label='Non-parallelized')
ax.plot(listsizes, gpu_shift, 'g-', label='GPU shift-method')
ax.legend(loc='upper left')
ax.set_xlabel("Sample size")
ax.set_ylabel("Time(s)")
fig.savefig("figures/comparison")
plt.show()