In [1]:
import time
import timeit
import numpy as np
from sklearn.externals.joblib import Parallel, delayed

from mgcpy.independence_tests.mgc import MGC
from mgcpy.independence_tests.dcorr import DCorr
from mgcpy.benchmarks import simulations as sims

In [2]:
def wrapper(func, *args, **kwargs):
    """Freeze a call to ``func`` and hand back a zero-argument callable.

    ``timeit`` invokes its callable statement without arguments, so the
    positional and keyword arguments are captured here and supplied when the
    returned callable is eventually run.
    """
    def wrapped():
        # Nothing is evaluated until this closure is called.
        outcome = func(*args, **kwargs)
        return outcome

    return wrapped

In [3]:
def _dcorr(test_stat, X, Y, is_fast=False):
    p_value, _ = test_stat.p_value(X, Y, is_fast=is_fast)  # p-value call has mgc.test_statistic(X, Y) call\
    return p_value

In [4]:
def _mgc(test_stat, X, Y, is_fast=False):
    p_value, _ = test_stat.p_value(X, Y, is_fast=is_fast)  # p-value call has mgc.test_statistic(X, Y) call\
    return p_value

In [5]:
def find_performance(test_stat, is_fast=False, num_samples_range=None, repeat=3):
    """Time ``test_stat``'s p-value computation on linear data of growing size.

    For every sample size a 1-dimensional linear simulation (noise 0.1) is
    generated and the p-value call is timed ``repeat`` times, one execution
    per timing. Each ``(num_samples, timings)`` pair is printed as it is
    produced, and the full list is printed at the end.

    Parameters
    ----------
    test_stat : object
        Independence test exposing ``get_name()`` and
        ``p_value(X, Y, is_fast=...)``.
    is_fast : bool, optional
        Forwarded unchanged to ``p_value``.
    num_samples_range : iterable of int, optional
        Sample sizes to benchmark. Defaults to
        ``[50, 100, 200, 500, 1000, 2000, 5000, 10000]``.
    repeat : int, optional
        Number of independent timings per sample size (default 3).

    Returns
    -------
    None
        Results are only printed.
    """
    if num_samples_range is None:
        num_samples_range = [50, 100, 200, 500, 1000, 2000, 5000, 10000]

    linear_data = []

    # The timings run sequentially. A joblib ``Parallel`` pool used to be
    # opened around this loop but was never used, so it has been removed to
    # keep idle worker processes from competing with the code being timed.
    for num_samples in num_samples_range:
        X, Y = sims.linear_sim(num_samp=num_samples, num_dim=1, noise=0.1)

        # Pick the timing helper that matches the test's reported name.
        timed_func = _mgc if test_stat.get_name() == "mgc" else _dcorr
        test_stat_w = wrapper(timed_func, test_stat, X, Y, is_fast=is_fast)

        # number=1: each of the `repeat` timings measures a single execution.
        time_taken = timeit.repeat(test_stat_w, repeat=repeat, number=1)

        print(num_samples, time_taken)
        linear_data.append((num_samples, time_taken))
    print()
    print(linear_data)

In [6]:
# Instantiate the two independence tests that the benchmark cells below time.
mgc = MGC()
dcorr = DCorr(which_test='unbiased')

In [7]:
# Time DCorr p-value computation with is_fast=True.
find_performance(dcorr, is_fast=True)

50 [0.0051514219999972966, 0.0022076080000026366, 0.0018121000000022036]
100 [0.0034445040000008476, 0.0030993330000015362, 0.0030953680000003203]
200 [0.007373816999997729, 0.00601642899999888, 0.0060290710000003855]
500 [0.01354306700000052, 0.013218483999999364, 0.013298075999998105]
1000 [0.02722889400000028, 0.02713840299999859, 0.02569159599999793]
2000 [0.05760880199999718, 0.05953626699999859, 0.05291860200000187]
5000 [0.13682578100000242, 0.14193364200000147, 0.13060394499999717]
10000 [0.28220065100000014, 0.2610882979999971, 0.257064794999998]

[(50, [0.0051514219999972966, 0.0022076080000026366, 0.0018121000000022036]), (100, [0.0034445040000008476, 0.0030993330000015362, 0.0030953680000003203]), (200, [0.007373816999997729, 0.00601642899999888, 0.0060290710000003855]), (500, [0.01354306700000052, 0.013218483999999364, 0.013298075999998105]), (1000, [0.02722889400000028, 0.02713840299999859, 0.02569159599999793]), (2000, [0.05760880199999718, 0.05953626699999859, 0.0529186

In [8]:
# Time DCorr p-value computation with is_fast=False.
find_performance(dcorr, is_fast=False)

50 [0.002845835000002239, 0.0011753540000007945, 0.0010945900000010056]
100 [0.002760518999998851, 0.002069775999999024, 0.0023434770000001492]
200 [0.008292690000001102, 0.006414610999996739, 0.005347397000001308]
500 [0.048456222000002214, 0.03643240600000297, 0.032817249000000714]
1000 [0.20957781599999947, 0.16526774299999758, 0.15862956999999867]
2000 [1.226450045, 1.0538206680000002, 1.0607039649999983]
5000 [10.685185272999998, 10.098354584000006, 9.850486527999998]
10000 [47.362619448000004, 42.27663848899999, 41.56813245399999]

[(50, [0.002845835000002239, 0.0011753540000007945, 0.0010945900000010056]), (100, [0.002760518999998851, 0.002069775999999024, 0.0023434770000001492]), (200, [0.008292690000001102, 0.006414610999996739, 0.005347397000001308]), (500, [0.048456222000002214, 0.03643240600000297, 0.032817249000000714]), (1000, [0.20957781599999947, 0.16526774299999758, 0.15862956999999867]), (2000, [1.226450045, 1.0538206680000002, 1.0607039649999983]), (5000, [10.6851852

In [9]:
# Time MGC p-value computation with is_fast=True.
find_performance(mgc, is_fast=True)

50 [0.03530227699999955, 0.03007528400002002, 0.02789397200001531]
100 [0.08847141200001829, 0.1006327779999765, 0.10103151199999161]
200 [0.3445014119999996, 0.3373190419999901, 0.48196147500001985]
500 [1.9459453159999782, 1.94906924, 1.876369025999992]
1000 [7.270794276999993, 7.344032519999985, 7.5510465280000005]
2000 [29.949594722, 29.144898159000007, 28.045119925999984]
5000 [190.83023113499996, 192.24423500299997, 190.66331257899992]
10000 [820.110880737, 808.4045245320001, 826.0645153840001]

[(50, [0.03530227699999955, 0.03007528400002002, 0.02789397200001531]), (100, [0.08847141200001829, 0.1006327779999765, 0.10103151199999161]), (200, [0.3445014119999996, 0.3373190419999901, 0.48196147500001985]), (500, [1.9459453159999782, 1.94906924, 1.876369025999992]), (1000, [7.270794276999993, 7.344032519999985, 7.5510465280000005]), (2000, [29.949594722, 29.144898159000007, 28.045119925999984]), (5000, [190.83023113499996, 192.24423500299997, 190.66331257899992]), (10000, [820.11088

In [None]:
# Time MGC p-value computation with is_fast=False.
find_performance(mgc, is_fast=False)

50 [20.277572832999795, 19.514672866999717, 19.76604124200003]
100 [77.21430823599985, 75.460420247, 76.92919380700005]
200 [360.83216021599947, 373.1858237340002, 399.7870919220004]


In [None]:
# Benchmark the MGC p-value computation on 1-D linear data for 10..150 samples.
print("Linear data (varying num_samples)\n")
print("num_samples", "time_taken(in secs)")
num_samples_range = range(10, 151, 10)
linear_data = list()
for num_samples in num_samples_range:
    X, Y = sims.linear_sim(num_samp=num_samples, num_dim=1, noise=0.1)

    # `mgc` is an MGC instance, not a function, so it is timed through the
    # `_mgc` helper, the same way find_performance() does it.
    mgc_w = wrapper(_mgc, mgc, X, Y)
    time_taken = timeit.repeat(mgc_w, repeat=5, number=1)  # 5 timings, 1 execution each

    print(num_samples, time_taken)
    linear_data.append((num_samples, time_taken))
print()
print(linear_data)

In [None]:
# Plot setup: classic matplotlib style, automatic legend placement and a
# white figure background.
import matplotlib.pyplot as plt; plt.style.use('classic')
# from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

fig, ax = plt.subplots() # create a new figure with a default 111 subplot
# x-axis values: 10, 20, ..., 150 samples.
num_samples = range(10, 151, 10)
linear_data=[(10, [1.338857016991824, 1.2757414609659463, 1.2779263269621879, 1.4971145219751634, 1.3661146109807305]), (20, [2.8680544089875184, 2.87015632604016, 2.728754689975176, 2.8876148290000856, 2.890731868974399]), (30, [5.032408714003395, 4.969336921989452, 4.934050586016383, 4.95871455501765, 4.946530134009663]), (40, [7.999172992014792, 7.848493886995129, 7.731969874002971, 7.740541153005324, 7.625895873003174]), (50, [10.81704160303343, 11.116306754993275, 11.214253887010273, 11.362716250005178, 11.366938438965008]), (60, [18.07483608595794, 15.904782090976369, 17.116227485996205, 17.64618211099878, 15.549220753018744]), (70, [22.56243832403561, 20.79431069199927, 21.139240577991586, 21.381752497982234, 20.882448325050063]), (80, [26.60254893900128, 26.625501686998177, 27.297777889005374, 26.655231295968406, 25.129886200011242]), (90, [32.88909237500047, 33.1772776839789, 32.75596649199724, 33.18418929097243, 31.98979727498954]), (100, [40.717417851963546, 40.31093926599715, 39.18663591099903, 39.284123717981856, 39.613843587983865]), (110, [47.1687217259896, 47.23789670702536, 47.77898542000912, 50.29210359300487, 47.594653180975]), (120, [55.56740204896778, 55.87813897395972, 54.004372351046186, 54.95389352401253, 56.913238506007474]), (130, [66.20663257600972, 64.53097786597209, 66.60136964300182, 66.44064873602474, 65.23120342200855]), (140, [75.94843081396539, 77.61276641098084, 75.93916373699903, 76.49872068100376, 79.62616679898929]), (150, [85.9680890890304, 86.16587820299901, 83.19360252196202, 84.42991423199419, 86.2413115259842])]
# Drop the sample counts and keep only the list of trial timings per size.
python_perf_times = [j for i, j in linear_data]
# The first trial column is drawn on its own with a label and the remaining
# columns without one, so the legend gets a single entry for the trial lines.
ax.plot(num_samples, np.array(python_perf_times)[:, 0], marker='X', markerfacecolor='blue', markersize=6, color='lightblue', linewidth=4, label="All 5 Trials")
ax.plot(num_samples, np.array(python_perf_times)[:, 1:], marker='X', markerfacecolor='blue', markersize=6, color='lightblue', linewidth=4)
# Mean over the trials for each sample size.
ax.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='o', markerfacecolor='red', markersize=6, color='blue', linewidth=2, label="Avg. of 5 Trials")

# Disabled zoomed inset of the 100-150 sample region.
# axins = zoomed_inset_axes(ax, 1.5, loc=2) # zoom-factor: 2.5, location: upper-left
# axins.plot(num_samples, python_perf_times, marker='X', markerfacecolor='olive', markersize=4, color='lightblue', linewidth=5)
# axins.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='X', markerfacecolor='red', markersize=6, color='blue', linewidth=5)
# axins.set_xlim(100, 150) # apply the x-limits
# axins.set_ylim(40, 120) # apply the y-limits

ax.set_xlabel('# of Samples (Data Points)', fontsize=18)
ax.set_xlim(9, 151)
ax.set_ylabel('Execution Time (Seconds)', fontsize=18)
ax.set_ylim(0, 90)
plt.legend()
plt.show()

In [None]:
# Re-run of the linear-data MGC benchmark (10..150 samples); the results are
# stored separately in `linear_data_after` for comparison.
print("Linear data (varying num_samples)\n")
print("num_samples", "time_taken(in secs)")
num_samples_range = range(10, 151, 10)
linear_data_after = list()
for num_samples in num_samples_range:
    X, Y = sims.linear_sim(num_samp=num_samples, num_dim=1, noise=0.1)

    # `mgc` is an MGC instance, not a function, so it is timed through the
    # `_mgc` helper, the same way find_performance() does it.
    mgc_w = wrapper(_mgc, mgc, X, Y)
    time_taken = timeit.repeat(mgc_w, repeat=5, number=1)  # 5 timings, 1 execution each

    print(num_samples, time_taken)
    linear_data_after.append((num_samples, time_taken))
print()
print(linear_data_after)

In [None]:
# Plot the timings collected in `linear_data_after`: every trial plus the
# per-size mean.
import matplotlib.pyplot as plt; plt.style.use('classic')
# from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

fig, ax = plt.subplots() # create a new figure with a default 111 subplot
# x-axis values: 10, 20, ..., 150 samples.
num_samples = range(10, 151, 10)
# Drop the sample counts and keep only the list of trial timings per size.
python_perf_times = [j for i, j in linear_data_after]
# The first trial column is drawn on its own with a label and the remaining
# columns without one, so the legend gets a single entry for the trial lines.
ax.plot(num_samples, np.array(python_perf_times)[:, 0], marker='X', markerfacecolor='blue', markersize=6, color='lightblue', linewidth=4, label="All 5 Trials")
ax.plot(num_samples, np.array(python_perf_times)[:, 1:], marker='X', markerfacecolor='blue', markersize=6, color='lightblue', linewidth=4)
# Mean over the trials for each sample size.
ax.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='o', markerfacecolor='red', markersize=6, color='blue', linewidth=2, label="Avg. of 5 Trials")

# Disabled zoomed inset of the 100-150 sample region.
# axins = zoomed_inset_axes(ax, 1.5, loc=2) # zoom-factor: 2.5, location: upper-left
# axins.plot(num_samples, python_perf_times, marker='X', markerfacecolor='olive', markersize=4, color='lightblue', linewidth=5)
# axins.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='X', markerfacecolor='red', markersize=6, color='blue', linewidth=5)
# axins.set_xlim(100, 150) # apply the x-limits
# axins.set_ylim(40, 120) # apply the y-limits

ax.set_xlabel('# of Samples (Data Points)', fontsize=18)
ax.set_xlim(9, 151)
ax.set_ylabel('Execution Time (Seconds)', fontsize=18)
ax.set_ylim(0, 90)
plt.legend()
plt.show()

In [None]:
# Comparison figure: Python timings before and after, drawn on one set of axes.
import matplotlib.pyplot as plt; plt.style.use('classic')
# from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

fig, ax = plt.subplots() # create a new figure with a default 111 subplot
# x-axis values: 10, 20, ..., 150 samples.
num_samples = range(10, 151, 10)

# "Before" series, taken from `linear_data`: all trials, then the per-size mean.
python_perf_times = [j for i, j in linear_data]
ax.plot(num_samples, np.array(python_perf_times)[:, 0], marker='X', markerfacecolor='red', markersize=6, color='yellow', linewidth=2, label="Python - All (before)")
ax.plot(num_samples, np.array(python_perf_times)[:, 1:], marker='X', markerfacecolor='red', markersize=6, color='yellow', linewidth=2)
ax.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='o', markerfacecolor='red', markersize=6, color='orange', linewidth=1, label="Python Avg. (before)")

# "After" series, taken from `linear_data_after`; `python_perf_times` is
# rebound here, so it holds the "after" timings from this point on.
python_perf_times = [j for i, j in linear_data_after]
ax.plot(num_samples, np.array(python_perf_times)[:, 0], marker='X', markerfacecolor='blue', markersize=6, color='lightblue', linewidth=2, label="Python - All (after)")
ax.plot(num_samples, np.array(python_perf_times)[:, 1:], marker='X', markerfacecolor='blue', markersize=6, color='lightblue', linewidth=2)
ax.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='o', markerfacecolor='darkblue', markersize=6, color='darkblue', linewidth=1, label="Python - Avg. (after)")

r_perf_times = {10: [1.179895, 1.435764, 1.392574, 1.364731, 1.244356], 20: [3.626441, 3.38207, 3.435028, 3.312011, 3.472121], 30: [6.725924, 7.19072, 6.850648, 7.024207, 6.970873], 40: [11.60234, 11.26915, 11.36571, 12.27015, 12.33833], 50: [19.56418, 17.98312, 17.36909, 17.16499, 18.17077], 60: [25.21052, 24.80274, 24.37495, 24.47379, 25.59039], 70: [33.09666, 33.38766, 32.52008, 32.90658, 33.6318], 80: [41.91707, 42.18968, 42.25746, 43.52817, 42.34682], 90: [54.26672, 60.08258, 52.39857, 51.12012, 52.91298], 100: [66.36207, 70.70109, 64.84415, 65.04437, 63.17586], 110: [76.69378, 77.56344, 79.68007, 79.66596, 82.81884], 120: [97.89148, 96.41177, 99.12007, 100.0838, 100.4973], 130: [119.39, 117.1136, 118.6135, 117.5654, 116.0784], 140: [139.2461, 136.9656, 137.8895, 136.6488, 139.2114], 150: [158.5903, 156.5999, 161.6194, 160.23, 161.1935]}
# R series: all trials, then the per-size mean. The values are read in the
# dict's iteration order and paired positionally with `num_samples`.
plt.plot(num_samples, np.array([v for k, v in r_perf_times.items()])[:, 0], marker='X', markerfacecolor='darkgreen', markersize=6, color='lightgreen', linewidth=2, label="R - All")
plt.plot(num_samples, np.array([v for k, v in r_perf_times.items()])[:, 1:], marker='X', markerfacecolor='darkgreen', markersize=6, color='lightgreen', linewidth=2)
plt.plot(num_samples, [np.mean(v) for k, v in r_perf_times.items()], marker='o', markerfacecolor='darkgreen', markersize=6, color='darkgreen', linewidth=1, label="R - Avg.")

# Axis labels, limits, title and legend for the combined figure.
ax.set_xlabel('# of Samples (Data Points)', fontsize=18)
ax.set_xlim(9, 155)
ax.set_ylabel('Execution Time (Seconds)', fontsize=18)
ax.set_ylim(0, 165)
plt.title("Cythonize 'distance transform' module (5 trials)")
plt.legend(prop={'size': 13})
plt.show()

In [None]:
# R code for benchmarking R MGC (r_performance.R)
# require("mgc")
# require("microbenchmark")

# print("Linear data (varying num_samples)")
# print("num_samples time_taken(in secs)")

# num_samples_range = seq(10, 150, by=10)
# linear_data <- list()
# i <- 1
# for (num_samples in num_samples_range){
#   data <- mgc.sims.linear(num_samples, 1, eps=0.1)

#   #start_time <- Sys.time()
#   #mgc.test(data$X, data$Y)
#   #end_time <- Sys.time()

#   #time_taken <- end_time - start_time
#   #time_taken <- as.numeric(time_taken, units = "secs")
#   times = seq(1, 5, by=1)
#   for (t in times){
#   time_taken <- microbenchmark(mgc.test(data$X, data$Y), times=1, unit="secs") # best of 5 executions
#   print(num_samples)
#   print(time_taken[1, 2]/(10^9))
#   linear_data <- c(linear_data, list("num_samples"=num_samples, "time_taken"=time_taken[1, 2]/(10^9)))
#   }
  
#   i <- i + 1
# }


# Performance Data (formatted)
#[(10, 1.179895), (10, 1.435764), (10, 1.392574), (10, 1.364731), (10, 1.244356), 
# (20, 3.626441), (20, 3.38207), (20, 3.435028), (20, 3.312011), (20, 3.472121), 
# (30, 6.725924), (30, 7.19072), (30, 6.850648), (30, 7.024207), (30, 6.970873), 
# (40, 11.60234), (40, 11.26915), (40, 11.36571), (40, 12.27015), (40, 12.33833), 
# (50, 19.56418), (50, 17.98312), (50, 17.36909), (50, 17.16499), (50, 18.17077), 
# (60, 25.21052), (60, 24.80274), (60, 24.37495), (60, 24.47379), (60, 25.59039), 
# (70, 33.09666), (70, 33.38766), (70, 32.52008), (70, 32.90658), (70, 33.6318), 
# (80, 41.91707), (80, 42.18968), (80, 42.25746), (80, 43.52817), (80, 42.34682), 
# (90, 54.26672), (90, 60.08258), (90, 52.39857), (90, 51.12012), (90, 52.91298), 
# (100, 66.36207), (100, 70.70109), (100, 64.84415), (100, 65.04437), (100, 63.17586), 
# (110, 76.69378), (110, 77.56344), (110, 79.68007), (110, 79.66596), (110, 82.81884), 
# (120, 97.89148), (120, 96.41177), (120, 99.12007), (120, 100.0838), (120, 100.4973), 
# (130, 119.39), (130, 117.1136), (130, 118.6135), (130, 117.5654), (130, 116.0784), 
# (140, 139.2461), (140, 136.9656), (140, 137.8895), (140, 136.6488), (140, 139.2114), 
# (150, 158.5903), (150, 156.5999), (150, 161.6194), (150, 160.23), (150, 161.1935)]

In [None]:
# Plot setup: classic matplotlib style, automatic legend placement and a
# white figure background.
import matplotlib.pyplot as plt; plt.style.use('classic')
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

# x-axis values: 10, 20, ..., 150 samples.
num_samples = range(10, 151, 10)
r_perf_times = {10: [1.179895, 1.435764, 1.392574, 1.364731, 1.244356], 20: [3.626441, 3.38207, 3.435028, 3.312011, 3.472121], 30: [6.725924, 7.19072, 6.850648, 7.024207, 6.970873], 40: [11.60234, 11.26915, 11.36571, 12.27015, 12.33833], 50: [19.56418, 17.98312, 17.36909, 17.16499, 18.17077], 60: [25.21052, 24.80274, 24.37495, 24.47379, 25.59039], 70: [33.09666, 33.38766, 32.52008, 32.90658, 33.6318], 80: [41.91707, 42.18968, 42.25746, 43.52817, 42.34682], 90: [54.26672, 60.08258, 52.39857, 51.12012, 52.91298], 100: [66.36207, 70.70109, 64.84415, 65.04437, 63.17586], 110: [76.69378, 77.56344, 79.68007, 79.66596, 82.81884], 120: [97.89148, 96.41177, 99.12007, 100.0838, 100.4973], 130: [119.39, 117.1136, 118.6135, 117.5654, 116.0784], 140: [139.2461, 136.9656, 137.8895, 136.6488, 139.2114], 150: [158.5903, 156.5999, 161.6194, 160.23, 161.1935]}
# R timings: all trials, then the per-size mean. The values are read in the
# dict's iteration order and paired positionally with `num_samples`.
plt.plot(num_samples, np.array([v for k, v in r_perf_times.items()])[:, 0], marker='X', markerfacecolor='darkgreen', markersize=6, color='lightgreen', linewidth=4, label="All 5 Trials")
plt.plot(num_samples, np.array([v for k, v in r_perf_times.items()])[:, 1:], marker='X', markerfacecolor='darkgreen', markersize=6, color='lightgreen', linewidth=4)
plt.plot(num_samples, [np.mean(v) for k, v in r_perf_times.items()], marker='o', markerfacecolor='red', markersize=6, color='blue', linewidth=2, label="Avg. of 5 Trials")
plt.xlabel('# of Samples (Data Points)', fontsize=18)
plt.xlim(9, 151)
plt.ylabel('Execution Time (Seconds)', fontsize=18)
plt.ylim(0, 165)
plt.legend()

In [None]:
# Plot setup: classic matplotlib style, automatic legend placement and a
# white figure background.
import matplotlib.pyplot as plt; plt.style.use('classic')
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

# x-axis values: 10, 20, ..., 150 samples.
num_samples = range(10, 151, 10)
r_perf_times = {10: [1.179895, 1.435764, 1.392574, 1.364731, 1.244356], 20: [3.626441, 3.38207, 3.435028, 3.312011, 3.472121], 30: [6.725924, 7.19072, 6.850648, 7.024207, 6.970873], 40: [11.60234, 11.26915, 11.36571, 12.27015, 12.33833], 50: [19.56418, 17.98312, 17.36909, 17.16499, 18.17077], 60: [25.21052, 24.80274, 24.37495, 24.47379, 25.59039], 70: [33.09666, 33.38766, 32.52008, 32.90658, 33.6318], 80: [41.91707, 42.18968, 42.25746, 43.52817, 42.34682], 90: [54.26672, 60.08258, 52.39857, 51.12012, 52.91298], 100: [66.36207, 70.70109, 64.84415, 65.04437, 63.17586], 110: [76.69378, 77.56344, 79.68007, 79.66596, 82.81884], 120: [97.89148, 96.41177, 99.12007, 100.0838, 100.4973], 130: [119.39, 117.1136, 118.6135, 117.5654, 116.0784], 140: [139.2461, 136.9656, 137.8895, 136.6488, 139.2114], 150: [158.5903, 156.5999, 161.6194, 160.23, 161.1935]}
# python_perf_times = [(10, [1.49958706900361, 1.4638143540068995, 1.4030425519740675, 1.3368593089981005, 1.3159789350174833]), (20, [3.2115682750009, 3.1250685720006004, 2.985335567005677, 2.9859190119896084, 2.9734666740114335]), (30, [5.610436049988493, 5.6114496150112245, 5.622973475983599, 5.895857474999502, 5.7409448719990905]), (40, [9.215580917021725, 9.190541190007934, 9.390753469022457, 9.245557349990122, 10.03561893699225]), (50, [15.49718086401117, 14.153537111997139, 13.771135971997865, 13.558778512990102, 13.596087051002542]), (60, [19.011020814010408, 19.007475175021682, 20.17377686299733, 20.04925867501879, 19.011196178005775]), (70, [26.569766158994753, 25.17168415800552, 25.29060220599058, 25.5092632509768, 26.11904137901729]), (80, [34.66687790400465, 32.48355589900166, 33.50255332799861, 33.05492725997465, 32.60379292801372]), (90, [40.688592491991585, 40.77937115501845, 40.58879354299279, 40.54697668799781, 41.878742447996046]), (100, [49.89725384998019, 49.83314521599095, 49.63331973500317, 52.141055349988164, 53.55299054601346]), (110, [61.061585283023305, 59.94531189699774, 59.702204303001054, 59.59889721899526, 59.678368475986645]), (120, [73.59825191399432, 72.8198820920079, 75.33874897297937, 70.41872229299042, 70.9401475340128]), (130, [82.78681519901147, 83.08883842398063, 88.68507358198985, 87.82382465302362, 89.34507059599855]), (140, [98.98420377401635, 101.70177302500815, 95.44383225901402, 95.491604343988, 95.61231937698903]), (150, [115.22319458299899, 111.85267155797919, 115.49483873700956, 118.16604766299133, 121.73665377899306])]

# Mean R timing per sample size.
plt.plot(num_samples, [np.mean(v) for k, v in r_perf_times.items()], marker='o', markerfacecolor='darkgreen', markersize=6, color='green', linewidth=2, label="R")
# NOTE(review): `python_perf_times` is not assigned in this cell (the
# assignment above is commented out), so this line plots whatever an earlier
# cell last bound to that name. Confirm which data set is intended.
plt.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='X', markerfacecolor='red', markersize=8, color='orange', linewidth=2, label="Python")
plt.xlabel('# of Samples (Data Points)', fontsize=18)
plt.ylabel('Execution Time (Seconds)', fontsize=18)
plt.legend()

In [None]:
# Benchmark the MGC p-value computation with is_fast=True on 1-D linear data
# for 10..150 samples.
print("Linear data (varying num_samples) - Fast MGC\n")
print("num_samples", "time_taken(in secs)")
num_samples_range = range(10, 151, 10)
linear_data_fast_mgc = list()
for num_samples in num_samples_range:
    X, Y = sims.linear_sim(num_samp=num_samples, num_dim=1, noise=0.1)

    # `mgc` is an MGC instance, not a function, so it is timed through the
    # `_mgc` helper; the fast option is passed by keyword to make it explicit.
    mgc_w = wrapper(_mgc, mgc, X, Y, is_fast=True)
    time_taken = timeit.repeat(mgc_w, repeat=5, number=1)  # 5 timings, 1 execution each

    print(num_samples, time_taken)
    linear_data_fast_mgc.append((num_samples, time_taken))
print()
print(linear_data_fast_mgc)

In [None]:
# Plot setup: classic matplotlib style, automatic legend placement and a
# white figure background.
import matplotlib.pyplot as plt; plt.style.use('classic')
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

# x-axis values: 10, 20, ..., 150 samples.
num_samples = range(10, 151, 10)
r_perf_times = {10: [1.179895, 1.435764, 1.392574, 1.364731, 1.244356], 20: [3.626441, 3.38207, 3.435028, 3.312011, 3.472121], 30: [6.725924, 7.19072, 6.850648, 7.024207, 6.970873], 40: [11.60234, 11.26915, 11.36571, 12.27015, 12.33833], 50: [19.56418, 17.98312, 17.36909, 17.16499, 18.17077], 60: [25.21052, 24.80274, 24.37495, 24.47379, 25.59039], 70: [33.09666, 33.38766, 32.52008, 32.90658, 33.6318], 80: [41.91707, 42.18968, 42.25746, 43.52817, 42.34682], 90: [54.26672, 60.08258, 52.39857, 51.12012, 52.91298], 100: [66.36207, 70.70109, 64.84415, 65.04437, 63.17586], 110: [76.69378, 77.56344, 79.68007, 79.66596, 82.81884], 120: [97.89148, 96.41177, 99.12007, 100.0838, 100.4973], 130: [119.39, 117.1136, 118.6135, 117.5654, 116.0784], 140: [139.2461, 136.9656, 137.8895, 136.6488, 139.2114], 150: [158.5903, 156.5999, 161.6194, 160.23, 161.1935]}
linear_data_copy = [(10, [1.3570548910065554, 1.317704908986343, 1.250599796010647, 1.2129867470066529, 1.2188538330083247]), (20, [2.809477289003553, 2.662971756013576, 2.668166168994503, 2.810354543995345, 2.8085849939961918]), (30, [5.089567081973655, 4.908904140000232, 4.963905091979541, 4.862470469990512, 4.872956630017143]), (40, [7.7755367509962525, 7.639183080988005, 7.636393271997804, 7.643885943980422, 7.673354588012444]), (50, [11.33807383000385, 11.284572928998386, 11.579023433005204, 11.935501523985295, 11.589415601018118]), (60, [15.944066369003849, 15.691345383005682, 15.252294573001564, 15.21922270700452, 15.21380895600305]), (70, [20.097495351015823, 20.11479070200585, 20.14134455099702, 20.623360280005727, 20.394629952003015]), (80, [25.643525285995565, 25.59139153698925, 25.659372112015262, 25.80397002500831, 25.668325702979928]), (90, [32.741740912984824, 31.854122709017247, 31.89940690298681, 31.885286441014614, 32.73692899401067]), (100, [41.03519103198778, 40.1257850920083, 40.30881031299941, 39.976445167005295, 39.989109216985526]), (110, [46.49892311100848, 48.002260846988065, 48.743909012991935, 47.894316210004035, 46.35784816299565]), (120, [57.73439836999751, 59.01353847500286, 56.690341667999746, 56.15975032598362, 57.02876815799391]), (130, [67.02439867099747, 68.27157784899464, 66.59815313798026, 65.15316394198453, 66.08920010898146]), (140, [79.0230416849954, 77.24781862000236, 79.41891040399787, 77.30308651400264, 79.06514339800924]), (150, [90.88946284601116, 86.56499147901195, 86.16265920398291, 86.45272049200139, 87.83721533801872])]
# Keep only the trial timings: `linear_data_copy` is the hard-coded list
# above, `linear_data_fast_mgc` comes from the Fast MGC benchmark cell.
python_perf_times = [j for i, j in linear_data_copy]
python_perf_times_fast_mgc = [j for i, j in linear_data_fast_mgc]

# One line per implementation, each showing the mean over trials per size.
plt.plot(num_samples, [np.mean(v) for k, v in r_perf_times.items()], marker='o', markerfacecolor='darkgreen', markersize=6, color='green', linewidth=2, label="R")
plt.plot(num_samples, [np.mean(i) for i in python_perf_times], marker='X', markerfacecolor='red', markersize=8, color='orange', linewidth=2, label="Python")
plt.plot(num_samples, [np.mean(i) for i in python_perf_times_fast_mgc], marker='X', markerfacecolor='darkblue', markersize=8, color='blue', linewidth=2, label="Python (FastMGC)")

plt.ylim(-10, 160)
plt.xlabel('# of Samples (Data Points)', fontsize=18)
plt.ylabel('Execution Time (Seconds)', fontsize=18)
plt.legend()

In [None]:
# Plot setup: classic matplotlib style, automatic legend placement and a
# white figure background.
import matplotlib.pyplot as plt; plt.style.use('classic')
plt.rcParams["legend.loc"] = "best"
plt.rcParams['figure.facecolor'] = 'white'

# Measured sample sizes: 10, 20, ..., 150.
num_samples = range(10, 151, 10)
# Sample sizes to extrapolate to: 160, 170, ..., 1000.
num_samples_syn = range(160, 1001, 10)
r_perf_times = {10: [1.179895, 1.435764, 1.392574, 1.364731, 1.244356], 20: [3.626441, 3.38207, 3.435028, 3.312011, 3.472121], 30: [6.725924, 7.19072, 6.850648, 7.024207, 6.970873], 40: [11.60234, 11.26915, 11.36571, 12.27015, 12.33833], 50: [19.56418, 17.98312, 17.36909, 17.16499, 18.17077], 60: [25.21052, 24.80274, 24.37495, 24.47379, 25.59039], 70: [33.09666, 33.38766, 32.52008, 32.90658, 33.6318], 80: [41.91707, 42.18968, 42.25746, 43.52817, 42.34682], 90: [54.26672, 60.08258, 52.39857, 51.12012, 52.91298], 100: [66.36207, 70.70109, 64.84415, 65.04437, 63.17586], 110: [76.69378, 77.56344, 79.68007, 79.66596, 82.81884], 120: [97.89148, 96.41177, 99.12007, 100.0838, 100.4973], 130: [119.39, 117.1136, 118.6135, 117.5654, 116.0784], 140: [139.2461, 136.9656, 137.8895, 136.6488, 139.2114], 150: [158.5903, 156.5999, 161.6194, 160.23, 161.1935]}
# python_perf_times = [(10, [1.49958706900361, 1.4638143540068995, 1.4030425519740675, 1.3368593089981005, 1.3159789350174833]), (20, [3.2115682750009, 3.1250685720006004, 2.985335567005677, 2.9859190119896084, 2.9734666740114335]), (30, [5.610436049988493, 5.6114496150112245, 5.622973475983599, 5.895857474999502, 5.7409448719990905]), (40, [9.215580917021725, 9.190541190007934, 9.390753469022457, 9.245557349990122, 10.03561893699225]), (50, [15.49718086401117, 14.153537111997139, 13.771135971997865, 13.558778512990102, 13.596087051002542]), (60, [19.011020814010408, 19.007475175021682, 20.17377686299733, 20.04925867501879, 19.011196178005775]), (70, [26.569766158994753, 25.17168415800552, 25.29060220599058, 25.5092632509768, 26.11904137901729]), (80, [34.66687790400465, 32.48355589900166, 33.50255332799861, 33.05492725997465, 32.60379292801372]), (90, [40.688592491991585, 40.77937115501845, 40.58879354299279, 40.54697668799781, 41.878742447996046]), (100, [49.89725384998019, 49.83314521599095, 49.63331973500317, 52.141055349988164, 53.55299054601346]), (110, [61.061585283023305, 59.94531189699774, 59.702204303001054, 59.59889721899526, 59.678368475986645]), (120, [73.59825191399432, 72.8198820920079, 75.33874897297937, 70.41872229299042, 70.9401475340128]), (130, [82.78681519901147, 83.08883842398063, 88.68507358198985, 87.82382465302362, 89.34507059599855]), (140, [98.98420377401635, 101.70177302500815, 95.44383225901402, 95.491604343988, 95.61231937698903]), (150, [115.22319458299899, 111.85267155797919, 115.49483873700956, 118.16604766299133, 121.73665377899306])]

# R: plot the measured per-size means, fit a degree-2 polynomial to them and
# evaluate the fit on `num_samples_syn`.
plt.plot(num_samples, [np.mean(v) for k, v in r_perf_times.items()], color='green', linewidth=5, label="R")
model = np.polyfit(num_samples, [np.mean(v) for k, v in r_perf_times.items()], 2)
predicted_r = np.polyval(model, num_samples_syn)
plt.plot(num_samples_syn, predicted_r, color='green', linewidth=5, linestyle="-.", label="R (Predicted)")

# Python: same procedure; `model` is rebound to the Python fit.
# NOTE(review): `python_perf_times` is not assigned in this cell (the
# assignment above is commented out), so these lines use whatever an earlier
# cell last bound to that name. Confirm which data set is intended.
plt.plot(num_samples, [np.mean(i) for i in python_perf_times], color='orange', linewidth=5, label="Python")
model = np.polyfit(num_samples, [np.mean(i) for i in python_perf_times], 2)
predicted_python = np.polyval(model, num_samples_syn)
plt.plot(num_samples_syn, predicted_python, color='orange', linewidth=5, linestyle="-.", label="Python (Predicted)")


plt.xlabel('# of Samples (Data Points)', fontsize=18)
plt.ylabel('Execution Time (Seconds)', fontsize=18)
plt.legend()



In [None]:
# Benchmark the MGC p-value computation on linear data with 10 samples while
# the number of dimensions grows from 100 to 1000.
print("Linear data (varying num_dimensions)\n")
num_dims_range = range(100, 1001, 100)
linear_data_dims = list()
for num_dims in num_dims_range:
    X, Y = sims.linear_sim(num_samp=10, num_dim=num_dims, noise=0.1)

    # `mgc` is an MGC instance, not a function, so it is timed through the
    # `_mgc` helper, the same way find_performance() does it.
    mgc_w = wrapper(_mgc, mgc, X, Y)
    time_taken = min(timeit.repeat(mgc_w, repeat=5, number=1))  # best of 5 executions

    print("nums_dims:", num_dims, "time_taken (in secs):", time_taken)
    linear_data_dims.append((num_dims, time_taken))
print()
print(linear_data_dims)

In [None]:
# Benchmark the MGC p-value computation on 1-D spiral data for 10..100 samples.
print("Non-linear (spiral) data (varying num_samples)")
num_samples_range = range(10, 101, 10)
spiral_data = list()
for num_samples in num_samples_range:
    X, Y = sims.spiral_sim(num_samp=num_samples, num_dim=1, noise=0.1)

    # `mgc` is an MGC instance, not a function, so it is timed through the
    # `_mgc` helper, the same way find_performance() does it.
    mgc_w = wrapper(_mgc, mgc, X, Y)
    time_taken = min(timeit.repeat(mgc_w, repeat=5, number=1))  # best of 5 executions

    # The value printed here is a sample count, so the label says so (it used
    # to read "nums_dims:", carried over from the dimensions benchmark).
    print("num_samples:", num_samples, "time_taken (in secs):", time_taken)
    spiral_data.append((num_samples, time_taken))
print()
print(spiral_data)