# Cython vs Pure Python Time Comparison:
This notebook is designed to test the two versions of ThermoPyle as the currently stand (2017-05-12). We want to know if Cython makes a difference in computation time compared to pure Python. To do this, we will use statistical methods at the $N=35$ level.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt, mpld3
import ThermoPyleCYTHON as TPX
import ThermoPyleRAW as TP
import numpy as np
import itertools as it
%matplotlib notebook
mpld3.enable_notebook()
#print(plt.style.available)
plt.style.use(["seaborn-talk","seaborn-notebook","seaborn-paper"])
from timeit import default_timer as timer
doc = lambda obj: print(obj.__doc__)

In [2]:
def test_ThermoFluid(Water: TP.ThermoFluid) -> None:
    Water.add_column(["S", "D"])
    #Water.data["V"] = pd.Series(Water.M / Water.data["D"], index=Water.data.index)
    Water.refresh()
    d_vars = set(Water.vars)
    newCols = {f"d({z})/d({y})|{x}" for x,y,z in it.permutations(d_vars, 3)}.union(d_vars).difference(set(Water.vars))
    #print(f"Number of newCls: {len(newCols)}")
    for col in newCols:
        try:    
            Water.add_column(col)
        except:
            pass
    Water.refresh()

def TP_test(accum):
    start = timer()
    WaterTP = TP.ThermoFluid(fluid="Water", xvar="T", yvar="P", zvar="U", numPoints=[217,217])
    test_ThermoFluid(WaterTP)
    end = timer()
    accum.append(end - start)
    del(WaterTP)
    
def TPX_test(accum):
    start = timer()
    WaterTPX = TPX.ThermoFluid(fluid="Water", xvar="T", yvar="P", zvar="U", numPoints=[217,217])
    test_ThermoFluid(WaterTPX)
    end = timer()
    accum.append(end - start)
    del(WaterTPX)

In [3]:
TPX_times = []
TP_times = []
for i in range(35):
    TPX_test(TPX_times)
    TP_test(TP_times)

In [51]:
import numpy as np
import scipy.stats as stats
import statsmodels.stats.api as sms
import statsmodels.api as sm

#Test Setup
alternative="two-sided"

#Models:
tp = np.array(TP_times)
tpx = np.array(TPX_times)
w, equal_var_p = stats.levene(tp, tpx)
equal_var = bool(equal_var_p <= 0.05)
t, p_value = stats.ttest_ind(TP_times, TPX_times, equal_var=equal_var)

usevar = 'pooled' if equal_var else 'unequal'

tp_stats = sms.DescrStatsW(tp)
tpx_stats = sms.DescrStatsW(tpx)

cm = sms.CompareMeans(tp_stats, tpx_stats)
# alpha=0.05, use_t=True, value=0.0
l, u = cm.tconfint_diff(alpha=0.05, usevar=usevar, alternative=alternative)

U = max(map(abs, [l,u]))
L = min(map(abs, [l,u]))

#sms_summary = cm.summary(alpha=0.05, use_t=True, value=0.0, usevar=usevar)

summary = f"""{"="*(22+len("No difference in computational times." if p_value>0.05 else "Difference in Mean Computational times."))}
Analysis for ThermoPyle Speeds (Cython vs Pure Python)
{"="*(22+len("No difference in computational times." if p_value>0.05 else "Difference in Mean Computational times."))}    
    Summary Statistics:
    -------------------------------
    Pure Python MEAN:     {np.mean(tp):5f}
    Pure Python STD DEV:  {np.std(tp):5f}
    Cython MEAN:          {np.mean(tpx):5f}
    Cython STD DEV:       {np.std(tpx):5f}
    
    Tests:
    ------------------------------------
    Levene's Test:
        Test Statistic:   {w:5f}
        Equal Variances?: {equal_var}
        
    {"Welch's" if not(equal_var) else "Student's"} t-Test:
        Mode:             {alternative}
        Test Statistic:   {t:5f}
        P:                {p_value:5f}
        
    Confidence Interval for Difference:
        Lower Limit       {L:5f}
        Mean Difference   {((l+u)/2):5f}
        Upper Limit:      {U:5f}

{"-"*(22+len("No difference in computational times." if p_value>0.05 else "Difference in Mean Computational times."))}
Significant?:         {"No" if p_value>0.05 else "Yes"}
Meaning:              {"No difference in computational times." if p_value>0.05 else "Difference in Mean Computational times."}
{"-"*(22+len("No difference in computational times." if p_value>0.05 else "Difference in Mean Computational times."))}

End of Analysis.
{"="*(22+len("No difference in computational times." if p_value>0.05 else "Difference in Mean Computational times."))}
"""
print(summary)

Analysis for ThermoPyle Speeds (Cython vs Pure Python)
    Summary Statistics:
    -------------------------------
    Pure Python MEAN:     27.143570
    Pure Python STD DEV:  1.077716
    Cython MEAN:          27.193202
    Cython STD DEV:       1.079805
    
    Tests:
    ------------------------------------
    Levene's Test:
        Test Statistic:   0.097300
        Equal Variances?: False
        
    Welch's t-Test:
        Mode:             two-sided
        Test Statistic:   -0.189696
        P:                0.850112
        
    Confidence Interval for Difference:
        Lower Limit       0.472459
        Mean Difference   -0.049632
        Upper Limit:      0.571722

-----------------------------------------------------------
Significant?:         No
Meaning:              No difference in computational times.
-----------------------------------------------------------

End of Analysis.



# Conclusion:
I'm switching back to using raw python, the extra steps for Cython aren't worth it for us (apparently).