In [None]:
%pip install -i https://test.pypi.org/simple/ phdu -U

In [5]:
pip uninstall phdu -y

Found existing installation: phdu 1.0.7
Uninstalling phdu-1.0.7:
  Successfully uninstalled phdu-1.0.7
Note: you may need to restart the kernel to use updated packages.


In [9]:
from setuptools import find_packages

In [11]:
# Packages that setuptools discovers when the search is rooted at the parent directory.
find_packages("..")

['phdu', 'phdu.stats', 'phdu.plots', 'phdu.stats.test', 'phdu.stats.rtopy']

In [10]:
# Same search rooted inside the phdu directory: names come back relative to that root.
find_packages("../phdu")

['stats', 'plots', 'stats.test', 'stats.rtopy']

In [6]:
import sys

# Make the parent directory importable so `phdu` can be loaded from the local checkout.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

# Project structure

Check the tidypath examples:
- [Defining functions](https://github.com/medinajorge/tidypath/blob/master/tests/analysis/variable1/measurement1.py)
- [Calling functions & modifying args](https://github.com/medinajorge/tidypath/blob/master/tests/Example.ipynb)

In [7]:
from phdu import savedata, savefig, add_arg, modify_arg, delete_arg

# Confidence interval

In [8]:
import numpy as np
from phdu import resample
from phdu.stats import plots

In [None]:
# Reproducible sample: 200 draws from an exponential distribution with scale 1.
np.random.seed(0)
X = np.random.exponential(scale=1, size=200)

# Kernel density estimate of the sample.
plots.density_kernel(
    X,
    width=600,
    height=300,
    font_size=22,
    xaxis_title="X",
    ticksize=23,
    cov_factor=0.2,
)

In [None]:
# Q-Q plot of the sample.
# NOTE(review): the reference distribution is whatever plots.qqplot uses by default — confirm.
plots.qqplot(X) # skewed to the right

In [None]:
# The distribution of X is skewed.
# We obtain the confidence interval for the mean using Bootstrap T
CI = resample.CI_bootstrap(X, alpha=0.05, alternative="two-sided", return_stats=False)

# .tolist() converts the bounds to built-in floats, so the printed list reads [a, b] on every
# NumPy version (iterating an array yields NumPy scalars, which print as np.float64(...) on NumPy >= 2).
# Assumes CI is a NumPy array, as its use with .round() and array arithmetic suggests — TODO confirm.
print(f"Sample mean: {X.mean():.3f} \nCI:{CI.round(3).tolist()}")

In [None]:
# Measure how lopsided the interval is around the sample mean.
CI_centered = CI - X.mean()  # signed offsets of the CI bounds from the sample mean
symmetric_tail_size = np.abs(CI_centered).sum() / 2  # tail length the interval would have if it were symmetric
# Relative excess of the upper tail over the symmetric tail length (positive => longer right tail).
asymmetry = (CI_centered[1] - symmetric_tail_size) / symmetric_tail_size
asymmetry # the interval has a longer right tail to account for the skewness (around 10% in this case)

In [None]:
# Or a one-tailed CI: same call as the two-sided interval, only `alternative` changes.
# NOTE(review): which bound alternative="less" leaves open depends on phdu's convention — confirm.
resample.CI_bootstrap(X, alpha=0.05, alternative="less", return_stats=False)

In [None]:
# We can compare all intervals. Let's do it for the median (bootstrap is statistic-agnostic)
import rpy2.robjects as ro

# NOTE(review): the Python-side `bs` is not read below; the R snippets refer to an R object
# named `bs`, presumably created in the R session by resample.bootstrap — confirm before
# renaming or removing this assignment.
bs = resample.bootstrap(X, "bootstrapT", stat="median")
methods = ["t", "percentile", "bca", "bootstrapT"]

# Kept under its own name so that `CI` (the interval for the mean computed earlier) is not
# rebound to a dict, which would break re-running the cells that use it as an array.
median_CIs = {
    method: ro.r(f"CI.{method}(bs, probs=c(0.025, 0.975))")[0]  # [0] because bootstrapT resamples also std
    for method in methods
}
np.median(X), median_CIs

In [None]:
# Notice bootstrapT correctly accounts for skewness.

# Test

In [None]:
from phdu import permutation
from phdu.stats import plots

In [None]:
# Question: is the mean of the second sample greater than the mean of the first?
# Both samples are chi-square draws (the mean of a chi-square equals its df): df=3 vs df=3.5.
X1 = np.random.chisquare(df=3, size=100)
X2 = np.random.chisquare(df=3.5, size=100)

# Overlay the kernel density estimates of the two samples.
plots.density_kernel(
    X1,
    X2,
    width=600,
    height=300,
    font_size=22,
    xaxis_title="X",
    ticksize=23,
    cov_factor=0.3,
)

In [None]:
# Two-sample permutation test on the means of the separately drawn samples X1 and X2.
# NOTE(review): alternative="less" presumably tests mean(X1) < mean(X2) — confirm against phdu's docs.
permutation.permutation_test_2sample_mean(X1, X2, alternative="less")

In [None]:
# Now paired (the same attribute measured for the same experimental units twice)
# The second measurement is the first plus Gaussian noise with mean 0.2 and standard deviation 0.5,
# so each unit's expected difference X2_pair - X1 is 0.2.
X2_pair = X1 + np.random.normal(loc=0.2, scale=0.5, size=X1.size)
# Paired permutation test on the mean of the per-unit differences.
permutation.permutation_test_2sample_paired_diffmean(X1, X2_pair, alternative="less")

In [None]:
# Density of the per-unit differences between the two paired measurements.
# The axis is labelled with the plotted quantity (the difference), not the raw variable.
plots.density_kernel(
    X1 - X2_pair,
    width=600,
    height=300,
    font_size=22,
    xaxis_title="X1 - X2_pair",
    ticksize=23,
    cov_factor=0.3,
)