In [1]:
import hypothesis.strategies as st
import hypothesis.extra.numpy as npst
from hypothesis import given
from hypothesis import settings

In [24]:
import numpy as np

In [22]:
# Draw one example shape with at least 3 dimensions, each side of length >= 2.
npst.array_shapes(min_dims=3, min_side=2).example()



(2, 2, 2)

In [20]:
# Print the docstring of the shape strategy (dimension and side-length bounds).
help(npst.array_shapes)

Help on function array_shapes in module hypothesis.extra._array_helpers:

array_shapes(*, min_dims: int = 1, max_dims: Optional[int] = None, min_side: int = 1, max_side: Optional[int] = None) -> hypothesis.strategies.SearchStrategy[typing.Tuple[int, ...]]
    Return a strategy for array shapes (tuples of int >= 1).
    
    * ``min_dims`` is the smallest length that the generated shape can possess.
    * ``max_dims`` is the largest length that the generated shape can possess,
      defaulting to ``min_dims + 2``.
    * ``min_side`` is the smallest size that a dimension can possess.
    * ``max_side`` is the largest size that a dimension can possess,
      defaulting to ``min_side + 5``.



In [73]:
# Draw one example float array with at least 2 dimensions, sides >= 2 and
# elements in [1, 100].
# `np.float` was only an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the 64-bit dtype is named explicitly.
npst.arrays(
    np.float64, npst.array_shapes(min_dims=2, min_side=2), elements=st.floats(1, 100)
).example()

array([[1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1.]])

In [75]:
# 3x3 float matrix in which every entry is 2.0 (rank 1), used as a toy input.
x = np.full((3, 3), 2.0)

In [80]:
from scipy.linalg import svd

In [81]:
# Singular values only (no U/V factors) of the transposed matrix.
svd(x.transpose(), compute_uv=False)

array([6., 0., 0.])

In [82]:
# Inspect the runtime type of x.
type(x)

<class 'numpy.ndarray'>

In [88]:
import random
import numpy as np
import pandas as pd
from scipy.linalg import svd
from sklearn.utils.extmath import randomized_svd

from pysmooth import smooth

In [130]:
def num_pc(
    data: np.ndarray, method: str = None, B: int = 20, seed: int = None
) -> int:
    """Estimate the number of significant principal components.

    Parameters
    ----------
    data
        Either a 2-D ``np.ndarray`` (it is normalised with
        ``normalize(data, axis=1)`` and decomposed with ``compute_svd``), or a
        ``dict`` holding an already computed decomposition whose singular
        values are stored under the key ``"d"``.
    method
        ``"elbow"`` (default when ``None``) or ``"permutation"``.  The
        permutation method needs the original matrix; when a precomputed
        decomposition is passed the method falls back to ``"elbow"``.
    B
        Number of permutations used by the ``"permutation"`` method.
    seed
        Optional seed.  It seeds the stdlib ``random`` module (as before) and
        the NumPy generator that draws the permutations.

    Returns
    -------
    int
        The estimated number of components.

    Raises
    ------
    RuntimeError
        If ``method`` is not one of the supported names.
    TypeError
        If ``data`` is neither an ndarray nor a dict.
    ValueError
        If the ndarray is not 2-D, if the dict carries no singular values, or
        if ``B`` is smaller than 1 for the permutation method.
    """
    if method is None:
        method = "elbow"
    if method not in ("elbow", "permutation"):
        # Single-line f-string: the old backslash continuation embedded the
        # source indentation in the middle of the message.
        raise RuntimeError(
            f"method must be either 'elbow' or 'permutation', but {method} was passed"
        )

    if seed is not None:
        random.seed(seed)

    k = None  # only needed (and only defined) when the raw matrix is available
    if isinstance(data, np.ndarray):
        if data.ndim != 2:
            raise ValueError(
                f"data must be a 2-D array, but an array with {data.ndim} dimension(s) was passed"
            )
        print("Computing svd")
        n = data.shape[1]  # number of columns
        k = n if n < 500 else int(max(200, n / 4))
        # NOTE(review): `normalize` is not imported in the visible cells --
        # presumably a row-normalisation helper; confirm where it comes from.
        data = normalize(data, axis=1)
        uu = compute_svd(data, k)
        # A full SVD yields min(rows, cols) values, which can be fewer than k.
        k = min(k, len(uu))
    elif isinstance(data, dict):
        # The shape is deliberately not inspected here: a dict has none.
        if data.get("d") is None:
            raise ValueError(
                "a precomputed decomposition must provide its singular values under the 'd' key"
            )
        if method == "permutation":
            print(
                "Original data is needed for permutation method.\nSetting method to elbow"
            )
            method = "elbow"
        uu = np.asarray(data["d"])
    else:
        raise TypeError(
            f"data must be a numpy array or a dict with a 'd' entry, but {type(data).__name__} was passed"
        )

    if method == "permutation":
        # not sure why this option is present in PLIER as it is not used
        print(
            "WARNING: using the 'permutation' method yields unreliable results.  This is only kept for compatibility with the R version of PLIER"
        )
        return _num_pc_permutation(data, uu, k, B, seed)

    return elbow(uu)


def _num_pc_permutation(
    data: np.ndarray, uu: np.ndarray, k: int, B: int, seed: int = None
) -> int:
    """Count the leading components whose variance share beats a permutation null."""
    if B < 1:
        raise ValueError("B must be at least 1 for the permutation method")

    rng = np.random.default_rng(seed)
    dstat = uu[:k] ** 2 / np.sum(uu[:k] ** 2)
    dstat0 = np.zeros(shape=(B, k))
    for i in range(B):
        # Shuffle the values inside every row independently, always starting
        # from the normalised matrix so its shape never changes between rounds.
        dat0 = rng.permuted(data, axis=1)
        uu0 = compute_svd(dat0, k)
        dstat0[i, :] = uu0[:k] ** 2 / np.sum(uu0[:k] ** 2)

    # Empirical p-value per component, then forced to be non-decreasing.
    psv = np.mean(dstat0 >= dstat, axis=0)
    psv = np.maximum.accumulate(psv)

    # Number of components with p <= 0.1 (a count, not the sum of p-values).
    return int(np.count_nonzero(psv <= 0.1))

In [132]:
def elbow(uu: np.ndarray) -> int:
    """Locate the elbow of a singular-value spectrum.

    The absolute second differences of ``uu`` are smoothed with ``smooth``
    and the first position whose smoothed value is at or below the median is
    converted into a component count.

    Parameters
    ----------
    uu
        1-D sequence of singular values; at least three are required.

    Returns
    -------
    int
        Zero-based index of the first at-or-below-median value, plus 2.
        This is intended to mirror the R expression
        ``which(x <= quantile(x, 0.5))[1] + 1`` (1-based) -- verify against
        the R implementation.

    Raises
    ------
    ValueError
        If fewer than three singular values are given, or if no smoothed
        value compares at or below the median (e.g. NaNs in the input).
    """
    xraw = np.abs(np.diff(uu, n=2))
    if xraw.size == 0:
        raise ValueError("at least three singular values are needed to locate an elbow")

    print("Smoothing data")
    x = np.asarray(smooth(xraw, twiceit=True))

    at_or_below_median = np.flatnonzero(x <= np.quantile(x, 0.5))
    if at_or_below_median.size == 0:
        raise ValueError("could not locate an elbow in the smoothed differences")

    # Use the FIRST match: taking the second one fails when only a single
    # value qualifies and is off whenever the first two matches are not adjacent.
    return int(at_or_below_median[0]) + 2

In [127]:
def compute_svd(data: np.ndarray, k: int) -> np.ndarray:
    """Return the singular values of ``data``.

    Matrices with fewer than 500 columns get a full SVD of the transposed
    matrix (values only; ``k`` is not used on this path).  Wider matrices are
    handled by ``randomized_svd`` with ``k`` components, 3 iterations and a
    fixed ``random_state`` of 803.
    """
    column_count = data.shape[1]
    if column_count >= 500:
        _, singular_values, _ = randomized_svd(
            M=data, n_components=k, n_iter=3, random_state=803
        )
    else:
        singular_values = svd(data.transpose(), compute_uv=False)
    return singular_values

In [133]:
# Run the estimator on x with the default arguments (method resolves to "elbow").
num_pc(x)

Computing svd
Smoothing data


2

In [135]:
# 3x3x3 float array in which every entry is 2.0.
z = np.full((3, 3, 3), 2.0)