In [1]:
import time

import pandas as pd
import numpy as np
from scipy.stats import norm

from qolmat.analysis.pklm_numpy import PKLMtest
from qolmat.benchmark.missing_patterns import UniformHoleGenerator

In [2]:
# Case 1: values are removed uniformly at random, independently of the data.
nb_rows = 500
nb_col = 2

# Seeded generator so that the hole pattern (and the resulting p-value) is
# reproducible across kernel restarts.
rng = np.random.RandomState(42)

# Build the complete DataFrame from random integer-valued floats in [0, 100).
data = {f"Colonne_{i}": rng.randint(0, 100, nb_rows).astype(float) for i in range(nb_col)}
df_complete = pd.DataFrame(data)

# Pick 10% of the cells (flat, row-major positions) without replacement.
nb_valeurs_manquantes = int(0.1 * df_complete.size)
indices_valeurs_manquantes = rng.choice(df_complete.size, nb_valeurs_manquantes, replace=False)

# Apply the holes through an explicit boolean mask instead of writing into
# `df.values`: that attribute is not guaranteed to be a writable view of the
# frame (it is read-only under pandas Copy-on-Write and a copy for multi-block
# frames), so in-place assignment through it may fail or be silently lost.
missing_mask = np.zeros(df_complete.shape, dtype=bool)
missing_mask.flat[indices_valeurs_manquantes] = True
df = df_complete.mask(missing_mask)

In [3]:
# Run the PKLM test on the Case 1 frame and report the elapsed time.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock and can jump if the system clock changes.
start_time = time.perf_counter()

p_val = PKLMtest(df.to_numpy())

print("--- %s seconds ---" % (time.perf_counter() - start_time))
print(f"The p-value of the PKLM test is {p_val}")

--- 3.6945717334747314 seconds ---
The p-value of the PKLM test is 0.7741935483870968


In [5]:
# Case 2: Column 2 is missing whenever Column 1 exceeds the 97.5% standard-normal quantile (one-sided)

In [4]:
# Draw 200 samples from a bivariate normal with zero mean and identity
# covariance, using a fixed seed.
rng = np.random.RandomState(42)
data = rng.multivariate_normal(
    mean=[0, 0],
    cov=[[1, 0], [0, 1]],
    size=200,
)
df = pd.DataFrame(data, columns=["Column 1", "Column 2"])

# Threshold: the 97.5% quantile of the standard normal distribution.
q975 = norm.ppf(0.975)

# Column 1 is never masked; Column 2 is masked on the rows where Column 1
# lies above the threshold.
exceeds_threshold = df["Column 1"].gt(q975)
df_mask = pd.DataFrame(
    {"Column 1": False, "Column 2": exceeds_threshold},
    index=df.index,
)

# Replace the masked cells with NaN, leaving `df` itself untouched.
df_nan = df.mask(df_mask)

In [8]:
# Run the PKLM test on `df_nan` and report the elapsed time.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock and can jump if the system clock changes.
start_time = time.perf_counter()
p_val = PKLMtest(df_nan.to_numpy())
print("--- %s seconds ---" % (time.perf_counter() - start_time))
print(f"The p-value of the PKLM test is {p_val}")

--- 1.9384348392486572 seconds ---
The p-value of the PKLM test is 0.03225806451612903


In [6]:
# Case 3: Column 2 is missing whenever |Column 1| exceeds the 97.5% standard-normal quantile (two-sided)

In [7]:
# Column 2 is masked on the rows where the absolute value of Column 1 exceeds
# q975; Column 1 is never masked.
# Case-specific names are used on purpose: rebinding the shared `df_mask` /
# `df_nan` here would silently change the input of any other cell that reads
# them if the cells are executed out of order.
df_mask_case3 = pd.DataFrame(
    {"Column 1": False, "Column 2": df["Column 1"].abs() > q975}, index=df.index
)
df_nan_case3 = df.mask(df_mask_case3)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock and can jump if the system clock changes.
start_time = time.perf_counter()
p_val = PKLMtest(df_nan_case3.to_numpy())
print("--- %s seconds ---" % (time.perf_counter() - start_time))
print(f"The p-value of the PKLM test is {p_val}")

--- 1.947416067123413 seconds ---
The p-value of the PKLM test is 0.03225806451612903
