# QuadratiK Usage Examples

## Normality Test

This section contains examples of the parametric and non-parametric normality tests based on kernel-based quadratic distances.

### Parametric

In [None]:
import numpy as np
from QuadratiK.kernel_test import KernelTest

# Reproducible sample: 100 draws from a bivariate standard normal.
np.random.seed(42)
data = np.random.randn(100, 2)

# Run the normality test with parametric centering; the object returned by
# `test` carries the result attributes reported below.
normality_test = KernelTest(h=0.4, centering_type="param", random_state=42).test(data)

# Each entry pairs an output template with the result attribute it reports.
for template, attr in (
    ("Test : {}", "test_type_"),
    ("Execution time: {:.3f}", "execution_time"),
    ("H0 is Rejected : {}", "h0_rejected_"),
    ("Test Statistic : {}", "test_statistic_"),
    ("Critical Value (CV) : {}", "cv_"),
    ("CV Method : {}", "cv_method_"),
    ("Selected tuning parameter : {}", "h"),
):
    print(template.format(getattr(normality_test, attr)))

In [None]:
# Build the summary text for the parametric normality test, then print it.
summary_text = normality_test.summary()
print(summary_text)

### Non-parametric

In [None]:
# Run the normality test with non-parametric centering on the same `data`
# sample used for the parametric test. `random_state=42` is passed, as in the
# other KernelTest examples, so that repeated runs of this cell report the
# same results.
normality_test = KernelTest(
    h=0.4, centering_type="nonparam", random_state=42
).test(data)

# Each entry pairs an output template with the result attribute it reports.
for template, attr in (
    ("Test : {}", "test_type_"),
    ("Execution time: {:.3f}", "execution_time"),
    ("H0 is Rejected : {}", "h0_rejected_"),
    ("Test Statistic : {}", "test_statistic_"),
    ("Critical Value (CV) : {}", "cv_"),
    ("CV Method : {}", "cv_method_"),
    ("Selected tuning parameter : {}", "h"),
):
    print(template.format(getattr(normality_test, attr)))

In [None]:
# Build the summary text for the non-parametric normality test, then print it.
summary_text = normality_test.summary()
print(summary_text)

### QQ Plot

In [None]:
from QuadratiK.tools import qq_plot
# Draw the QQ plot for `data`, the sample used in the normality tests above.
# Only the sample is passed, so qq_plot runs with its default settings.
qq_plot(data)

## Two Sample Test

This section shows an example of the two-sample test using the normal kernel-based quadratic distance.

In [None]:
import numpy as np
from QuadratiK.kernel_test import KernelTest

# Seed once and draw both samples from the same random stream. Re-seeding
# with the same value before drawing Y would make Y an exact copy of X, so
# the test would compare a sample with itself instead of two independent
# samples from the same distribution.
np.random.seed(42)
X = np.random.randn(100, 2)
Y = np.random.randn(100, 2)

# Passing two samples to `test` runs the two-sample comparison.
two_sample_test = KernelTest(h=0.4, random_state=42).test(X, Y)

# Each entry pairs an output template with the result attribute it reports.
for template, attr in (
    ("Test : {}", "test_type_"),
    ("Execution time: {:.3f}", "execution_time"),
    ("H0 is Rejected : {}", "h0_rejected_"),
    ("Test Statistic : {}", "test_statistic_"),
    ("Critical Value (CV) : {}", "cv_"),
    ("CV Method : {}", "cv_method_"),
    ("Selected tuning parameter : {}", "h"),
):
    print(template.format(getattr(two_sample_test, attr)))

In [None]:
# Build the summary text for the two-sample test, then print it.
summary_text = two_sample_test.summary()
print(summary_text)

## K-Sample Test

Shows examples for the kernel-based quadratic distance k-sample tests with the Normal kernel and bandwidth
parameter h.

In [None]:
from QuadratiK.kernel_test import KernelTest

# 500 bivariate standard-normal observations with integer group labels 0-4.
# `np` is the NumPy module imported in an earlier cell of this notebook.
np.random.seed(42)
X = np.random.randn(500, 2)
np.random.seed(42)
y = np.random.randint(0, 5, 500)

# `random_state=42` is passed, as in the other KernelTest examples, so that
# the permutation-based results are the same on every run of this cell.
k_sample_test = KernelTest(h=1.5, method="permutation", random_state=42).test(X, y)

# Each entry pairs an output template with the result attribute it reports.
for template, attr in (
    ("Test : {}", "test_type_"),
    ("Execution time: {:.3f} seconds", "execution_time"),
    ("H0 is Rejected : {}", "h0_rejected_"),
    ("Test Statistic : {}", "test_statistic_"),
    ("Critical Value (CV) : {}", "cv_"),
    ("CV Method : {}", "cv_method_"),
    ("Selected tuning parameter : {}", "h"),
):
    print(template.format(getattr(k_sample_test, attr)))

In [None]:
# Build the summary text for the k-sample test, then print it.
summary_text = k_sample_test.summary()
print(summary_text)

## Poisson Kernel Test

Shows an example of performing the kernel-based quadratic distance goodness-of-fit test for uniformity of
spherical data using the Poisson kernel with concentration parameter rho.

In [None]:
from QuadratiK.tools import sample_hypersphere
from QuadratiK.poisson_kernel_test import PoissonKernelTest

# Generate the spherical sample with a fixed seed, then run the uniformity
# test with the Poisson kernel (rho = 0.7).
np.random.seed(42)
X = sample_hypersphere(100, 3, random_state=42)
unif_test = PoissonKernelTest(rho=0.7, random_state=42).test(X)

print("Execution time: {:.3f} seconds".format(unif_test.execution_time))

# One section per statistic: a heading followed by (template, attribute) rows.
result_sections = (
    (
        "U Statistic Results",
        (
            ("H0 is rejected : {}", "u_statistic_h0_"),
            ("Un Statistic : {}", "u_statistic_un_"),
            ("Critical Value : {}", "u_statistic_cv_"),
        ),
    ),
    (
        "V Statistic Results",
        (
            ("H0 is rejected : {}", "v_statistic_h0_"),
            ("Vn Statistic : {}", "v_statistic_vn_"),
            ("Critical Value : {}", "v_statistic_cv_"),
        ),
    ),
)
for heading, rows in result_sections:
    print(heading)
    for template, attr in rows:
        print(template.format(getattr(unif_test, attr)))

In [None]:
# Build the summary text for the uniformity test, then print it.
summary_text = unif_test.summary()
print(summary_text)

### QQ Plot

In [None]:
from QuadratiK.tools import qq_plot

# Draw the QQ plot for the spherical sample X with dist="uniform", matching
# the uniformity test run above.
qq_plot(X,dist = "uniform")

## Poisson Kernel based Clustering

Shows an example of performing the Poisson kernel-based clustering algorithm on the sphere, based on
the Poisson kernel-based densities.

In [None]:
from QuadratiK.datasets import load_wireless_data
from QuadratiK.spherical_clustering import PKBC
from sklearn.preprocessing import LabelEncoder

# Load the features and the true labels of the wireless dataset.
X, y = load_wireless_data(return_X_y=True)

# Encode the true labels as integers; the fitted encoder is kept in `le`.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

# Fit a four-cluster model, then score the clustering against the true labels.
cluster_fit = PKBC(num_clust=4, random_state=42).fit(X)
(
    ari,
    macro_precision,
    macro_recall,
    avg_silhouette_Score,
) = cluster_fit.validation(y)

# Estimated model parameters.
for label, attr in (
    ("Estimated mixing proportions :", "alpha_"),
    ("Estimated concentration parameters: ", "rho_"),
):
    print(label, getattr(cluster_fit, attr))

# Validation metrics returned by `validation`.
for label, score in (
    ("Adjusted Rand Index:", ari),
    ("Macro Precision:", macro_precision),
    ("Macro Recall:", macro_recall),
    ("Average Silhouette Score:", avg_silhouette_Score),
):
    print(label, score)

### Elbow Plot using Euclidean Distance and Cosine Similarity based WCSS

In [None]:
import matplotlib.pyplot as plt

# Candidate numbers of clusters, shared by the fitting loop and both plots.
cluster_counts = list(range(2, 10))

wcss_euc = []
wcss_cos = []

# Fit one model per candidate cluster count and record both WCSS variants.
# `random_state=42` matches the clustering example above, so the elbow
# curves are the same on every run.
for i in cluster_counts:
    clus_fit = PKBC(num_clust=i, random_state=42).fit(X)
    wcss_euc.append(clus_fit.euclidean_wcss_)
    wcss_cos.append(clus_fit.cosine_wcss_)

# Draw one figure per WCSS variant. The title names the variant so the two
# otherwise identical-looking plots can be told apart.
for wcss_values, metric_label in (
    (wcss_euc, "Euclidean Distance"),
    (wcss_cos, "Cosine Similarity"),
):
    plt.figure(figsize=(6, 4))
    plt.plot(cluster_counts, wcss_values, "--o")
    plt.xlabel("Number of Cluster")
    plt.ylabel("Within Cluster Sum of Squares (WCSS)")
    plt.title(
        "Elbow Plot for Wireless Indoor Localization dataset\n"
        "({} based WCSS)".format(metric_label)
    )
    plt.show()

## Density Estimation and Sample Generation from PKBD

In [None]:
from QuadratiK.spherical_clustering import PKBD
# Generate a sample with PKBD.rpkb using the "rejvmf" method and a fixed seed.
# The arguments are positional; presumably sample size, location vector and
# concentration, in that order (confirm against the PKBD API).
pkbd_data = PKBD().rpkb(10,[0.5,0],0.5, "rejvmf", random_state= 42)
# Evaluate PKBD.dpkb on the generated sample and print the result.
# NOTE(review): the vector passed here ([0.5, 0.5]) differs from the one used
# for rpkb ([0.5, 0]); confirm that this is intended.
dens_val  = PKBD().dpkb(pkbd_data, [0.5,0.5],0.5)
print(dens_val)

## Tuning Parameter $h$ selection

Computes the kernel bandwidth of the Gaussian kernel for the two-sample and k-sample
kernel-based quadratic distance (KBQD) tests.

In [None]:
import numpy as np
from QuadratiK.kernel_test import select_h

# Reproducible inputs: 200 bivariate standard-normal rows and 0/1 group labels.
np.random.seed(42)
X = np.random.randn(200, 2)
np.random.seed(42)
y = np.random.randint(0, 2, 200)

# With power_plot=True the result is unpacked into three parts: the selected
# h, the table of all evaluated values, and the power plot.
selection = select_h(
    X,
    y,
    alternative="location",
    power_plot=True,
    random_state=None,
)
h_selected, all_values, power_plot = selection
print("Selected h is: ", h_selected)

In [None]:
# Show the detailed power-versus-h table returned by select_h. It is the last
# expression in the cell, so the notebook displays it as the cell output.
all_values

In [None]:
# Show the power plot returned by select_h. It is the last expression in the
# cell, so the notebook displays it as the cell output.
power_plot