# Query Limiting

Aims: This notebook demonstrates query limiting on the SCN side. This includes:

- A new addition to the SCN side helper lib: *privacy_sentinel*
    - privacy_sentinel contains the logic to be used by safe objects in order to vet their execution
    - privacy_sentinel vets execution at the computation layer but sentinel logic could be used by layers below to vet incoming jobs.
- This demo contains one statistical function which has been modified to accommodate query limiting logic

We need query limiting over entire federations.

## 1. The Privacy Sentinel MVP

- Contains generic logic to stop queries on samples smaller than a minimum size N.
- Until an appropriate value for N is determined, N is a constant value for all query types.

### 1.1 Get Data

In [25]:
from sail_safe_functions_test.helper_sail_safe_functions.data_frame_federated_local import DataFrameFederatedLocal
from sail_safe_functions_test.helper_sail_safe_functions.series_federated_local import SeriesFederatedLocal

import os  # required by os.path.join below; without it the cell fails on a fresh kernel

# Directory holding the test data sets, relative to this notebook.
DATA_PATH = "../../sail-safe-functions-test/sail_safe_functions_test/data_sail_safe_functions"

# CSV files that are each added to the federated data frame.
list_name_file_csv = ["bmc1.csv", "bwh1.csv", "mgh1.csv"]
# Name of the column the demo sample is taken from.
id_column_0 = "PD-L1 level before treatment"


# Build the federated data frame by adding every CSV file in turn.
dataframe = DataFrameFederatedLocal()
for name_file_csv in list_name_file_csv:
    path_file_csv = os.path.join(DATA_PATH, "data_csv_investor_demo", name_file_csv)
    dataframe.add_csv(path_file_csv)

# Select the single column used as the sample for the rest of the notebook.
one_sample_big = dataframe[id_column_0]

In [26]:
# Bare expression: the notebook displays the repr of the federated series.
one_sample_big

<sail_safe_functions_test.helper_sail_safe_functions.series_federated_local.SeriesFederatedLocal at 0x7f2190b58b80>

In [27]:
from helper_libs.scn_side.machine_learning.

True


In [28]:
from typing import List
import math
from typing import List
from sail_safe_functions.statistics.kurtosis_precompute import KurtosisPrecompute

class KurtosisAggregate:
    """
    Aggregates per-series precomputes into a single federated kurtosis value.
    """

    @staticmethod
    def run(list_list_precompute: List[List[float]]) -> float:
        """
        Compute the federated excess (Fisher) kurtosis from precomputed sums.

        The result is the fourth central moment divided by the squared
        population variance, minus 3, which is the quantity
        ``scipy.stats.kurtosis`` reports with its default arguments.

        :param list_list_precompute: one precompute per series, laid out as
            ``[sum(x), sum(x**2), sum(x**3), sum(x**4), count]``
        :type list_list_precompute: List[List[float]]
        :return: excess kurtosis of the combined sample
        :rtype: float
        :raises ZeroDivisionError: if the combined sample size is zero or the
            combined sample has zero variance
        """
        # Vet the query before any aggregation happens. The threshold is fixed
        # inside the safe function so that callers cannot lower it.
        # NOTE(review): the sentinel presumably raises when the samples are too
        # small; confirm against the privacy_sentinel implementation.
        PrivacySentinel.query_limit_local_n_precompute(list_list_precompute, n=1000)

        sum_x = 0
        sum_xx = 0
        sum_xxx = 0
        sum_xxxx = 0
        size_sample = 0
        # Combine the precomputes of all series into totals.
        for list_precompute in list_list_precompute:
            sum_x += list_precompute[0]
            sum_xx += list_precompute[1]
            sum_xxx += list_precompute[2]
            sum_xxxx += list_precompute[3]
            size_sample += list_precompute[4]

        # Raw (non-central) moments of the combined sample.
        mean = sum_x / size_sample
        mu2 = sum_xx / size_sample
        mu3 = sum_xxx / size_sample
        mu4 = sum_xxxx / size_sample

        # Population variance and standard deviation.
        variance = mu2 - (mean * mean)
        standard_deviation = math.sqrt(variance)

        # Fourth central moment expanded in terms of raw moments, see
        # https://en.wikipedia.org/wiki/Kurtosis
        fourth_central_moment = (
            mu4 - 3 * (mean**4) - 4 * (mu3 * mean) + 6 * (mu2 * (mean**2))
        )
        kurtosis_value = fourth_central_moment / (standard_deviation**4)

        # Subtract 3 so that a normal distribution yields 0 (excess kurtosis).
        return kurtosis_value - 3


In [29]:
# Precompute step: collect one precompute per series of the federated sample.
# TODO deal with possibility sample_0 and sample_1 do not share same child frames
list_list_precompute = [
    KurtosisPrecompute.run(series)
    for series in one_sample_big.dict_series.values()
]

# Aggregate step: combine the precomputes into the final kurtosis value and
# show it as the cell output.
kurtosis_value = KurtosisAggregate.run(list_list_precompute)
kurtosis_value

NameError: Too few samples

In [None]:
# Display the series again; the bare expression shows the object's repr.
one_sample_big

<sail_safe_functions_test.helper_sail_safe_functions.series_federated_local.SeriesFederatedLocal at 0x7f21d8136910>