TenSEALの使い方(暗号化)
===

In [1]:
%pip install tenseal
!rm -rf enc-data
%mkdir enc-data

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
'rm' は、内部コマンドまたは外部コマンド、
操作可能なプログラムまたはバッチ ファイルとして認識されていません。
サブディレクトリまたはファイル enc-data は既に存在します。


In [2]:
import tenseal as ts
import pandas as pd
import joblib
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# Create the TenSEAL security context
def create_ctx():
    """Build a fresh TenSEAL CKKS context with Galois and relinearization keys.

    Fixed parameters:
        - polynomial modulus degree: 8192
        - coefficient modulus bit sizes: [40, 21, 21, 21, 21, 21, 21, 40]
        - global scale: 2 ** 21

    Returns:
        The newly created context, with both key sets already generated.
    """
    modulus_bit_sizes = [40] + [21] * 6 + [40]
    # The third positional argument (-1) is the plain-modulus slot of
    # ts.context — presumably unused for CKKS; confirm against the TenSEAL docs.
    context = ts.context(ts.SCHEME_TYPE.CKKS, 8192, -1, modulus_bit_sizes)
    context.global_scale = 1 << 21
    context.generate_galois_keys()
    context.generate_relin_keys()
    return context

def tenseal_save(enclist, filename="enc-data/main"):
    """Write serialized vectors back-to-back and record their byte lengths.

    Args:
        enclist: Iterable of objects exposing ``serialize()`` that returns
            bytes.
        filename: Path prefix. The payload goes to ``<filename>.tso`` and the
            list of per-item byte lengths to ``<filename>-meta.joblib``.

    The ``.tso`` file has no delimiters between items; the length list is
    the only way to split it back into individual blobs.
    """
    blob_sizes = []
    with open(f"{filename}.tso", "wb") as sink:
        for vector in enclist:
            blob = vector.serialize()
            sink.write(blob)
            blob_sizes.append(len(blob))
    joblib.dump(blob_sizes, f"{filename}-meta.joblib")

In [4]:
def doc2vec(x, model, tokenizer):
    """Tokenize ``x`` and return the vector ``model`` infers for those tokens.

    Args:
        x: The raw document handed to ``tokenizer``.
        model: Object exposing ``infer_vector(tokens)``.
        tokenizer: Callable that turns ``x`` into the tokens ``model`` expects.
    """
    tokens = tokenizer(x)
    vector = model.infer_vector(tokens)
    return vector

def df2num(rawdf, document_columns, string_columns, model=None, tokenizer=None):
    """Convert free-text and categorical columns of a DataFrame to numbers.

    Args:
        rawdf: Source DataFrame; it is copied and never modified.
        document_columns: Column label(s) whose cells are passed through
            ``doc2vec``.
        string_columns: Column label(s) that are ordinal-encoded to integers.
        model: Object exposing ``infer_vector``; forwarded to ``doc2vec``.
        tokenizer: Callable that tokenizes one cell; forwarded to ``doc2vec``.

    Returns:
        ``(df, oe)``: the converted copy and the fitted ``OrdinalEncoder``.

    Raises:
        TypeError: if there are document columns to convert but ``model`` or
            ``tokenizer`` was not supplied.
    """
    if len(document_columns) > 0 and (model is None or tokenizer is None):
        raise TypeError(
            "df2num() needs both `model` and `tokenizer` to vectorise "
            "document_columns; doc2vec(x, model, tokenizer) cannot be called "
            "with the cell value alone."
        )
    df = rawdf.copy()
    # .map() hands each cell to the callable as its only argument, so the
    # model and tokenizer must be bound here rather than passing doc2vec bare.
    df[document_columns] = df[document_columns].map(
        lambda text: doc2vec(text, model, tokenizer)
    )
    oe = OrdinalEncoder()
    df[string_columns] = oe.fit_transform(df[string_columns]).astype(int)
    return df, oe

def df2enc(df, ctx):
    """Create one CKKS vector per DataFrame row under ``ctx``.

    Args:
        df: DataFrame whose rows are encoded; each row's ``.values`` array is
            passed to ``ts.ckks_vector`` as-is.
        ctx: TenSEAL context used to create every vector.

    Returns:
        A list of CKKS vectors in row order.
    """
    return [ts.ckks_vector(ctx, row.values) for _, row in df.iterrows()]

In [5]:
# Column(s) handed to df2num as document_columns (each cell goes through doc2vec).
document_columns = [
    "日常生活で心がけている健康習慣はどんなものですか？",
]
# Column(s) handed to df2num as string_columns (ordinal-encoded to integers).
string_columns = [
    "最も好きな運動は何ですか？",
]

In [6]:
# Load the raw CSV and convert it: `df` is the converted frame, `oe` the fitted
# OrdinalEncoder returned alongside it.
# NOTE(review): doc2vec needs a `model` and a `tokenizer`, and neither is
# created or loaded in the visible cells of this notebook — confirm how the
# document columns are meant to be vectorised before relying on this cell.
df, oe = df2num(pd.read_csv('raw-data/健康習慣.csv'), document_columns, string_columns)

In [7]:
# Build the CKKS context and persist it, secret key included, as the raw bytes
# returned by serialize().
# NOTE(review): despite the ".joblib" suffix this file is written with a plain
# f.write(), whereas enc-data/public_context.joblib is written with
# joblib.dump() — confirm that readers open this one in binary mode instead of
# calling joblib.load() on it.
# NOTE(review): the secret key lands in the same enc-data/ directory as the
# public artifacts — confirm that directory is never shared as a whole.
context = create_ctx()
with open("enc-data/context.joblib", "wb") as f:
    f.write(context.serialize(save_secret_key=True))
# Displayed as the cell result: whether this context is private.
context.is_private()

True

In [8]:
# Copy the context, make the copy public, and remove the original `context`
# name from the kernel so later cells can only reach `pubctx`.
# Because of the `del`, re-running this cell without first re-running the cell
# that creates `context` raises NameError.
pubctx = context.copy()
pubctx.make_context_public()
del context
# Displayed as the cell result: whether the copy is still private.
pubctx.is_private()

False

In [9]:
# Build one CKKS vector per row of `df` using the public context.
enclist = df2enc(df, pubctx,)

In [10]:
# Persist the artifacts under enc-data/:
#   - public_context.joblib : the serialized public context, stored via joblib
#   - ordinalencoder.joblib : the fitted OrdinalEncoder returned by df2num
#   - main.tso / main-meta.joblib : the concatenated serialized vectors and the
#     list of their byte lengths, both written by tenseal_save
joblib.dump(pubctx.serialize(), "enc-data/public_context.joblib")
joblib.dump(oe, "enc-data/ordinalencoder.joblib")
tenseal_save(enclist, filename="enc-data/main")