TenSEALの使い方(暗号化)
===

In [196]:
%pip install tenseal
!rm -rf enc-data
%mkdir enc-data




UsageError: Line magic function `%rm` not found.

[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [198]:
import tenseal as ts
import pandas as pd
import joblib
from sklearn.preprocessing import OrdinalEncoder

In [199]:
# Create the TenSEAL security context
def create_ctx(poly_mod_degree=8192, coeff_mod_bit_sizes=None, global_scale=2 ** 21):
    """Build a TenSEAL CKKS context with Galois and relinearization keys.

    Called with no arguments it reproduces the configuration this notebook
    has always used:

        - Polynomial modulus degree: 8192.
        - Coefficient modulus bit sizes: [40, 21, 21, 21, 21, 21, 21, 40].
        - Global scale: 2 ** 21.

    Args:
        poly_mod_degree: Polynomial modulus degree passed to ``ts.context``.
        coeff_mod_bit_sizes: Iterable of coefficient modulus bit sizes, or
            ``None`` to use the default list above.
        global_scale: Value assigned to ``ctx.global_scale``.

    Returns:
        The context object created by ``ts.context`` after Galois keys and
        relinearization keys have been generated on it.
    """
    if coeff_mod_bit_sizes is None:
        # Build the default per call so no list object is shared between calls.
        coeff_mod_bit_sizes = [40, 21, 21, 21, 21, 21, 21, 40]

    # The third positional argument is the plain-modulus slot; -1 is kept
    # exactly as before (presumably ignored for CKKS - confirm in TenSEAL docs).
    ctx = ts.context(
        ts.SCHEME_TYPE.CKKS,
        poly_mod_degree,
        -1,
        list(coeff_mod_bit_sizes),
    )
    ctx.global_scale = global_scale
    ctx.generate_galois_keys()
    ctx.generate_relin_keys()
    return ctx

In [200]:
# Free-text answer column(s); dropped from the frame right after the CSV is read.
document_columns = [
    "日常生活で心がけている健康習慣はどんなものですか？",
]
# Text category column(s); converted to integer codes with OrdinalEncoder.
string_columns = [
    "最も好きな運動は何ですか？",
]

In [201]:
# Read the raw survey answers and discard the free-text column(s).
# NOTE(review): the displayed frame contains an `Unnamed: 0` column (presumably
# the row index saved into the CSV); it stays in `df` and is therefore encrypted
# together with the real answers - confirm that this is intended.
df = (
    pd.read_csv('raw-data/健康習慣.csv')
    .drop(columns=document_columns)
)
df

Unnamed: 0,1週間に何回、運動をしていますか？,1日に摂取する水の量はどのくらいですか？,最も好きな運動は何ですか？,睡眠時間は平均して何時間ですか？
0,4,2.628471,バスケ,13.302334
1,3,1.690676,テニス,21.558928
2,4,2.551806,テニス,9.333607
3,4,1.632503,サッカー,7.529754
4,3,2.208212,テニス,4.869905
...,...,...,...,...
95,3,2.850985,テニス,4.059686
96,4,1.741371,バスケ,12.457003
97,0,2.873948,テニス,7.950638
98,5,1.487113,テニス,19.900786


In [202]:
# Learn the category -> integer mapping from the text column(s), then replace
# those column(s) in `df` with the integer codes. The fitted encoder is kept in
# `oe`; a later cell saves it to disk with joblib.
# NOTE(review): this overwrites `df` in place, so running the cell a second time
# would refit `oe` on the already-encoded integers - run it once per fresh load.
oe = OrdinalEncoder().fit(df[string_columns])
df[string_columns] = oe.transform(df[string_columns]).astype(int)
df

Unnamed: 0,1週間に何回、運動をしていますか？,1日に摂取する水の量はどのくらいですか？,最も好きな運動は何ですか？,睡眠時間は平均して何時間ですか？
0,4,2.628471,2,13.302334
1,3,1.690676,1,21.558928
2,4,2.551806,1,9.333607
3,4,1.632503,0,7.529754
4,3,2.208212,1,4.869905
...,...,...,...,...
95,3,2.850985,1,4.059686
96,4,1.741371,2,12.457003
97,0,2.873948,1,7.950638
98,5,1.487113,1,19.900786


In [203]:
# Create the CKKS context and store it on disk including the secret key.
# NOTE(review): despite the `.joblib` suffix this file receives the raw
# serialized bytes, not a joblib pickle (the public context saved later goes
# through joblib.dump) - readers of this file must use a plain binary read.
context = create_ctx()
private_blob = context.serialize(save_secret_key=True)
with open("enc-data/context.joblib", "wb") as key_file:
    key_file.write(private_blob)
context.is_private()

True

In [204]:
# Derive a public variant from a copy, so `context` itself is not modified and
# remains available to the following cells.
pubctx = context.copy()
pubctx.make_context_public()
# Sanity check: the cell output (False) shows the copy is no longer private.
pubctx.is_private()

False

In [205]:
# Encrypt every row of `df` as one CKKS vector, preserving row order.
# Each row's values (all columns of that row) form a single vector.
enclist = [
    ts.ckks_vector(context, row.values)
    for _, row in df.iterrows()
]

In [206]:
def tenseal_save(enclist, filename="enc-data/main"):
    """Write serialized vectors back-to-back into one file plus a size index.

    Every item of ``enclist`` is serialized with its ``serialize()`` method and
    appended to ``<filename>.tso``. The byte length of each serialized item is
    recorded, in order, and the resulting list is dumped with joblib to
    ``<filename>-meta.joblib`` so the concatenated file can be split again.

    Args:
        enclist: Iterable of objects exposing ``serialize()`` that returns a
            bytes-like value.
        filename: Path prefix (without extension) shared by both output files.
    """
    payload_path = f"{filename}.tso"
    index_path = f"{filename}-meta.joblib"

    blob_sizes = []
    with open(payload_path, "wb") as payload:
        for vector in enclist:
            blob = vector.serialize()
            payload.write(blob)
            # The length is the only way to find this item's boundary later.
            blob_sizes.append(len(blob))

    joblib.dump(blob_sizes, index_path)

In [207]:
# Persist everything needed alongside the ciphertexts:
#   - the public context, stored as a joblib pickle of its serialized bytes
#   - the fitted OrdinalEncoder, so integer codes can be mapped back to labels
#   - the encrypted rows themselves (payload file + size index)
public_blob = pubctx.serialize()
joblib.dump(public_blob, "enc-data/public_context.joblib")
joblib.dump(oe, "enc-data/ordinalencoder.joblib")
tenseal_save(enclist, filename="enc-data/main")