In [58]:
import pandas as pd
import numpy as np
from functools import reduce
from operator import add
import binascii
import names

# Data to pseudonymize = original data

In [59]:
SAMPLE_SIZE = 1000
columns = ["id", "personal_identification_number", "first_name", "last_name", "school"]

In [60]:
import random
school_list = ["CVUT", "VSE", "UK", "AMU", "AVU", "JAMU", "JU", "MU", "MENDELU", "OU", "SU", "TUL", "UHK", "UJEP", "UK", "UP", "UPCE", "UTB", "VETUNI", "VSB", "VSCHT", "UMPRUM", "VUT", "ZCU"]
def get_random_school():
    return random.sample(school_list, 1)[0]

In [61]:
import uuid
samples = list()
for i in range(SAMPLE_SIZE):
    samples.append({columns[0]: uuid.uuid4().hex,columns[1]: f'{random.randint(100000,999999)}/{random.randint(1000,9999)}',columns[2]: names.get_first_name(), columns[3]: names.get_last_name(), columns[4]: get_random_school()})
df_original = pd.DataFrame(samples)
df_original.to_csv('original.csv', index=False)

In [62]:
df_original

Unnamed: 0,id,personal_identification_number,first_name,last_name,school
0,1d7f8fb0d5e345a8a5154272a5c0e896,707026/8655,Jose,Bjorseth,UP
1,b163eb7d2b564d3390b308f8c5cf394a,481357/1281,Joseph,Keyes,ZCU
2,a1736f17c4f74bd88a2440285144648c,521904/4434,David,Fletcher,UP
3,3b7c7730c35c4699850763168e55184b,587801/3674,Paul,Ries,AVU
4,161607f5e6e0416f8dc9b837c09f5804,959834/1423,Doyle,Albright,SU
...,...,...,...,...,...
995,f7517f58356641fe9cd5f50f56eda34a,390887/8258,Tony,Natividad,AMU
996,3848648d23904b4e8642314c030cf761,905403/2656,Frank,Rogoff,SU
997,3197e4494e6f419d82d716f33d3d9107,190003/5221,Aubrey,Paris,CVUT
998,835513679eaa42d79d89a6a461be5dbe,792786/3494,Theodore,Seville,ZCU


In [63]:
df_original.groupby(['school']).count()['id']

school
AMU        54
AVU        43
CVUT       37
JAMU       39
JU         43
MENDELU    51
MU         39
OU         32
SU         52
TUL        40
UHK        44
UJEP       44
UK         99
UMPRUM     37
UP         38
UPCE       39
UTB        36
VETUNI     40
VSB        40
VSCHT      27
VSE        40
VUT        43
ZCU        43
Name: id, dtype: int64

# PKCS11

In [64]:
import pkcs11
import os
from pkcs11 import KeyType, Attribute,ObjectClass
import time
lib = pkcs11.lib(os.environ['PKCS11_MODULE'])
token = lib.get_token(token_label='08')

In [65]:
required_key_label = "KBE_DEMO_09"

In [66]:
def openSession(rw=False):
    return token.open(rw, user_pin='1234')

In [67]:
def get_key_by_label(label, session):
    for obj in session.get_objects({
        Attribute.CLASS: ObjectClass.SECRET_KEY,
        Attribute.LABEL: label,
    }):
        return obj

In [68]:
#If the CKA_SENSITIVE attribute is CK_TRUE, or if the CKA_EXTRACTABLE attribute is CK_FALSE, 
#then certain attributes of the secret key cannot be revealed in plaintext outside the token.
def generate_AES_KEY(label):
    key = session.generate_key(pkcs11.KeyType.AES, 256, store=True, label=label, template={
    Attribute.SENSITIVE: True,
    Attribute.EXTRACTABLE: True,
    })
    return key

In [86]:
session = openSession(True)
key = get_key_by_label(required_key_label, session) 
if key is None:
    print("Key is missing, generating one")
    key = generate_AES_KEY(required_key_label)
else:
    print("Using key from storage")

Using key from storage


In [70]:
def encrypt_AES(session, data, iv):
    return key.encrypt(data, mechanism_param=iv)

In [71]:
def decrypt_AES(session, data, iv):
    return key.decrypt(data, mechanism_param=iv)

In [72]:
class Cryptology_object:
    def as_bytes(self):
        return binascii.unhexlify(self.bits)   
    def as_bits(self):
        return binascii.hexlify(self.bytes).decode()

In [73]:
class Iv(Cryptology_object):
    def __init__(self, bits=None):
        if bits==None:
            self.bytes: bytes = session.generate_random(128)
            self.bits = self.as_bits()
        else:
            self.bits = bits
            self.bytes: bytes = self.as_bytes()

In [74]:
class EncryptionDetails(Cryptology_object):
    #https://stackoverflow.com/questions/43787031/python-byte-array-to-bit-array
    def __init__(self, record, iv):
        self.iv: Iv = iv  
        self.bytes: bytes = encrypt_AES(session, str(record).encode(), self.iv.as_bytes())

In [75]:
class DecryptionDetails(Cryptology_object):
    def __init__(self, record, iv):
        self.iv = iv
        self.bits = record
        self.bytes =  self.as_bytes()
        self.value = decrypt_AES(session, self.bytes, iv.as_bytes())

In [76]:
def encrypt_list_data(data_to_encrypt: list, ivs_list: list) -> list:
    return [EncryptionDetails(record, ivs_list[index]) for index, record in enumerate(data_to_encrypt)]

In [77]:
def encrypt_df(df, col_names: list, ivs_list: list):
    col_to_encrypt = [df[col_name].fillna("-").tolist() for col_name in col_names]
    return [encrypt_list_data(col, ivs_list) for col in col_to_encrypt]

## Encryption

In [78]:
ivs = [Iv(index) for index in df_original['id']]
ivs_list = [iv.as_bits() for iv in ivs]
col_names =  ["personal_identification_number", "first_name", "last_name"]
en_cols = [en_object_list for en_object_list in encrypt_df(df_original, col_names, ivs)]
df_final = pd.DataFrame(df_original)

for index, col in enumerate(en_cols):
    df_final.drop(col_names[index], inplace=True, axis=1)
    df1 = pd.DataFrame([en_object.as_bits() for en_object in col])
    df1.columns = [col_names[index]] 
    df_final = pd.concat([df_final, df1], axis=1)

df_final[['id', 'personal_identification_number','first_name', 'last_name', 'school']].to_csv('encrypted.csv', index=False)

## Decryption

In [79]:
def decrypt_list_data(col_to_decrypt: list, col_iv: list) -> list:
    return [DecryptionDetails(record, Iv(col_iv[index])) for index, record in enumerate(col_to_decrypt)]

In [80]:
def decrypt_df(df, col_names: list, col_iv: list):
    cols_list = [df[col_name].tolist() for col_name in col_names]
    return [decrypt_list_data(col, col_iv) for col in cols_list]

In [81]:
df_encrypted = pd.read_csv("encrypted.csv")
row_count = len(df_encrypted)
df_iv = df_encrypted["id"]
col_names = ["personal_identification_number", "first_name", "last_name"]
cols_to_decrypt = [decrypted_list for decrypted_list in decrypt_df(df_encrypted, col_names, df_iv)]
df_final = pd.DataFrame(df_encrypted)

for index, col in enumerate(cols_to_decrypt):
    df_final.drop(col_names[index], inplace=True, axis=1)
    df1 = pd.DataFrame([de_object.value.decode() for de_object in col])
    df1.columns = [col_names[index]]
    df_final = pd.concat([df_final, df1], axis=1)

df_final[['id',  'personal_identification_number', 'first_name', 'last_name', 'school']].to_csv('decrypted.csv', index=False)

# Comparison

In [82]:
df_original = pd.read_csv("original.csv")
df_encrypted = pd.read_csv("encrypted.csv")
df_decrypted = pd.read_csv("decrypted.csv")

In [83]:
df_encrypted

Unnamed: 0,id,personal_identification_number,first_name,last_name,school
0,1d7f8fb0d5e345a8a5154272a5c0e896,0431facb1d8ee9fc5d5fd84d9c6c6bfa,08e0a6085a05ccdeccb2f41040029bbf,8147b41f121944dad8d2828068bad731,UP
1,b163eb7d2b564d3390b308f8c5cf394a,b7ac55a3d3d8fbd191c7331249fecab2,a591a8623f43192ac95ce69234484363,c2f00fe6c63f3f761fb8ca583fc5d973,ZCU
2,a1736f17c4f74bd88a2440285144648c,24b51311ba0e47d4b1f7c1adfd801bf1,57d0cc82af92775fd552e44ac1f26f97,27c7513be635777c3a345e2851f9f579,UP
3,3b7c7730c35c4699850763168e55184b,b782c523d60a2423416d562f0c129f76,969816cfe7217ce27f185da5b28bad6d,6fac4d47f0d9d652ff23a4777244e610,AVU
4,161607f5e6e0416f8dc9b837c09f5804,0771c05aec2d5908c35d9c05eb801e8a,ae032ea582e831ed5b5fd5cf2e5ce07d,1d3522e8392bc624c4b88c954d9ef34d,SU
...,...,...,...,...,...
995,f7517f58356641fe9cd5f50f56eda34a,8747635f09b99670ddd8e735a9e2b047,86007af781497744bf4bd7729c8a4c44,30530a65a97d393050f0bfe01ed96310,AMU
996,3848648d23904b4e8642314c030cf761,c5ca1420ecf50868c19cfcf294ea0ba6,0cef3e45e2e2b05787af5e6c22f3ad3c,c4833d0910f7c9de26c3b57c5d55c323,SU
997,3197e4494e6f419d82d716f33d3d9107,e11267f768805da3569feef391b69ef2,a83823674a32c0860a47e4ac0c606d5c,38b65d6ed81520b0bdb43dfa45bb2b0c,CVUT
998,835513679eaa42d79d89a6a461be5dbe,dc38721e42352e099a643baf238dbf51,b53b870dc07a140531256e98148b5839,03e4d329608c35a48fda64c0e3b846fb,ZCU


In [84]:
df_decrypted

Unnamed: 0,id,personal_identification_number,first_name,last_name,school
0,1d7f8fb0d5e345a8a5154272a5c0e896,707026/8655,Jose,Bjorseth,UP
1,b163eb7d2b564d3390b308f8c5cf394a,481357/1281,Joseph,Keyes,ZCU
2,a1736f17c4f74bd88a2440285144648c,521904/4434,David,Fletcher,UP
3,3b7c7730c35c4699850763168e55184b,587801/3674,Paul,Ries,AVU
4,161607f5e6e0416f8dc9b837c09f5804,959834/1423,Doyle,Albright,SU
...,...,...,...,...,...
995,f7517f58356641fe9cd5f50f56eda34a,390887/8258,Tony,Natividad,AMU
996,3848648d23904b4e8642314c030cf761,905403/2656,Frank,Rogoff,SU
997,3197e4494e6f419d82d716f33d3d9107,190003/5221,Aubrey,Paris,CVUT
998,835513679eaa42d79d89a6a461be5dbe,792786/3494,Theodore,Seville,ZCU


In [85]:
df_original

Unnamed: 0,id,personal_identification_number,first_name,last_name,school
0,1d7f8fb0d5e345a8a5154272a5c0e896,707026/8655,Jose,Bjorseth,UP
1,b163eb7d2b564d3390b308f8c5cf394a,481357/1281,Joseph,Keyes,ZCU
2,a1736f17c4f74bd88a2440285144648c,521904/4434,David,Fletcher,UP
3,3b7c7730c35c4699850763168e55184b,587801/3674,Paul,Ries,AVU
4,161607f5e6e0416f8dc9b837c09f5804,959834/1423,Doyle,Albright,SU
...,...,...,...,...,...
995,f7517f58356641fe9cd5f50f56eda34a,390887/8258,Tony,Natividad,AMU
996,3848648d23904b4e8642314c030cf761,905403/2656,Frank,Rogoff,SU
997,3197e4494e6f419d82d716f33d3d9107,190003/5221,Aubrey,Paris,CVUT
998,835513679eaa42d79d89a6a461be5dbe,792786/3494,Theodore,Seville,ZCU


https://cryptii.com/pipes/aes-encryption

https://text-compare.com/