## Importing Required Libraries

In [5]:
import pandas as pd
import numpy as np
from anonymizedf.anonymizedf import anonymize
from faker import Faker

faker = Faker()

## Reading the Dataset

In [6]:
df = pd.read_csv("empdata.csv")
print(df.head())

     Emp ID     Employee_name Gender Date of Birth  Age in Yrs.  \
0  677509.0       Lois Walker      F     3/29/1981        36.36   
1  940761.0   Brenda Robinson      F     7/31/1970        47.02   
2  428945.0      Joe Robinson      M     6/16/1963        54.15   
3  408351.0       Diane Evans      F    12-04-1977        39.67   
4  193819.0  Benjamin Russell      M     4/17/1977        40.31   

   Weight in Kgs.  Year of Joining    Salary Last % Hike          SSN  \
0            60.0           2003.0  168251.0         21%  467-99-4677   
1            60.0           2008.0   51063.0         27%  537-71-4566   
2            68.0           2016.0   50155.0         16%  025-92-3584   
3            51.0           1999.0  180294.0          1%  314-35-9851   
4            58.0           2013.0  117642.0         13%  121-98-7248   

     Phone No.     Place Name    County          City State      Zip  \
0  303-572-8492        Denver    Denver        Denver    CO  80224.0   
1  225-945-495

## Explicit Identifiers

In [14]:
df[["Emp ID", "Employee_name"]]

Unnamed: 0,Emp ID,Employee_name
0,677509.0,Lois Walker
1,940761.0,Brenda Robinson
2,428945.0,Joe Robinson
3,408351.0,Diane Evans
4,193819.0,Benjamin Russell
...,...,...
96,704709.0,Harold Nelson
97,461593.0,Nicole Ward
98,392491.0,Theresa Murphy
99,495141.0,Tammy Young


## Masking 1: Anonymization

In [4]:
anon = anonymize(df)
anon.fake_names("Employee_name")

Unnamed: 0,Emp ID,Employee_name,Gender,Date of Birth,Age in Yrs.,Weight in Kgs.,Year of Joining,Salary,Last % Hike,SSN,Phone No.,Place Name,County,City,State,Zip,Region,Fake_Employee_name
0,677509.0,Lois Walker,F,3/29/1981,36.36,60.0,2003.0,168251.0,21%,467-99-4677,303-572-8492,Denver,Denver,Denver,CO,80224.0,West,Mrs Hannah Read
1,940761.0,Brenda Robinson,F,7/31/1970,47.02,60.0,2008.0,51063.0,27%,537-71-4566,225-945-4954,Stonewall,De Soto,Stonewall,LA,71078.0,South,Rosie Williams
2,428945.0,Joe Robinson,M,6/16/1963,54.15,68.0,2016.0,50155.0,16%,025-92-3584,219-904-2161,Michigantown,Clinton,Michigantown,IN,46057.0,Midwest,Rachael Evans
3,408351.0,Diane Evans,F,12-04-1977,39.67,51.0,1999.0,180294.0,1%,314-35-9851,215-793-6791,Hydetown,Crawford,Hydetown,PA,16328.0,Northeast,Judith Hunt-Gregory
4,193819.0,Benjamin Russell,M,4/17/1977,40.31,58.0,2013.0,117642.0,13%,121-98-7248,262-404-2252,Fremont,Waupaca,Fremont,WI,54940.0,Midwest,Dr Francis Stevenson
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,704709.0,Harold Nelson,M,12/24/1984,32.61,77.0,2011.0,156194.0,0%,456-99-2066,217-555-9216,Carol Stream,Carol Stream,Carol Stream,IL,60351.0,Midwest,Cameron Robinson
97,461593.0,Nicole Ward,F,12-12-1964,52.66,60.0,1989.0,95673.0,12%,196-84-2551,231-761-7861,Detroit,Wayne,Detroit,MI,48278.0,Midwest,Jean Nicholls
98,392491.0,Theresa Murphy,F,12/30/1987,29.60,57.0,2010.0,51015.0,11%,249-99-8801,907-356-1555,Mc Grath,Yukon-Koyukuk (CA),Mc Grath,AK,99627.0,West,Holly Price
99,495141.0,Tammy Young,F,3/22/1979,38.38,55.0,2015.0,93650.0,7%,771-02-7046,231-290-3075,Alma,Gratiot,Alma,MI,48801.0,Midwest,Antony Ali


In [5]:
df[["Emp ID", "Employee_name","Fake_Employee_name"]]

Unnamed: 0,Emp ID,Employee_name,Fake_Employee_name
0,677509.0,Lois Walker,Mrs Hannah Read
1,940761.0,Brenda Robinson,Rosie Williams
2,428945.0,Joe Robinson,Rachael Evans
3,408351.0,Diane Evans,Judith Hunt-Gregory
4,193819.0,Benjamin Russell,Dr Francis Stevenson
...,...,...,...
96,704709.0,Harold Nelson,Cameron Robinson
97,461593.0,Nicole Ward,Jean Nicholls
98,392491.0,Theresa Murphy,Holly Price
99,495141.0,Tammy Young,Antony Ali


## Masking 2: Shuffling

In [6]:
# Shuffling

df[["Emp ID", "Employee_name"]]

Unnamed: 0,Emp ID,Employee_name
0,677509.0,Lois Walker
1,940761.0,Brenda Robinson
2,428945.0,Joe Robinson
3,408351.0,Diane Evans
4,193819.0,Benjamin Russell
...,...,...
96,704709.0,Harold Nelson
97,461593.0,Nicole Ward
98,392491.0,Theresa Murphy
99,495141.0,Tammy Young


In [7]:
df2 = df.apply(lambda x: x.sample(frac=1).values)

df2[["Emp ID", "Employee_name"]]

Unnamed: 0,Emp ID,Employee_name
0,940922.0,Aaron Price
1,400173.0,Clarence Ross
2,867084.0,Carol Edwards
3,380086.0,Maria Walker
4,227922.0,Theresa Lee
...,...,...
96,639892.0,Cheryl Miller
97,456747.0,Nancy Jones
98,726264.0,Carol Murphy
99,153989.0,Tammy Young


## Masking 3: Encryption

In [83]:
df_vernam=df['Employee_name']

print(df_vernam)

0           Lois Walker
1       Brenda Robinson
2          Joe Robinson
3           Diane Evans
4      Benjamin Russell
             ...       
96        Harold Nelson
97          Nicole Ward
98       Theresa Murphy
99          Tammy Young
100                 NaN
Name: Employee_name, Length: 101, dtype: object


In [84]:
def vernamcipher(plaintext,key,spaceIndex):
    plaintext="".join(plaintext.split())
    if(len(plaintext)!=len(key)):
        val=len(plaintext)//len(key)
        key=key*val
        diff=len(plaintext)-len(key)
        key+=key[0:diff]
    ordpt=[ord(x)%65 for x in plaintext]
    ordk=[ord(y)%65 for y in key]
    viglist=[ordpt[m]+ordk[m] for m in range(0,len(ordpt))]
    for i in range(0,len(viglist)):
        if(viglist[i]>=26):
            viglist[i]-=26
    encrypted=''
    for j in viglist:
        encrypted+=chr(j+65)
        
    encrypted=encrypted[0:spaceIndex]+" "+encrypted[spaceIndex::]
    return encrypted

def add_inv(num):
    for i in range(0,26):
        if((num+i)%26):
            return i

def vernamcipherdecrypt(ciphertext,key, spaceIndex):
    ciphertext="".join(ciphertext.split())
    if(len(ciphertext)!=len(key)):
        val=len(ciphertext)//len(key)
        key=key*val
        diff=len(ciphertext)-len(key)
        key+=key[0:diff]
        
    ordct=[ord(x)%65 for x in ciphertext]
    ordk=[ord(y)%65 for y in key]
    viglist=[(ordct[m]-ordk[m])%26 for m in range(0,len(ordct))]
    decrypted=''
    for j in viglist:
        decrypted+=chr(j+65)
        
    decrypted=decrypted[0:spaceIndex]+" "+decrypted[spaceIndex::]
    return decrypted

def getIndSp(textArr):
    return textArr.index(" ")

In [85]:
df_vernam=df['Employee_name']

keyInp=input("Enter key value for the encryption: ")
keyx=keyInp.upper()

for i in range(len(df_vernam)-1):
    plainTxt=df_vernam[i]
    if(plainTxt==np.NaN):
        continue
    plainx=plainTxt.upper()
    spaceIndex=getIndSp(plainx)
    enc_i=vernamcipher(plainx,keyx,spaceIndex)
    df_vernam=df_vernam.replace(plainTxt, enc_i)

df_vernam


Enter key value for the encryption: hello


0           SSTD KHPVPF
1       IVPYRH VZMWUWZY
2          QSP CCIMYDCU
3           KMLYS LZLYG
4      IIYUOTMY CIZWPWZ
             ...       
96        OECZZK RPWGVR
97          UMNZZL ALCR
98       ALPCSZE XFFWLJ
99          AEXXM FSFYU
100                 NaN
Name: Employee_name, Length: 101, dtype: object

In [86]:
df_vernam_cipher=df_vernam

keyInp=input("Enter key value for the encryption: ")
keyx=keyInp.upper()

for i in range(len(df_vernam_cipher)-1):
    cipherTxt=df_vernam_cipher[i]
    if(cipherTxt==np.NaN):
        continue
    cipherx=cipherTxt.upper()
    spaceIndex=getIndSp(cipherx)
    dec_i=vernamcipherdecrypt(cipherx,keyx,spaceIndex)
    df_vernam_cipher=df_vernam_cipher.replace(cipherTxt, dec_i)

df_vernam_cipher


Enter key value for the encryption: hello


0           LOIS WALKER
1       BRENDA ROBINSON
2          JOE ROBINSON
3           DIANE EVANS
4      BENJAMIN RUSSELL
             ...       
96        HAROLD NELSON
97          NICOLE WARD
98       THERESA MURPHY
99          TAMMY YOUNG
100                 NaN
Name: Employee_name, Length: 101, dtype: object

In [23]:
st="LOIS WALKER"
# print(st.index(" "))
tempArr="".join(st.split())
print(tempArr)
spInd=st.index(" ")
stUpd=tempArr[0:spInd]+" "+tempArr[spInd::]
print(stUpd)

LOISWALKER
LOIS WALKER


In [7]:
df_hybrid=df['Employee_name']

print(df_hybrid)

0           Lois Walker
1       Brenda Robinson
2          Joe Robinson
3           Diane Evans
4      Benjamin Russell
             ...       
96        Harold Nelson
97          Nicole Ward
98       Theresa Murphy
99          Tammy Young
100                 NaN
Name: Employee_name, Length: 101, dtype: object


In [27]:
from asyncore import write
from Crypto.Cipher import AES
import sys
import random

letters =[chr(i) for i in range(ord('a'),ord('z'))]
letters+=[chr(i) for i in range(ord('A'),ord('Z'))]
letters+=[' ','.','?']

def factors(a):
    l1 = []
    co_primes = []
    for i in range(1, a+1):
        if(a % i == 0):
            l1.append(i)
    for i in range(1, a+1):
        l2 = []
        for j in range(1, i):
            if(i % j == 0):
                l2.append(j)
        if((set(l1) & set(l2) == {1}) and i != 2 and a % i != 0):
                co_primes.append(i)

    return co_primes[1]

def private_key(e, n):
    for i in range(1, n):
        if((e*i) % n == 1):
            return i

def isPrime(n):
    if n == 2:
        return True
    if n % 2 == 0 or n <= 1:
        return False
    for i in range(3, int(n**0.5)+1, 2):
        if n % i == 0:
            return False
    return True


def AES128(key, plaintext):
    IV='0123456789abcdef'
    aesKey = AES.new(key.encode('utf8'), AES.MODE_CBC, IV.encode('utf-8'))
        
    if len(plaintext) % 16 != 0:
        for i in range(16 - (len(plaintext) % 16)):
            plaintext += "0"
    
    ciphertext = aesKey.encrypt(plaintext.encode('utf-8'))
    return ciphertext

def RSA_encrypt(e, n, plain_text):
    cipher_text = (plain_text**e) % n
    return cipher_text

def RSAencrypt(text,e,n):
    plain_text_list = []
    for i in range(0, len(text)):
        for j in range(0, len(letters)):
            if(text[i] == letters[j]):
                plain_text_list.append(j)
            
    cipher_text = []

    for i in range(0, len(plain_text_list)):
        cipher_text.append(RSA_encrypt(e, n, int(plain_text_list[i])))

    encoded_message = ""
    for i in cipher_text:
        encoded_message += str(i) + ' '
        
    return encoded_message

def KeySetup():
    print("\t\t*******KEY SETUP PROCESS*******")
    print("For AES:\n")
    key = ''.join(chr(random.randint(65, 90)) for i in range(16))
    print("The AES-128 key is: ", key)
    with open("keyfile.txt","w") as f:
        f.write(key)
    print("Key successfully saved in file keyfile.txt\n")
    print("For RSA:")
    choice = input("Do you want to enter prime numbers manually or I should generate them for you? (y/n)\n")
    if choice == 'y':
        p = int(input("\nEnter the prime number 1 for public key: "))
        q = int(input("Enter the prime number 2 for public key: "))
    else:
        p = random.randint(1,100)
        q = random.randint(1,100)
        while not isPrime(p):
            p = random.randint(1,100)
        while not isPrime(q):
            q = random.randint(1,100)
        print(f"The prime numbers are: {p}, {q}")
    n = p * q
    function_n = (p-1)*(q-1)
    e = factors(function_n)
    d = private_key(e,function_n)
    print("\nThe random value of e that has been selected is: ",str(e))
    print("\nThe public key for the RSA algorithm is: {",str(e),",",str(n),"}")
    print("\nThe private key for the RSA algorithm is: {",str(d),",",str(n),"}")

    return e,d,n,function_n

def getIndSp(textArr):
    return textArr.index(" ")



e,d,n,phi_n=KeySetup()

df_hybrid=df['Employee_name']

# AESresult=AES128('abcdef1234567890', "LOISWALKER")


RSAresult=RSAencrypt("keyfile.txt",e,n)
print("The key is encrypted as",RSAresult)

for i in range(len(df_hybrid)-1):
    plainTxt=df_hybrid[i]
    if(plainTxt==np.NaN):
        continue
    plainx=plainTxt.upper()
    spaceIndex=getIndSp(plainx)
    
    enc_i=AES128('abcdef1234567890', df_hybrid[i])
    df_hybrid=df_hybrid.replace(plainTxt, enc_i)
    
df_hybrid

		*******KEY SETUP PROCESS*******
For AES:

The AES-128 key is:  IRRBJUUDDBSWHPIS
Key successfully saved in file keyfile.txt

For RSA:
Do you want to enter prime numbers manually or I should generate them for you? (y/n)
n
The prime numbers are: 23, 37

The random value of e that has been selected is:  7

The public key for the RSA algorithm is: { 7 , 851 }

The private key for the RSA algorithm is: { 679 , 851 }
The key is encrypted as 750 215 116 684 288 122 215 615 61 828 61 


0      b'\x93\xc2\xf1\xf0\xdf\r#\x9d_\x94^\xa6\\\xbe\...
1              b'\xf0\t-\xcc(\xd3\xb9\xe8d@<h\xbdma\xfd'
2            b'Dj"\x02H\xdf\xa6\xa8\xb4K\x91%\xd9s,\xc5'
3           b'_\x13l\xe0\x07\xd7\xdek\x0c\xb0\\6Wfl\xc9'
4      b'\xc6\x8bT\xd4\xcc{\x14\xf8P\x96\x8d$\x8d5\xdeo'
                             ...                        
96     b'\xc2\xf8N8\xee\x89\x84\xc9J\xc6\x1ey\xcf\xf0J}'
97     b'o\xea\xb0\xb4\x81\xa7\xa7\xa3hF\x86\x08\x89\...
98        b'\xe8IY \xdb\xcb\x0f\xbd\x851s(\x01A\x18\xa6'
99                b'3\\"\x19\xc9\xfclT\xc7\xbaB\xb0<s[#'
100                                                  NaN
Name: Employee_name, Length: 101, dtype: object