In [2]:
import datafaucet as dfc

In [3]:
# Start the datafaucet engine, requesting the 'spark' engine type.
# The returned handle is stored in `engine`; the cells shown below do not
# reference it again.
engine = dfc.engine('spark')

In [4]:
# Base DataFrame for the demo, created from a range of size 1000.
df = dfc.range(1000)

In [5]:
from pyspark.sql import functions as F

# Populate the frame with synthetic phone-book style data.
# The result is cached — presumably so the randomly generated values stay the
# same across later actions; TODO confirm.
df = (df
    .cols.create('prefix').randchoice(['032', '095'])  # one of two 3-digit prefixes, picked at random
    .cols.create('number').randint(1111111, 9999999)  # random 7-digit number (bound inclusivity not shown here)
    # cast to string so it can be concatenated with the prefix below
    .cols.get('number').cast('string')
    .cols.create('x').fake('first_name')  # fake first name
    .cols.create('y').fake('last_name')  # fake last name
).cache()

# Concatenate prefix + number into the final 'number' column; only
# 'number', 'x' and 'y' are kept, so the helper 'prefix' column is dropped.
df = df.select(F.concat('prefix', 'number').alias('number'), 'x', 'y')

In [9]:
# Show the Spark data type of each selected column.
cols = ['number', 'y']
{field.name: field.dataType for field in df.schema.fields if field.name in cols}


{'number': StringType, 'y': StringType}

In [92]:
# Sanity check: number of rows in the generated DataFrame.
df.count()

1000

In [93]:
# Preview 5 rows of the generated data as a table.
df.data.grid(5)

Unnamed: 0,number,x,y
0,958777210,Anthony,Smith
1,329257921,Debra,Jones
2,955655843,Samuel,Alvarado
3,958063467,Casey,Flowers
4,327897686,Debbie,Larson


In [94]:
### encrypt and obscure

In [95]:
# Generate an encryption key from the passphrase 'mysecret' and display it.
# NOTE(review): the passphrase is hard-coded and the key is echoed in the cell
# output. That is acceptable for a demo only; real secrets should come from the
# environment or an interactive prompt, and the output should be cleared
# before the notebook is shared.
key = dfc.crypto.generate_key('mysecret')
key

b'6utnncOT8mV779t7_yuRbywwZD6GoqrqHC9vsxShxhk='

In [96]:
# Protect the name columns: 'x' is obscured using the passphrase directly,
# while 'y' is encrypted with the generated key.
# NOTE(review): `df` is rebound here, so re-running this cell on its own would
# apply obscure/encrypt to the already-transformed values.
df = (df
     .cols.get('x').obscure('mysecret')
     .cols.get('y').encrypt(key)
)

In [97]:
# Preview 5 rows of the obscured 'x' and encrypted 'y' columns.
df.cols.get('x', 'y').data.grid(5)

Unnamed: 0,x,y
0,0xFn5+WRkQEA,Z0FBQUFBQmQ4YUU3ZFJMSk5ERkZDcUNVYTdPM2RqdmdzZm...
1,05QRFGcCAA==,Z0FBQUFBQmQ4YUU3ZFRFbFZwQ2s3ZDVmOWQ5U0RTekxibT...
2,s5OQE2CTAwA=,Z0FBQUFBQmQ4YUU3czBlV0NwNzBiYmY2VFVzSXVoV1VlUV...
3,05NgYJACAA==,Z0FBQUFBQmQ4YUU3V1NtX3hwaVNKV2ZLdlRuSGUtZHU4RG...
4,05QRZOcSBwA=,Z0FBQUFBQmQ4YUU3SVZEdjVpdmszT0pIYzNnY1FibnhZUm...


In [98]:
# Reverse the previous step: unravel 'x' with the same passphrase and decrypt
# 'y' with the same key.
# NOTE(review): `df` is rebound again; this cell must run exactly once after
# the obscure/encrypt cell to get back the clear-text names.
df = (df
     .cols.get('x').unravel('mysecret')
     .cols.get('y').decrypt(key)
)

In [99]:
# Preview 5 rows of 'x' and 'y' after unravel/decrypt to check the round trip.
df.cols.get('x', 'y').data.grid(5)

Unnamed: 0,x,y
0,Anthony,Smith
1,Debra,Jones
2,Samuel,Alvarado
3,Casey,Flowers
4,Debbie,Larson


More complicated obfuscation/encryption can be done with a custom function.  
For instance, the following concatenates the first 3 digits in clear, a `%` separator, and the obscured phone number.

In [100]:
from pyspark.sql import functions as F
from datafaucet.spark.functions import obscure, unravel

def mobile_obscure(key):
    """Build a column transformer that partially obscures a phone number.

    The returned callable takes a column ``c`` and produces the concatenation
    of:

    * the first 3 characters of ``c`` (left in clear),
    * a literal ``'%'`` separator,
    * the result of ``obscure(key)`` applied to the whole column ``c``.

    Nothing is evaluated until the returned callable is invoked with a column.

    :param key: secret passed through to ``obscure``.
    :return: a one-argument callable mapping a column to the combined column.
    """
    def _obscure_column(c):
        clear_prefix = F.substring(c, 1, 3)  # Spark substring positions are 1-based
        separator = F.lit('%')
        hidden_value = obscure(key)(c)
        return F.concat(clear_prefix, separator, hidden_value)

    return _obscure_column

def mobile_unravel(key):
    """Build a column transformer that reverses ``mobile_obscure``.

    The returned callable takes a column ``c``, splits it on ``'%'``, takes
    the second element of the split (the obscured part) and applies
    ``unravel(key)`` to it.

    Nothing is evaluated until the returned callable is invoked with a column.

    :param key: secret passed through to ``unravel``.
    :return: a one-argument callable mapping a column to the unravelled column.
    """
    def _unravel_column(c):
        restore = unravel(key)
        pieces = F.split(c, '%')
        hidden_value = F.element_at(pieces, 2)  # element_at is 1-based: 2 = part after '%'
        return restore(hidden_value)

    return _unravel_column


In [101]:
# Apply the custom obscuring function to the 'number' column. The result is
# bound to `res`; `df` itself is not rebound.
res = df.cols.get('number').apply(mobile_obscure('mysecret'))
res.data.grid(5)

Unnamed: 0,number,x,y
0,095%i3Vwiw1xDXKL8QQA,Anthony,Smith
1,032%i/VyjAl0D/KN9wAA,Debra,Jones
2,095%i3VwCwh1D/CJ9AIA,Samuel,Alvarado
3,095%i3Vwiw12CXOI9gMA,Casey,Flowers
4,032%i/VyDIr2DnIK9QcA,Debbie,Larson


In [102]:
# Reverse the custom obscuring on 'number' with the same passphrase and
# preview the restored values.
# NOTE(review): `res` is rebound, so this cell must run once after the
# obscuring cell.
res = res.cols.get('number').apply(mobile_unravel('mysecret'))
res.data.grid(5)

Unnamed: 0,number,x,y
0,958777210,Anthony,Smith
1,329257921,Debra,Jones
2,955655843,Samuel,Alvarado
3,958063467,Casey,Flowers
4,327897686,Debbie,Larson
