In [2]:
from faker import Faker 
import pandas as pd 

# Single shared Faker generator; the cells below draw every synthetic value from it.
fake = Faker()

def create_fake_data(num=100, seed=None):
    """Build a DataFrame of synthetic personal records.

    Parameters
    ----------
    num : int, default 100
        Number of rows to generate.
    seed : int or None, default None
        When given, the shared ``fake`` generator is re-seeded before any
        value is drawn so repeated calls draw the same random sequence.
        ``None`` leaves the generator state untouched (the original behavior).
        NOTE(review): DOB values are presumably derived relative to the
        current date, so they may still shift between days - confirm.

    Returns
    -------
    pandas.DataFrame
        One row per record with the columns ``Name``, ``Email``, ``DOB``
        and ``Address``.
    """
    if seed is not None:
        # Re-seed only this instance so other Faker objects are unaffected.
        fake.seed_instance(seed)

    # Columns are generated one after another (not row by row); keeping this
    # order keeps the sequence of random draws identical to the original.
    return pd.DataFrame({
        'Name': [fake.name() for _ in range(num)],
        'Email': [fake.email() for _ in range(num)],
        'DOB': [fake.date_of_birth(minimum_age=18, maximum_age=90) for _ in range(num)],
        'Address': [fake.address() for _ in range(num)]
    })

# Generate the 100-row synthetic dataset used by the following cells and
# print its first five rows as a plain-text preview.
df = create_fake_data(num=100)
print(df.head(n=5))

            Name                    Email         DOB  \
0   Adam Johnson        oross@example.com  1999-06-17   
1     Lori Burke        qdiaz@example.org  1976-05-22   
2     Tyler Leon     pamela14@example.org  1981-03-18   
3  Patrick Jones       hpetty@example.com  1959-12-19   
4     Corey Hill  henryronald@example.com  1950-11-13   

                                             Address  
0             75464 Brianna Run\nBurkeview, AL 05695  
1       13885 Kenneth Drive\nNew Ericaland, DE 85295  
2  631 Palmer Green Apt. 338\nPhillipschester, PR...  
3  354 Roberts Overpass Apt. 561\nSinghberg, OR 4...  
4      03449 Castillo Stream\nEast Stephen, IL 47006  


In [3]:
# Anonymize: drop the direct identifiers and generalize the date of birth
# into a coarse age band, so no individual-level value is left in the result.

# The conversion is written back to `df` on purpose: later cells copy `df`
# and rely on DOB already being a datetime column. Unparseable values become NaT.
df['DOB'] = pd.to_datetime(df['DOB'], errors='coerce')

# Age in completed years. A plain "current year - birth year" overstates the
# age by one for everyone whose birthday has not happened yet this year, which
# can push a row across a bin edge (e.g. 30 -> 31), so subtract one for those.
today = pd.Timestamp.now()
current_year = today.year
dob = df['DOB']
birthday_pending = (dob.dt.month > today.month) | (
    (dob.dt.month == today.month) & (dob.dt.day > today.day)
)
# NaT rows compare False above and stay NaN here, so they end up unbinned.
age = current_year - dob.dt.year - birthday_pending.astype(int)

# Bands are right-inclusive: 0-30 -> Young, 31-60 -> Middle-aged, 61-100 -> Old.
# include_lowest=True keeps an age of exactly 0 in the first band instead of NaN.
bins = [0, 30, 60, 100]
labels = ['Young', 'Middle-aged', 'Old']

# Build the result in one expression instead of adding helper columns and
# removing them again with inplace=True.
df_anonymized = (
    df.drop(columns=['Name', 'Email', 'Address', 'DOB'])
      .assign(**{'Age Range': pd.cut(age, bins=bins, labels=labels, include_lowest=True)})
)
df_anonymized.head()

Unnamed: 0,Age Range
0,Young
1,Middle-aged
2,Middle-aged
3,Old
4,Old


In [4]:
import hashlib

def hash_data(data, key=None):
    """Return a hexadecimal SHA-256 pseudonym for a string.

    Parameters
    ----------
    data : str
        Value to pseudonymize; it is UTF-8 encoded before hashing.
    key : str or bytes or None, default None
        Optional secret. When ``None`` the plain SHA-256 digest of ``data``
        is returned (the original behavior). When given, an HMAC-SHA256
        keyed with ``key`` is returned instead. A plain digest of a
        low-entropy value such as a name can be reversed by hashing a list
        of candidate names; a secret key prevents that.

    Returns
    -------
    str
        64-character lowercase hexadecimal digest.
    """
    import hmac  # local import: only the keyed path needs it

    payload = data.encode()
    if key is None:
        return hashlib.sha256(payload).hexdigest()

    secret = key.encode() if isinstance(key, str) else key
    return hmac.new(secret, payload, hashlib.sha256).hexdigest()

# Pseudonymize: work on a copy of `df`, replacing each name with its hash and
# each e-mail with a freshly drawn random "xxxx@xxxx.com" address (one draw per
# row, unrelated to the original value). DOB and Address are carried over as-is.
df_pseudonymized = df.assign(
    Name=df['Name'].map(hash_data),
    Email=[
        fake.lexify(text="????@????.com", letters='abcdefghijklmnopqrstuvwxyz')
        for _ in range(len(df))
    ],
)

print(df_pseudonymized.head(n=5))

                                                Name          Email  \
0  8e8de8998cf7c23eb57184fbfb2569dbc7d397bc1b7477...  jrcx@aojr.com   
1  f63afa027696c3545256e20ece21be0cd2475684b721c2...  vanv@bffc.com   
2  08a01847c76d5005a5ac05af768e3d2586d46d0f59531c...  suak@etpu.com   
3  3c11e2192e8c629b42184b209feb998984ac969fc5a030...  imvc@qmmj.com   
4  46e241537711d05cf4e664bb21a4354f7a412a1f3d38be...  yred@yrfh.com   

         DOB                                            Address  
0 1999-06-17             75464 Brianna Run\nBurkeview, AL 05695  
1 1976-05-22       13885 Kenneth Drive\nNew Ericaland, DE 85295  
2 1981-03-18  631 Palmer Green Apt. 338\nPhillipschester, PR...  
3 1959-12-19  354 Roberts Overpass Apt. 561\nSinghberg, OR 4...  
4 1950-11-13      03449 Castillo Stream\nEast Stephen, IL 47006  
