In [1]:
import pandas as pd
import random
from datetime import datetime, timedelta
import bcrypt
import uuid
from typing import Tuple
import secrets
import string


def get_pwd_hash(password: str, salt: str | bytes) -> str:
    if type(salt) == str:
        salt = salt.encode('utf-8')
    
    password = password.encode('utf-8')
            
    password_hash = bcrypt.hashpw(password, salt)
    password_hash_str = password_hash.decode('utf-8')

    return password_hash_str
      
def generate_salt_and_hash(password: str) -> Tuple[str, str]:
    salt = bcrypt.gensalt(rounds=4)
    
    password_hash_str = get_pwd_hash(password, salt)

    salt_str = salt.decode('utf-8')

    return salt_str, password_hash_str

def random_day(year):
    # Jan 1 of 'year'
    start_date = datetime(year, 1, 1)
    
    # Jan 1 of 'year' + 1
    end_date = datetime(year + 1, 1, 1)
    
    # Total days in 'year'
    total_days = (end_date - start_date).days
    
    # Random day in 'year'
    random_day = random.randrange(total_days)
    
    # Date of the random day
    date = start_date + timedelta(days=random_day)
    
    return date

def generate_password(length):
    # All the characters that can be used to generate a password
    all_characters = string.ascii_letters + string.digits #+ string.punctuation

    # Use secrets.choice to select characters randomly
    password = ''.join(secrets.choice(all_characters) for i in range(length))
    
    return password

In [2]:
df = pd.read_csv('names_raw.csv', header=None)

In [3]:
df.columns = ['name', 'age', 'city']

In [4]:
df['first_name'] = df['name'].apply(lambda x: x.split(' ')[1])
df['last_name'] = df['name'].apply(lambda x: x.split(' ')[0])
df['birthday_year'] = df['age'].apply(lambda x: 2023 - int(x))

In [5]:
df['birthday'] = df['birthday_year'].apply(lambda x: random_day(x)).astype(str)

In [6]:
df['biography'] = df['first_name'] + ' ' + df['last_name'] + ' is a ' + df['age'].astype(str) + ' year old ' + df['city'] + ' native.'

In [7]:
password = 'password'
salt, pwd_hash = generate_salt_and_hash(password)

df['password'] = password
df['password_salt'] = salt
df['password_hash'] = pwd_hash

In [8]:
df['deleted'] = False

In [9]:
df['created_at'] = datetime.now()
df['edited_at'] = datetime.now()

In [10]:
df['id'] = df['name'].apply(lambda x: uuid.uuid4()).astype(str)

In [11]:
df = df[['id', 'created_at', 'edited_at', 'password_salt', 'password_hash', 'deleted', 'first_name', 'last_name', 'birthday', 'biography', 'city']]

In [12]:
df

Unnamed: 0,id,created_at,edited_at,password_salt,password_hash,deleted,first_name,last_name,birthday,biography,city
0,cf0af7da-4ed9-46e1-94e1-4690eb9c8775,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Роберт,Абрамов,2012-01-09,Роберт Абрамов is a 11 year old Воткинск native.,Воткинск
1,f977d720-3afa-4ef7-a363-d95b1a2d5990,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Александр,Абрамов,1916-07-09,Александр Абрамов is a 107 year old Домодедово...,Домодедово
2,cf485eed-7ddf-4767-a2e6-c99580c478e8,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Илья,Абрамов,1912-04-20,Илья Абрамов is a 111 year old Севастополь nat...,Севастополь
3,4134f601-e428-4d11-be7a-a7a2f8c5ea5d,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Даниил,Абрамов,1918-09-13,Даниил Абрамов is a 105 year old Ржев native.,Ржев
4,e276a983-ed49-4512-aad2-b73878c4e2e6,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Лев,Абрамов,2007-07-25,Лев Абрамов is a 16 year old Когалым native.,Когалым
...,...,...,...,...,...,...,...,...,...,...,...
999995,e6467b86-3c39-46c6-a4aa-92f5069ca2d1,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Полина,Яшина,1909-09-20,Полина Яшина is a 114 year old Клин native.,Клин
999996,eb5deda6-ba4e-4f87-b661-546aae25c271,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Надежда,Яшина,1964-11-18,Надежда Яшина is a 59 year old Зеленодольск na...,Зеленодольск
999997,cdafee40-5f6f-4e22-a9b2-984d955d6b20,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Екатерина,Яшина,1957-02-02,Екатерина Яшина is a 66 year old Ставрополь na...,Ставрополь
999998,54f08304-5914-4b7a-8ae4-2ea9d6e87f72,2023-06-25 13:28:26.306904,2023-06-25 13:28:26.308012,$2b$04$1BXeXqb9Xr8WOSasN4dpbu,$2b$04$1BXeXqb9Xr8WOSasN4dpbuVPDi6ELjO7r2MAl8K...,False,Анна,Яшина,1983-01-12,Анна Яшина is a 40 year old Березники native.,Березники


In [13]:
df.to_parquet('profiles.parquet', index=False, compression='gzip')