In [110]:
from multiprocessing.connection import address_type

from dateutil.relativedelta import relativedelta
from argon2 import PasswordHasher
from datetime import datetime
from faker import Faker
import pandas as pd
import random as rd
import re

# One seed is applied to both Faker and the stdlib `random` module before any
# value is generated by the cells below.
SEED = 29
Faker.seed(SEED)
rd.seed(SEED)

# Shared objects used by the cells below.
ph = PasswordHasher()  # argon2 hasher applied to the generated raw passwords
fk = Faker('pt-BR')  # Faker instance configured with the pt-BR locale
QNT = 10  # base row count used to size the generated tables

In [111]:
def generate_addresses(amount=None):
    """Generate fake addresses and split each one into table-ready fields.

    Every address comes from the module-level Faker instance ``fk`` and is
    parsed under the assumption that it has three lines: the street address
    (``street[, number]``), a middle line that is ignored, and a last line
    shaped like ``<postal code> <city>/<state>``.

    Args:
        amount: How many addresses to generate. When omitted it falls back to
            ``2 * QNT - 1``, the value that used to be hard-coded here.
            NOTE(review): that default is one short of the ``2 * QNT`` ids the
            users and stores cells request, so the cyclic id generator hands
            address 1 out twice -- confirm whether that is intentional.

    Returns:
        A list of tuples ``(country, state, city, street, number,
        postal_code, complement)``. ``country`` is always ``'Brasil'``;
        ``number`` is 0 when the street line has no numeric building number;
        ``complement`` is a random pick that is ``None`` most of the time.
    """
    if amount is None:
        amount = 2 * QNT - 1

    complements = ['Casa 2', 'Apto. 162', 'Casa B', 'Galpão A', None, None, None, None, None, None]
    raw_addresses = []

    # All addresses are drawn first and the complements afterwards, which
    # keeps the sequence of random draws identical to the previous version.
    for ad in [fk.address() for _ in range(amount)]:
        split_ad = ad.split('\n')

        street_address = split_ad[0].split(',')
        street = street_address[0]
        # The building number is optional; anything that is not a plain
        # decimal number is stored as 0 instead of raising ValueError.
        number_token = street_address[1].strip() if len(street_address) > 1 else ''
        number = int(number_token) if number_token.isdecimal() else 0

        temp, state = split_ad[2].split('/')
        postal_code, city = temp.split(' ', maxsplit=1)
        postal_code = postal_code.replace('-', '')
        # Drop any padding that surrounded the '/' separator so the stored
        # state and city values carry no leading/trailing blanks.
        state = state.strip()
        city = city.strip()

        complement = rd.choice(complements)

        raw_addresses.append(('Brasil', state, city, street, number, postal_code, complement))

    return raw_addresses

In [112]:
def id_generator(df: pd.DataFrame):
    """Yield 1-based row ids for ``df`` in an endless cycle.

    The ids run ``1, 2, ..., len(df)`` and then start over at 1. The row
    count is read again on every step, so rows added to ``df`` later are
    picked up by a generator that is already running.

    Args:
        df: DataFrame whose row count bounds the generated ids.

    Yields:
        int: The next id in the cycle.

    Raises:
        ValueError: On the next ``next()`` call while ``df`` has no rows.
            Without this check the loop would spin forever, because the
            modulus below would be 1 and the counter would stay at 0.
    """
    next_id = 0
    while True:
        row_count = df.shape[0]
        if row_count == 0:
            raise ValueError('id_generator requires a DataFrame with at least one row')
        # Wrapping modulo (row_count + 1) lands on 0 once per cycle; 0 is not
        # a valid id, so it is skipped rather than yielded.
        next_id = (next_id + 1) % (row_count + 1)
        if next_id:
            yield next_id

In [113]:
# Load the generated address tuples into a table, one column per tuple field.
addresses = pd.DataFrame(generate_addresses(), columns=[
    'country', 'state', 'city', 'street', 'number', 'postal_code', 'complement',
])

# Cyclic source of 1-based address ids, consumed by the users and stores cells.
seq_address_id = id_generator(addresses)

display(addresses)

Unnamed: 0,country,state,city,street,number,postal_code,complement
0,Brasil,MT,Monteiro,Trevo de da Paz,9,66001735,
1,Brasil,SE,Jesus,Estrada de Machado,73,98386448,Apto. 162
2,Brasil,RS,Leão,Lagoa Sá,89,52425186,
3,Brasil,AL,Silveira,Vale Leonardo Duarte,14,66826240,
4,Brasil,SC,Alves,Vereda Moreira,0,8022753,
5,Brasil,AM,Pinto,Alameda José Correia,3,63338353,
6,Brasil,RO,Monteiro do Sul,Viela Leão,6,92461606,Apto. 162
7,Brasil,PE,Cavalcanti de Ramos,Distrito Cavalcante,89,53707393,
8,Brasil,RN,Sampaio,Área Porto,90,40143531,
9,Brasil,MA,Gonçalves dos Dourados,Colônia da Cunha,15,33634485,


In [114]:
def gen_date():
    """Return a random date between 50 and 12 years before the current moment."""
    earliest = datetime.now() - relativedelta(years=50)
    latest = datetime.now() - relativedelta(years=12)
    return fk.date_between(earliest, latest)


def gen_password(size):
    """Return a Faker-generated password that is ``size`` characters long."""
    return fk.password(length=size)


# The column lists are evaluated in the order written; keep that order so the
# seeded Faker output stays the same from run to run.
users = pd.DataFrame({
    'cpf': [fk.cpf() for _ in range(QNT)],
    'profile_pic': [None] * QNT,
    'email': [fk.email() for _ in range(QNT)],
    'username': [fk.name() for _ in range(QNT)],
    'phone': [fk.cellphone_number() for _ in range(QNT)],
    'birthday': [gen_date() for _ in range(QNT)],
    'is_active': [True] * QNT,
    'raw_password': [gen_password(8) for _ in range(QNT)],
    'address_id': [next(seq_address_id) for _ in range(QNT)],
})

# Store the argon2 hash next to the plain password, and reduce phones to digits.
users['password'] = users['raw_password'].map(ph.hash)
users['phone'] = users['phone'].map(lambda raw_phone: re.sub(r'[^0-9]', '', raw_phone))

# The plain-text credentials are exported for test logins before the column
# is removed from the table itself.
users.to_csv('csv/test_logins.csv', columns=['email', 'raw_password'])
users = users.drop(columns=['raw_password'])

display(users)

Unnamed: 0,cpf,profile_pic,email,username,phone,birthday,is_active,address_id,password
0,189.267.053-40,,asafemartins@example.com,Ana Liz Aragão,5507185288080,2013-07-01,True,1,"$argon2id$v=19$m=65536,t=3,p=4$YM/Mk7MW5l1MRLd..."
1,593.206.147-25,,ana-clara79@example.com,Ágatha Dias,5508440880934,1981-04-17,True,2,"$argon2id$v=19$m=65536,t=3,p=4$QIE89f9cXKKR4lf..."
2,392.810.756-95,,sophiasales@example.org,Dr. Arthur Gabriel Camargo,3005765184,2010-08-23,True,3,"$argon2id$v=19$m=65536,t=3,p=4$2RgGuK/L1W2SeM4..."
3,794.256.801-67,,ferreiraraquel@example.org,Dom Costa,5503106865505,1981-05-24,True,4,"$argon2id$v=19$m=65536,t=3,p=4$2ia6CS5XnP3aipY..."
4,820.651.437-62,,davi-lucca02@example.net,Rafael Cardoso,8146307849,1997-12-06,True,5,"$argon2id$v=19$m=65536,t=3,p=4$dNEw4cq4gqlnrgG..."
5,253.067.984-38,,maria-florda-cruz@example.net,Maria Isis Peixoto,5508160758925,1990-11-10,True,6,"$argon2id$v=19$m=65536,t=3,p=4$0Zh5iP0CTr082Lv..."
6,829.037.516-68,,bryanvasconcelos@example.org,Laís Albuquerque,3195172944,1994-08-19,True,7,"$argon2id$v=19$m=65536,t=3,p=4$2abQppw4DXrOAmo..."
7,245.369.718-82,,pedro30@example.com,Thomas Gonçalves,3005156940,2000-07-09,True,8,"$argon2id$v=19$m=65536,t=3,p=4$7yCjIVX3nrmlEpa..."
8,586.097.412-49,,portolaura@example.com,Dra. Eloah Fogaça,7101250297,1987-03-12,True,9,"$argon2id$v=19$m=65536,t=3,p=4$aOq5rJBmx9A3Cvd..."
9,784.026.591-01,,nunesdavi@example.com,Marcela Vargas,555134478274,2000-12-14,True,10,"$argon2id$v=19$m=65536,t=3,p=4$Mh2kYxABmtutu2B..."


In [115]:
# The column lists are evaluated in the order written; keep that order so the
# seeded Faker output stays the same from run to run.
stores = pd.DataFrame({
    'cnpj': [fk.cnpj() for _ in range(QNT)],
    'name': [fk.company() for _ in range(QNT)],
    'picture_url': [None] * QNT,
    'email': [fk.company_email() for _ in range(QNT)],
    'is_active': [True] * QNT,
    'raw_password': [gen_password(8) for _ in range(QNT)],
    'address_id': [next(seq_address_id) for _ in range(QNT)],
})

# Hash the passwords, export the plain-text credentials for test logins, and
# only then remove the plain-text column from the table.
stores['password'] = stores['raw_password'].map(ph.hash)
stores.to_csv('csv/test_store_logins.csv', columns=['email', 'raw_password'])
stores = stores.drop(columns=['raw_password'])

# Cyclic source of 1-based store ids, consumed by the store phones cell.
seq_store_id = id_generator(stores)

display(stores)

Unnamed: 0,cnpj,name,picture_url,email,is_active,address_id,password
0,23.051.847/0001-09,Alves,,guerrajoao-gabriel@moreira.com,True,11,"$argon2id$v=19$m=65536,t=3,p=4$qU2c36hTDcmQZdL..."
1,05.976.412/0001-85,Silva,,oazevedo@silveira.com,True,12,"$argon2id$v=19$m=65536,t=3,p=4$6jJG/S/BWXXhAqS..."
2,29.046.581/0001-84,Novaes,,vitor-hugo36@abreu.com,True,13,"$argon2id$v=19$m=65536,t=3,p=4$HVQIUSajAQMScXB..."
3,98.720.645/0001-01,Barros,,alicepeixoto@da.br,True,14,"$argon2id$v=19$m=65536,t=3,p=4$fr3dw/ZH4T3i5ez..."
4,80.423.567/0001-00,Rocha Nogueira e Filhos,,gazevedo@aparecida.br,True,15,"$argon2id$v=19$m=65536,t=3,p=4$AqHQLYQhmrVAiVL..."
5,58.403.967/0001-02,Novais,,marcela49@pinto.com,True,16,"$argon2id$v=19$m=65536,t=3,p=4$48kUUAqBOhSYUsA..."
6,84.576.219/0001-23,Duarte e Filhos,,wnovais@pereira.com,True,17,"$argon2id$v=19$m=65536,t=3,p=4$NHstNhBcqZbH0xc..."
7,64.075.293/0001-59,Costela,,da-rochathomas@da.com,True,18,"$argon2id$v=19$m=65536,t=3,p=4$hj6LUX0A4mUUfJS..."
8,26.019.853/0001-69,Lopes Ltda.,,melissa07@santos.com,True,19,"$argon2id$v=19$m=65536,t=3,p=4$vr2uwdXkLQA/QBZ..."
9,27.136.580/0001-03,Garcia - ME,,joseteixeira@andrade.net,True,1,"$argon2id$v=19$m=65536,t=3,p=4$HxbYEh+YbL9rVnc..."


In [117]:
# Generate QNT + QNT // 5 phone numbers first, then give each row the next
# store id from the cyclic generator.
store_phones = pd.DataFrame({
    'phone': [fk.cellphone_number() for _ in range(QNT + QNT // 5)],
})
store_phones['store_id'] = [next(seq_store_id) for _ in store_phones.index]

# Keep only digits and '+' in each phone number.
store_phones['phone'] = store_phones['phone'].map(lambda raw_phone: re.sub(r'[^0-9+]', '', raw_phone))

display(store_phones)

Unnamed: 0,phone,store_id
0,5578954035359,1
1,5513998579151,2
2,5534903288243,3
3,5502991316922,4
4,5551937218661,5
5,5552992630217,6
6,5585948823170,7
7,5595934842890,8
8,5585948704614,9
9,5533902401052,10
