In [None]:
# Install the third-party packages imported in the next cell (faker, unidecode).
!pip install Faker
!pip install Unidecode

In [None]:
import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime
import random
import re
from dateutil.relativedelta import relativedelta
from unidecode import unidecode

In [None]:
# Shared Faker generator restricted to the pt_PT locale; Portuguese_Citizen
# reads it as a module-level global for every generated field.
fake = Faker(["pt_PT"])

class Portuguese_Citizen:
    """Synthetic citizen record that can be rendered as several document types.

    All fields are generated once, in ``__init__``, from the module-level
    ``fake`` Faker instance, the ``random`` module and ``np.random``.  Each
    ``get*`` method returns a plain ``dict`` mapping a document label to the
    corresponding value, so a list of those dicts can be passed straight to
    ``pd.DataFrame``.
    """

    # Values that ``disability_record`` may take when a disability is assigned.
    # Source list: https://wecapable.com/types-of-disabilities-list/
    _DISABILITIES = (
        "Blindness", "Low-vision", "Leprosy", "Hearing impairment", "Locomotor Disability", "Dwarfism",
        "Intellectual Disability", "Mental Illness", "Autism Spectrum Disorder (ASD)", "Cerebral Palsy (CP)",
        "Muscular Dystrophy (MD)", "Chronic Neurological Condition", "Specific Learning Disability",
        "Multiple Sclerosis (MS)", "Speech and Language Disability", "Thalassemia", "Hemophilia",
        "Sickle Cell Disease", "Multiple Disabilities", "Acid Attack Victim", "Parkinson’s disease",
    )

    def __init__(self):
        """Generate every attribute of the citizen; takes no arguments."""

        # --- identity -------------------------------------------------------
        self.sex = random.choice(["F", "M"])

        # Pick sex-appropriate name providers.
        if self.sex == 'F':
            self.first_name = fake.first_name_female()
            self.last_name = fake.last_name_female()
        else:
            self.first_name = fake.first_name_male()
            self.last_name = fake.last_name_male()

        self.full_name = self.first_name + ' ' + self.last_name

        # Parents' names are drawn independently of the citizen's own surname.
        self.father_first_name = fake.first_name_male()
        self.father_last_name = fake.last_name_male()
        self.mother_first_name = fake.first_name_female()
        self.mother_last_name = fake.last_name_female()

        self.birthdate = fake.date_of_birth(minimum_age=10, maximum_age=110)
        self.city_of_birth = fake.city()
        self.country = 'Portugal'
        # About 90% of citizens get 'PRT'; the rest get a random alpha-3 code.
        self.nationality = 'PRT' if random.random() > .1 else fake.country_code(representation='alpha-3')

        # 5 digits + two-digit birth year + 6 digits.
        self.electoral_number = fake.bothify(text='#####') + self.birthdate.strftime('%y') + fake.bothify(text='######')

        # --- address --------------------------------------------------------
        self.city = fake.city()
        self.postcode = fake.postcode()
        self.building_number = fake.building_number()

        # --- identity card --------------------------------------------------
        self.id_civil_number = fake.bothify(text='########')
        # Document number = civil number followed by "<digit> ZZ<digit>".
        self.id_document_number = self.id_civil_number + ' ' + fake.bothify(text='# ZZ#')
        self.id_expiry_date = fake.date_between(start_date='today', end_date='+10y')
        self.tax_number = fake.bothify(text='000#######')
        self.social_security_number = fake.bothify(text='###############')
        self.health_number = fake.bothify(text='000############')

        # --- passport -------------------------------------------------------
        # Two letters from a restricted alphabet followed by seven digits.
        self.passport_number = fake.bothify(text='??#######', letters='ABEHIKMNOPTXYZ')
        self.passport_publish_date = fake.date_between(start_date='-5y', end_date='today')
        # Valid for five years, expiring the day before the fifth anniversary.
        self.passport_expiration_date = self.passport_publish_date + relativedelta(years=5, days=-1)
        # NOTE(review): issuing-state code follows `nationality`, while the
        # nationality label below is always 'PORTUGUESA' -- confirm intent.
        self.passport_country = self.nationality
        self.passport_nationality = 'PORTUGUESA'
        self.passport_identifying_number = fake.bothify(text='#######')
        # Height drawn from N(1.72, 0.15), rounded to two decimals (no clipping).
        self.height = round(np.random.normal(1.72, 0.15), 2)

        # --- health insurance -----------------------------------------------
        self.health_insurance_publish_date = fake.date_between(start_date='-15y', end_date='today')
        # Fix: expiry is derived from the insurance issue date (it was
        # previously computed from the passport issue date by mistake).
        self.health_insurance_expiration_date = self.health_insurance_publish_date + relativedelta(years=3, days=-1)

        # --- medical history ------------------------------------------------
        # Roughly 20% of citizens get a disability entry; the others get None.
        self.disability_record = None if random.random() > .2 else random.choice(self._DISABILITIES)
        # Rate is a one-decimal value from random.random(), only set alongside a record.
        self.disability_rate = round(random.random(), 1) if self.disability_record else None

        # --- proof of income ------------------------------------------------
        # Layout reference: https://i.pinimg.com/736x/63/84/4b/63844b5a06461dfa6c3852924ab6e52e.jpg
        self.company = fake.company()
        self.work_start_date = fake.date_between(start_date='-15y', end_date='today')
        self.work_position = fake.job()
        self.salary = int(random.random() * 50000)

        # --- school records -------------------------------------------------
        # Issued 18 years plus 1-100 days after birth.
        # NOTE(review): with minimum_age=10 this date lies in the future for
        # citizens younger than 18 -- confirm whether that is acceptable.
        self.school_certificate_date = self.birthdate + relativedelta(years=18, days=random.randint(1, 100))
        self.school_certificate_id = '{}-{}'.format(fake.bothify(text='####'), self.school_certificate_date)
        self.school_certificate_grade_value = round(random.random() * 10 + 10, 1)  # range [10, 20]
        # ddmmyy of birth + '0' + 4 digits.
        self.ssn = self.birthdate.strftime('%d%m%y') + '0' + fake.bothify(text='####')
        self.tin = fake.bothify(text='#########')

        # --- criminal record ------------------------------------------------
        # About 90% have a clean record ('-'); otherwise an offence type is
        # chosen with equal probability and suffixed with a date (dd.mm.YYYY).
        if random.random() > .1:
            self.criminal_record = '-'
        else:
            if random.random() > .5:
                self.criminal_record = 'Ofensa criminal - '
            else:
                self.criminal_record = 'Ofensa Civil - '
            self.criminal_record += fake.date_between(start_date='-15y', end_date='today').strftime('%d.%m.%Y')

    def getID(self):
        """Return the identity-card fields as a dict; dates use 'dd mm YYYY'.

        Layout references:
        front: https://authenticdocseu.com/wp-content/uploads/2020/07/1_4JjID2aVkMdvcdW16gUdtQ.jpeg
        back: https://amazingltools.com/wp-content/uploads/2020/08/Portugal-ID-back-1.png
        """
        return {
            'APELIDO[S] | SURNAME': self.last_name,
            'NOME[S] | GIVEN NAME': self.first_name,
            'SEXO | SEX': self.sex,
            'ALTURA | HEIGHT': self.height,
            'NACIONALIDADE | NATIONALITY': self.nationality,
            'DATA DE NASCIMENTO': self.birthdate.strftime('%d %m %Y'),
            'NÚMERO ID CIVIL | CIVIL ID No.': self.id_civil_number,
            'NÚMERO DOCUMENTO | DOCUMENT No.': self.id_document_number,
            'DATA DE VALIDADE | EXPIRY DATE': self.id_expiry_date.strftime('%d %m %Y'),
            'FILIAÇÃO | PARENTS': '{} {} | {} {}'.format(self.father_first_name, self.father_last_name, self.mother_first_name, self.mother_last_name),
            'NÚMERO IDENTIFICAÇÃO FISCAL | TAX No.': self.tax_number,
            'NÚMERO SEGURANÇA SOCIAL | SOCIAL SECURITY No.': self.social_security_number,
            'NÚMERO UTENTE DE SAÚDE | HEALTH No.': self.health_number,
        }

    def getBirthCertificate(self):
        """Return the birth-certificate fields as a dict; birth date is 'dd.mm.YYYY'."""
        # NOTE(review): several labels in this class read 'Name(s) próprio(s)';
        # possibly meant 'Nome(s)'. Left unchanged because they are column names.
        return {
            'Número Id Civil | Civil Id No.': self.id_civil_number,
            'Apelido(s)/Surname': self.last_name,
            'Name(s) próprio(s)/Given name(s)': self.first_name,
            'Nome do Pai/Father Name': '{} {}'.format(self.father_first_name, self.father_last_name),
            'Nome da Mãe/Mother Name': '{} {}'.format(self.mother_first_name, self.mother_last_name),
            'Data de nascimento/Date of birth': self.birthdate.strftime('%d.%m.%Y'),
            'País de origem/Country of origin': self.country,
            'Nacionalidade/Nationality': self.nationality,
        }

    def getPassport(self):
        """Return the passport fields as a dict; all dates are 'dd.mm.YYYY'.

        Layout reference:
        https://generatormr.com/wp-content/themes/mrgenerator/img/countries/portugal.png
        """
        return {
            'Tipo/Type': 'PC',
            'Código do País/Code of issuing State': self.passport_country,
            'Passaporte Número/Passport no.': self.passport_number,
            'Apelido(s)/Surname': self.last_name,
            'Name(s) próprio(s)/Given name(s)': self.first_name,
            'Nacionalidade/Nationality': self.passport_nationality,
            'Altura/Height': self.height,
            'Data de nascimento/Date of birth': self.birthdate.strftime('%d.%m.%Y'),
            'Número de identificação pessoal/Personal identifying number': self.passport_identifying_number,
            'Sexo/Sex': self.sex,
            'Local de nascimento/Place of Birth': self.city_of_birth,
            'Data de emissão/Date of issue': self.passport_publish_date.strftime('%d.%m.%Y'),
            'Válido até/Date of expiry': self.passport_expiration_date.strftime('%d.%m.%Y'),
        }

    def getHealthInsurance(self):
        """Return the health-insurance card fields as a dict."""
        return {
            'Apelido(s)/Last name(s)': self.last_name,
            'Name(s) próprio(s)/First name(s)': self.first_name,
            'Data de nascimento/Date of birth': self.birthdate.strftime('%d.%m.%Y'),
            'Número Segurança Social/Social Security Number': self.social_security_number,
            'Número Utente de Saúde/Health Number': self.health_number,
        }

    def getMedicalHistory(self):
        """Return the medical-history fields as a dict.

        The disability and rate entries are both ``None`` when no disability
        was assigned.
        """
        return {
            'Número Segurança Social/Social Security Number': self.social_security_number,
            'Número Utente de Saúde/Health Number': self.health_number,
            'Apelido(s)/Last name(s)': self.last_name,
            'Name(s) próprio(s)/First name(s)': self.first_name,
            'Incapacidade/Disability': self.disability_record,
            'Taxa de deficiência/Disability rate': self.disability_rate
        }

    def getProofOfIncome(self):
        """Return the proof-of-income fields as a dict.

        The start date is returned as the raw date object, not a formatted
        string.
        """
        # NOTE(review): the start-date label carries a Greek suffix, apparently
        # left over from a template; kept as-is because it is a column name.
        return {
            'Número Identificação Fiscal/Tax Number': self.tax_number,
            'Apelido(s)/Last name(s)': self.last_name,
            'Name(s) próprio(s)/First name(s)': self.first_name,
            'Empresa/Company': self.company,
            'Data de início/Ημ. Έναρξης': self.work_start_date,
            'Trabalho/Work Position': self.work_position,
            'Salário/Salary': self.salary,
        }

    def getSchoolCertificate(self):
        """Return the school-certificate fields as a dict.

        The issue date is returned as the raw date object; the birth date is
        formatted 'dd.mm.YYYY'.
        """
        return {
            'Name(s) próprio(s)/First name(s)': self.first_name,
            'Apelido(s)/Last name(s)': self.last_name,
            'Nome do Pai/Father Name': '{} {}'.format(self.father_first_name, self.father_last_name),
            'Nome da Mãe/Mother Name': '{} {}'.format(self.mother_first_name, self.mother_last_name),
            'Data de nascimento/Date of birth': self.birthdate.strftime('%d.%m.%Y'),
            'Data de emissão/Date of issue': self.school_certificate_date,
            'Avaliar/Grade': self.school_certificate_grade_value,
        }

    def getCriminalRecord(self):
        """Return the criminal-record certificate fields as a dict.

        The status is '-' for a clean record, otherwise an offence label
        followed by a date.
        """
        return {
            'Name(s) próprio(s)/First name(s)': self.first_name,
            'Apelido(s)/Last name(s)': self.last_name,
            'Nome do Pai/Father Name': '{} {}'.format(self.father_first_name, self.father_last_name),
            'Nome da Mãe/Mother Name': '{} {}'.format(self.mother_first_name, self.mother_last_name),
            'Número Segurança Social/Social Security Number': self.social_security_number,
            'Utente de Saúde/Health Number': self.health_number,
            'Data de nascimento/Date of birth': self.birthdate.strftime('%d.%m.%Y'),
            'Local de nascimento/Place of Birth': self.city_of_birth,
            'Situação Criminal/Criminal Status': self.criminal_record
        }


In [None]:
# Size of the synthetic population and the seed that makes it repeatable.
N_CITIZENS = 100
SEED = 42

# Seed every random source Portuguese_Citizen draws from (Faker, `random`,
# NumPy) so re-running this cell rebuilds the same population.  Dates that are
# requested relative to 'today' can still shift from one day to the next.
Faker.seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

citizens = [Portuguese_Citizen() for _ in range(N_CITIZENS)]

In [None]:
# Identity-card view: one dict per citizen, shown as one table row each.
IDs = [
    person.getID()
    for person in citizens
]
pd.DataFrame(IDs)

Unnamed: 0,APELIDO[S] | SURNAME,NOME[S] | GIVEN NAME,SEXO | SEX,ALTURA | HEIGHT,NACIONALIDADE | NATIONALITY,DATA DE NASCIMENTO,NÚMERO ID CIVIL | CIVIL ID No.,NÚMERO DOCUMENTO | DOCUMENT No.,DATA DE VALIDADE | EXPIRY DATE,FILIAÇÃO | PARENTS,NÚMERO IDENTIFICAÇÃO FISCAL | TAX No.,NÚMERO SEGURANÇA SOCIAL | SOCIAL SECURITY No.,NÚMERO UTENTE DE SAÚDE | HEALTH No.
0,Correia,Amélia,F,1.81,PRT,01 04 1958,46821195,46821195 7 ZZ2,02 02 2029,Fábio Matias | Inês Silva,0006283370,217446276180901,000908554486120
1,Mota,Manuel,M,1.64,PRT,03 01 2011,23536214,23536214 1 ZZ2,02 12 2028,Guilherme Lourenço | Rafaela Correia,0002724882,599874793857829,000154671509525
2,Lourenço,Gil,M,1.83,PRT,17 11 1981,51895679,51895679 2 ZZ5,05 02 2031,William Cunha | Irina Teixeira,0008359404,095301452751554,000941420982124
3,Monteiro,Rodrigo,M,1.92,PRT,30 06 1933,27576010,27576010 2 ZZ0,28 07 2023,Samuel Cardoso | Luana Ramos,0003236071,085157265358858,000846557345819
4,Leite,Renato,M,1.72,PRT,28 11 1957,98394928,98394928 1 ZZ9,03 01 2029,Noah Reis | Kelly Sousa,0008230433,865319689889593,000024998505353
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Sousa,Melissa,F,1.47,PRT,25 12 1933,72789776,72789776 4 ZZ7,02 11 2027,Rúben Baptista | Mafalda Martins,0006426327,528925714282011,000816899803271
96,Martins,Xavier,M,1.74,PRT,21 10 1960,90009105,90009105 4 ZZ7,31 05 2029,Tomás Simões | Kelly Figueiredo,0000602414,704128316493178,000983758587690
97,Machado,Luciana,F,1.73,PRT,29 09 2004,27383195,27383195 2 ZZ4,23 06 2029,Guilherme Lopes | Constança Lima,0008786708,276552295484875,000520921750709
98,Abreu,Francisco,M,2.02,PRT,22 11 1949,64294291,64294291 3 ZZ8,04 06 2024,Frederico Soares | Pilar Teixeira,0003919875,650385983595941,000452704730202


In [None]:
# Birth-certificate view: one dict per citizen, shown as one table row each.
BirthCertificates = [
    person.getBirthCertificate()
    for person in citizens
]
pd.DataFrame(BirthCertificates)

Unnamed: 0,Número Id Civil | Civil Id No.,Apelido(s)/Surname,Name(s) próprio(s)/Given name(s),Nome do Pai/Father Name,Nome da Mãe/Mother Name,Data de nascimento/Date of birth,País de origem/Country of origin,Nacionalidade/Nationality
0,46821195,Correia,Amélia,Fábio Matias,Inês Silva,01.04.1958,Portugal,PRT
1,23536214,Mota,Manuel,Guilherme Lourenço,Rafaela Correia,03.01.2011,Portugal,PRT
2,51895679,Lourenço,Gil,William Cunha,Irina Teixeira,17.11.1981,Portugal,PRT
3,27576010,Monteiro,Rodrigo,Samuel Cardoso,Luana Ramos,30.06.1933,Portugal,PRT
4,98394928,Leite,Renato,Noah Reis,Kelly Sousa,28.11.1957,Portugal,PRT
...,...,...,...,...,...,...,...,...
95,72789776,Sousa,Melissa,Rúben Baptista,Mafalda Martins,25.12.1933,Portugal,PRT
96,90009105,Martins,Xavier,Tomás Simões,Kelly Figueiredo,21.10.1960,Portugal,PRT
97,27383195,Machado,Luciana,Guilherme Lopes,Constança Lima,29.09.2004,Portugal,PRT
98,64294291,Abreu,Francisco,Frederico Soares,Pilar Teixeira,22.11.1949,Portugal,PRT


In [None]:
# Passport view: one dict per citizen, shown as one table row each.
Passports = [
    person.getPassport()
    for person in citizens
]
pd.DataFrame(Passports)

Unnamed: 0,Tipo/Type,Código do País/Code of issuing State/Code de,Passaporte Número/Passport no.,Apelido(s)/Surname,Name(s) próprio(s)/Given name(s),Nacionalidade/Nationality,Altura/Height,Data de nascimento/Date of birth,Número de identificação pessoal/Personal identifying number,Sexo/Sex,Local de nascimento/Place of Birth,Data de emissão/Date of issue,Válido até/Date of expiry
0,PC,PRT,BK2321719,Correia,Amélia,PORTUGUESA,1.81,01.04.1958,6752992,F,Leiria,31.05.2017,30.05.2022
1,PC,PRT,XH6249342,Mota,Manuel,PORTUGUESA,1.64,03.01.2011,7170632,M,Funchal,20.07.2017,19.07.2022
2,PC,PRT,YA1816850,Lourenço,Gil,PORTUGUESA,1.83,17.11.1981,6441617,M,Valongo,25.09.2020,24.09.2025
3,PC,PRT,XH2954941,Monteiro,Rodrigo,PORTUGUESA,1.92,30.06.1933,7196692,M,Rio Maior,26.12.2020,25.12.2025
4,PC,PRT,XZ7154115,Leite,Renato,PORTUGUESA,1.72,28.11.1957,1233949,M,Sabugal,28.08.2020,27.08.2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,PC,PRT,TM9398710,Sousa,Melissa,PORTUGUESA,1.47,25.12.1933,1465196,F,Tarouca,27.11.2017,26.11.2022
96,PC,PRT,ZE7137024,Martins,Xavier,PORTUGUESA,1.74,21.10.1960,2437982,M,Alcobaça,02.10.2018,01.10.2023
97,PC,PRT,TO6909197,Machado,Luciana,PORTUGUESA,1.73,29.09.2004,0397209,F,Vila Nova de Foz Côa,10.04.2018,09.04.2023
98,PC,PRT,KY7453727,Abreu,Francisco,PORTUGUESA,2.02,22.11.1949,8053126,M,Moura,13.02.2018,12.02.2023


In [None]:
# Health-insurance view: one dict per citizen, shown as one table row each.
HealthInsurances = [
    person.getHealthInsurance()
    for person in citizens
]
pd.DataFrame(HealthInsurances)

Unnamed: 0,Apelido(s)/Last name(s),Name(s) próprio(s)/First name(s),Data de nascimento/Date of birth,Número Segurança Social/Social Security Number,Número Utente de Saúde/Health Number
0,Correia,Amélia,01.04.1958,217446276180901,000908554486120
1,Mota,Manuel,03.01.2011,599874793857829,000154671509525
2,Lourenço,Gil,17.11.1981,095301452751554,000941420982124
3,Monteiro,Rodrigo,30.06.1933,085157265358858,000846557345819
4,Leite,Renato,28.11.1957,865319689889593,000024998505353
...,...,...,...,...,...
95,Sousa,Melissa,25.12.1933,528925714282011,000816899803271
96,Martins,Xavier,21.10.1960,704128316493178,000983758587690
97,Machado,Luciana,29.09.2004,276552295484875,000520921750709
98,Abreu,Francisco,22.11.1949,650385983595941,000452704730202


In [None]:
# Medical-history view: one dict per citizen, shown as one table row each.
MedicalHistories = [
    person.getMedicalHistory()
    for person in citizens
]
pd.DataFrame(MedicalHistories)

Unnamed: 0,Número Segurança Social/Social Security Number,Número Utente de Saúde/Health Number,Apelido(s)/Last name(s),Name(s) próprio(s)/First name(s),Incapacidade/Disability,Taxa de deficiência/Disability rate
0,217446276180901,000908554486120,Correia,Amélia,,
1,599874793857829,000154671509525,Mota,Manuel,,
2,095301452751554,000941420982124,Lourenço,Gil,,
3,085157265358858,000846557345819,Monteiro,Rodrigo,Muscular Dystrophy (MD),0.1
4,865319689889593,000024998505353,Leite,Renato,Mental Illness,0.6
...,...,...,...,...,...,...
95,528925714282011,000816899803271,Sousa,Melissa,Blindness,0.8
96,704128316493178,000983758587690,Martins,Xavier,,
97,276552295484875,000520921750709,Machado,Luciana,,
98,650385983595941,000452704730202,Abreu,Francisco,,


In [None]:
# Proof-of-income view: one dict per citizen, shown as one table row each.
ProofsOfIncome = [
    person.getProofOfIncome()
    for person in citizens
]
pd.DataFrame(ProofsOfIncome)

Unnamed: 0,Número Identificação Fiscal/Tax Number,Apelido(s)/Last name(s),Name(s) próprio(s)/First name(s),Empresa/Company,Data de início/Ημ. Έναρξης,Trabalho/Work Position,Salário/Salary
0,0006283370,Correia,Amélia,Figueiredo,2007-10-07,Jogador profissional de futebol,12375
1,0002724882,Mota,Manuel,Henriques,2013-05-19,Cabeleireiro e barbeiro,10453
2,0008359404,Lourenço,Gil,Freitas,2009-08-28,Mineiro,39076
3,0003236071,Monteiro,Rodrigo,Pires,2012-06-02,Profissionais de nível intermédio da medicina ...,11263
4,0008230433,Leite,Renato,Leal Anjos e Filhos,2018-06-26,Técnico de nível intermédio de apoio social,15622
...,...,...,...,...,...,...,...
95,0006426327,Sousa,Melissa,Mota,2016-01-31,Motorista de automóveis ligeiros e carrinhas,39112
96,0000602414,Martins,Xavier,Esteves Lourenço S/A,2016-01-25,Assentador de refratários,33979
97,0008786708,Machado,Luciana,Faria,2007-03-25,Trabalhador qualificado da jardinagem,18357
98,0003919875,Abreu,Francisco,Loureiro,2010-12-24,Regulador e operador de máquinas ferramentas c...,22988


In [None]:
# School-certificate view: one dict per citizen, shown as one table row each.
SchoolCertificates = [
    person.getSchoolCertificate()
    for person in citizens
]
pd.DataFrame(SchoolCertificates)

Unnamed: 0,Name(s) próprio(s)/First name(s),Apelido(s)/Last name(s),Nome do Pai/Father Name,Nome da Mãe/Mother Name,Data de nascimento/Date of birth,Data de emissão/Date of issue,Avaliar/Grade
0,Amélia,Correia,Fábio Matias,Inês Silva,01.04.1958,1976-06-15,18.9
1,Manuel,Mota,Guilherme Lourenço,Rafaela Correia,03.01.2011,2029-03-15,10.5
2,Gil,Lourenço,William Cunha,Irina Teixeira,17.11.1981,2000-01-21,11.2
3,Rodrigo,Monteiro,Samuel Cardoso,Luana Ramos,30.06.1933,1951-07-25,16.4
4,Renato,Leite,Noah Reis,Kelly Sousa,28.11.1957,1975-12-17,17.2
...,...,...,...,...,...,...,...
95,Melissa,Sousa,Rúben Baptista,Mafalda Martins,25.12.1933,1952-01-21,10.8
96,Xavier,Martins,Tomás Simões,Kelly Figueiredo,21.10.1960,1978-11-25,12.6
97,Luciana,Machado,Guilherme Lopes,Constança Lima,29.09.2004,2022-11-03,18.4
98,Francisco,Abreu,Frederico Soares,Pilar Teixeira,22.11.1949,1968-02-06,17.0


In [None]:
# Criminal-record view: one dict per citizen, shown as one table row each.
CriminalRecords = [
    person.getCriminalRecord()
    for person in citizens
]
pd.DataFrame(CriminalRecords)

Unnamed: 0,Name(s) próprio(s)/First name(s),Apelido(s)/Last name(s),Nome do Pai/Father Name,Nome da Mãe/Mother Name,Número Segurança Social/Social Security Number,Utente de Saúde/Health Number,Data de nascimento/Date of birth,Local de nascimento/Place of Birth,Situação Criminal/Criminal Status
0,Amélia,Correia,Fábio Matias,Inês Silva,217446276180901,000908554486120,01.04.1958,Leiria,-
1,Manuel,Mota,Guilherme Lourenço,Rafaela Correia,599874793857829,000154671509525,03.01.2011,Funchal,-
2,Gil,Lourenço,William Cunha,Irina Teixeira,095301452751554,000941420982124,17.11.1981,Valongo,-
3,Rodrigo,Monteiro,Samuel Cardoso,Luana Ramos,085157265358858,000846557345819,30.06.1933,Rio Maior,-
4,Renato,Leite,Noah Reis,Kelly Sousa,865319689889593,000024998505353,28.11.1957,Sabugal,-
...,...,...,...,...,...,...,...,...,...
95,Melissa,Sousa,Rúben Baptista,Mafalda Martins,528925714282011,000816899803271,25.12.1933,Tarouca,-
96,Xavier,Martins,Tomás Simões,Kelly Figueiredo,704128316493178,000983758587690,21.10.1960,Alcobaça,-
97,Luciana,Machado,Guilherme Lopes,Constança Lima,276552295484875,000520921750709,29.09.2004,Vila Nova de Foz Côa,-
98,Francisco,Abreu,Frederico Soares,Pilar Teixeira,650385983595941,000452704730202,22.11.1949,Moura,-


In [None]:
# Generate one more citizen and show its raw instance attributes.
citizen = Portuguese_Citizen()
attrs = citizen.__dict__  # the same mapping that vars(citizen) returns
attrs

{'birthdate': datetime.date(1913, 6, 9),
 'building_number': 'S/N',
 'city': 'Penafiel',
 'city_of_birth': 'Aveiro',
 'company': 'Lopes',
 'country': 'Portugal',
 'criminal_record': '-',
 'disability_rate': None,
 'disability_record': None,
 'electoral_number': '0973013799386',
 'father_first_name': 'Igor',
 'father_last_name': 'Pinto',
 'first_name': 'Kévim',
 'full_name': 'Kévim Batista',
 'health_insurance_expiration_date': datetime.date(2023, 1, 5),
 'health_insurance_publish_date': datetime.date(2008, 5, 31),
 'health_number': '000285621406563',
 'height': 1.95,
 'id_civil_number': '37527034',
 'id_document_number': '37527034 0 ZZ8',
 'id_expiry_date': datetime.date(2021, 9, 10),
 'last_name': 'Batista',
 'mother_first_name': 'Beatriz',
 'mother_last_name': 'Santos',
 'nationality': 'PRT',
 'passport_country': 'PRT',
 'passport_expiration_date': datetime.date(2025, 1, 5),
 'passport_identifying_number': '9460046',
 'passport_nationality': 'PORTUGUESA',
 'passport_number': 'HT88925