In [1]:
from faker import Faker
import random
from datetime import datetime, timedelta
import numpy as np

# Shared Faker instance used by the generator functions defined below.
# NOTE(review): no seed is set in the visible cells, so generated values
# presumably differ from run to run — confirm whether reproducibility is wanted.
fake = Faker()

# Function to generate random date of birth within a given range
def generate_dob(start_date, end_date):
    """Return a random date between ``start_date`` and ``end_date`` as text.

    Both bounds are forwarded unchanged to ``fake.date_between``; the
    resulting date is formatted as ``YYYY-MM-DD``.
    """
    picked = fake.date_between(start_date=start_date, end_date=end_date)
    return picked.strftime('%Y-%m-%d')

# Function to calculate age in whole years from a date of birth
def calculate_age(dob, mean_adult_age=40, std_dev_adult=15, child_prob=0.15):
    """Return the age, in completed years as of today, implied by ``dob``.

    Args:
        dob: Date of birth as a ``'YYYY-MM-DD'`` string.
        mean_adult_age, std_dev_adult, child_prob: Accepted only so existing
            calls keep working. They used to drive a random draw that
            replaced the computed age, which made the returned age disagree
            with the date of birth it was supposedly derived from.

    Returns:
        int: Number of birthdays that have occurred on or before today.

    Raises:
        ValueError: If ``dob`` does not match the ``'%Y-%m-%d'`` format.
    """
    today = datetime.today()
    born = datetime.strptime(dob, '%Y-%m-%d')
    # Subtract one year when this year's birthday has not happened yet.
    birthday_pending = (today.month, today.day) < (born.month, born.day)
    return today.year - born.year - int(birthday_pending)

# Function to generate random gender
def generate_gender():
    """Return either ``'Male'`` or ``'Female'``, chosen with ``random.choice``."""
    return random.choice(['Male', 'Female'])

# Function to generate patient data
def generate_patient_data(num_patients):
    """Build ``num_patients`` fake patient records.

    Each record is a dict with the keys FirstName, LastName, Name,
    DateOfBirth, Age, Gender and MedicalRecordNumber. The date of birth is
    requested from ``generate_dob`` with the bounds ``'-90y'`` and ``'-18y'``;
    Age is whatever ``calculate_age`` returns for that date of birth.

    Returns:
        list[dict]: One record per patient, in generation order.
    """

    def _new_patient():
        # Values are produced in a fixed order: names, DOB, age, gender, MRN.
        given = fake.first_name()
        family = fake.last_name()
        birth_date = generate_dob(start_date='-90y', end_date='-18y')
        return {
            'FirstName': given,
            'LastName': family,
            'Name': f"{given} {family}",
            'DateOfBirth': birth_date,
            'Age': calculate_age(birth_date),
            'Gender': generate_gender(),
            # "E14" prefix followed by ten random digits
            'MedicalRecordNumber': fake.numerify(text="E14##########"),
        }

    return [_new_patient() for _ in range(num_patients)]

In [2]:
# Function to generate random Visa credit card details
def generate_credit_card_number():
    """Generate a fake Visa card number with an expiration date and CVV.

    Returns:
        tuple: ``(credit_card_number, expiration_date, cvv)`` as produced by
        the shared ``fake`` instance.
    """
    card_type = 'visa'
    credit_card_number = fake.credit_card_number(card_type=card_type)
    expiration_date = fake.credit_card_expire()
    # Pass the card type here as well: without it Faker picks a random card
    # type for the security code, which can yield a 4-digit (Amex-style) code
    # for a Visa number.
    cvv = fake.credit_card_security_code(card_type=card_type)
    return credit_card_number, expiration_date, cvv

# Function to generate patient credit card data
def generate_patient_credit_card_data(patient_data):
    """Attach freshly generated card details to every patient record.

    Args:
        patient_data: Iterable of dicts that each provide ``'Name'`` and
            ``'MedicalRecordNumber'``.

    Returns:
        list[dict]: One dict per patient with the keys Name,
        MedicalRecordNumber, CreditCardNumber, ExpirationDate and CVV.
    """
    rows = []
    for person in patient_data:
        # Card details are generated before the patient fields are read.
        number, expires, security_code = generate_credit_card_number()
        rows.append({
            'Name': person['Name'],
            'MedicalRecordNumber': person['MedicalRecordNumber'],
            'CreditCardNumber': number,
            'ExpirationDate': expires,
            'CVV': security_code,
        })
    return rows

In [3]:
import re

# ZIP code listing, one "ZIP (City)" entry per line
data = """
72701 (Fayetteville)
72703 (Fayetteville)
72704 (Fayetteville)
72712 (Bentonville)
72714 (Bella Vista)
72715 (Bella Vista)
72717 (Canehill)
72718 (Cave Springs)
72719 (Centerton)
72721 (Combs)
72722 (Decatur)
72727 (Elkins)
72729 (Evansville)
72730 (Farmington)
72732 (Garfield)
72734 (Gentry)
72736 (Gravette)
72738 (Hindsville)
72739 (Hiwasse)
72740 (Huntsville)
72742 (Kingston)
72744 (Lincoln)
72745 (Lowell)
72747 (Maysville)
72749 (Morrow)
72751 (Pea Ridge)
72752 (Pettigrew)
72753 (Prairie Grove)
72756 (Rogers)
72758 (Rogers)
72760 (Saint Paul)
72761 (Siloam Springs)
72762 (Springdale)
72764 (Springdale)
72768 (Sulphur Springs)
72769 (Summers)
72773 (Wesley)
72774 (West Fork)
72776 (Witter)
72959 (Winslow)
"""

# Collect every run of digits in the listing, in order of appearance;
# the city names contain no digits, so each hit is one ZIP code string.
zipcodes = [hit.group() for hit in re.finditer(r'\d+', data)]

In [4]:
from pyzipcode import ZipCodeDatabase

# Function to generate fake address in Northwest Arkansas
def generate_nw_arkansas_address(zipcodes):
    """Build a fake street address located in one of the given ZIP codes.

    A ZIP code is picked at random from ``zipcodes``, its city and state are
    looked up through pyzipcode's ``ZipCodeDatabase``, and the street line
    comes from ``fake.street_address()``.

    Returns:
        tuple: ``(street, city, state, zip)`` where ``zip`` is the chosen
        element of ``zipcodes``.

    NOTE(review): a ZIP code missing from the database is not handled here;
    whatever ``ZipCodeDatabase`` raises for it propagates to the caller.
    """
    database = ZipCodeDatabase()
    chosen_zip = random.choice(zipcodes)

    # Resolve the ZIP code to its database record before building the street line.
    record = database[chosen_zip]

    street_line = fake.street_address()
    return street_line, record.city, record.state, chosen_zip


# Function to generate patient address
def generate_patient_address(patient_data):
    """Generate one fake address per patient record.

    Addresses come from ``generate_nw_arkansas_address``, which is fed the
    module-level ``zipcodes`` list (not a parameter of this function).

    Args:
        patient_data: Iterable of dicts that each provide ``'Name'`` and
            ``'MedicalRecordNumber'``.

    Returns:
        list[dict]: One dict per patient with the keys Name,
        MedicalRecordNumber, street, city, state and zip.
    """
    rows = []
    for person in patient_data:
        street_line, town, region, postal_code = generate_nw_arkansas_address(zipcodes)
        rows.append({
            'Name': person['Name'],
            'MedicalRecordNumber': person['MedicalRecordNumber'],
            'street': street_line,
            'city': town,
            'state': region,
            'zip': postal_code,
        })
    return rows

In [14]:
import os
import pandas as pd

if __name__ == "__main__":

    # Generate patients; the other two tables are built from these records
    num_patients = int(input("How many patients do you want to generate? "))
    patient_data = generate_patient_data(num_patients)

    # Generate credit card info for every patient
    credit_card_data = generate_patient_credit_card_data(patient_data)

    # Generate an address for every patient
    patient_address = generate_patient_address(patient_data)

    # Output folder for the CSV files. The default is unchanged; set the
    # MOCK_PATIENT_OUTPUT_DIR environment variable to write somewhere else
    # (for example when running on a different machine).
    folder_path = os.environ.get(
        "MOCK_PATIENT_OUTPUT_DIR",
        r"C:\Users\khang\Documents\GitHub\solo_projects\MockPatient",
    )

    # exist_ok=True creates the folder when it is missing and avoids the
    # check-then-create race of a separate os.path.exists() test
    os.makedirs(folder_path, exist_ok=True)

    # One DataFrame per table
    df1 = pd.DataFrame(patient_data)
    df2 = pd.DataFrame(credit_card_data)
    df3 = pd.DataFrame(patient_address)

    # Save each DataFrame to its own CSV file, without the index column
    for file_name, frame in (
        ('patient_data.csv', df1),
        ('credit_card_data.csv', df2),
        ('patient_address.csv', df3),
    ):
        file_path = os.path.join(folder_path, file_name)
        frame.to_csv(file_path, index=False)