In [16]:
import pandas as pd
from faker import Faker
# Faker.seed(999)
import random
import os


In [17]:
# Initialize Faker
fake = Faker('en_GB')

# Define the number of rows you want to generate
num_rows = 1000

# Define the columns
columns = [
    'Engagement ID', 
    'Creation Date', 
    'Release Date', 
    'Last Time Charged Date',
    'Last Expenses Charged Date', 
    'Last Active ETCP Date', 
    'Engagement',
    'Client', 
    'Engagement Region', 
    'Engagement Country', 
    'Engagement Type',
    'Currency', 
    'Engagement Partner', 
    'Engagement Partner GUI',
    'Engagement Manager', 
    'Engagement Manager GUI', 
    'Engagement Partner Service Line',
    'Engagement Status'
]


In [18]:

# Create a list to hold the data
data = []

# Generate dummy data
for _ in range(num_rows):
    engagement_id = fake.unique.pystr_format("E-########")
    creation_date = fake.date_between(start_date='-1y', end_date='today')
    release_date = fake.date_between(start_date=creation_date, end_date='today')
    last_time_charged_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    last_expenses_charged_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    last_active_etcp_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    engagement = fake.bs().title()
    client = fake.company()
    engagement_region = "EMEA"
    engagement_country = fake.country()
    engagement_type = "External Project"
    currency = fake.currency_code()
    engagement_partner = fake.last_name() + ", " + fake.first_name()
    engagement_partner_gui = fake.unique.pystr_format("#######")
    engagement_manager = fake.last_name() + ", " + fake.first_name()
    engagement_manager_gui = fake.unique.pystr_format("#######")
    engagement_partner_service_line = random.choice(["CBS & Elim", "Assurance", "Consulting", "Tax", "SaT"])
    engagement_status = random.choice(["Released", "Active", "Pending"])

    data.append([
        engagement_id,
        creation_date,
        release_date,
        last_time_charged_date,
        last_expenses_charged_date,
        last_active_etcp_date,
        engagement,
        client,
        engagement_region,
        engagement_country,
        engagement_type,
        currency,
        engagement_partner,
        engagement_partner_gui,
        engagement_manager,
        engagement_manager_gui,
        engagement_partner_service_line,
        engagement_status
    ])

# Create a DataFrame
df = pd.DataFrame(data, columns=columns)

df.head()

Unnamed: 0,Engagement ID,Creation Date,Release Date,Last Time Charged Date,Last Expenses Charged Date,Last Active ETCP Date,Engagement,Client,Engagement Region,Engagement Country,Engagement Type,Currency,Engagement Partner,Engagement Partner GUI,Engagement Manager,Engagement Manager GUI,Engagement Partner Service Line,Engagement Status
0,E-50420642,2023-08-18,2023-12-17,2024-05-05,2024-03-10,,Revolutionize Rich Convergence,Wright LLC,EMEA,Reunion,External Project,EGP,"Wilson, Andrew",9951123,"Wood, Tina",7598024,Consulting,Active
1,E-55025136,2024-06-12,2024-06-12,2024-06-13,,,Morph Cross-Media Info-Mediaries,Jackson Inc,EMEA,Malawi,External Project,SDG,"Jones, Russell",8380616,"Smith, Jake",3348389,Consulting,Released
2,E-42082039,2024-04-16,2024-04-30,,,2024-06-02,Maximize Value-Added Action-Items,"Macdonald, Harrison and Khan",EMEA,India,External Project,MOP,"Davison, Pamela",3308654,"Baker, Natasha",8984637,Assurance,Pending
3,E-92780236,2024-05-20,2024-05-25,2024-06-01,,2024-05-26,Seize Cross-Media Roi,Hall-Middleton,EMEA,Malawi,External Project,EGP,"Rice, Callum",8064158,"Johnson, Hannah",192335,Tax,Released
4,E-76827784,2024-05-27,2024-06-06,2024-06-11,,,Exploit Dot-Com Bandwidth,"Reeves, Dixon and Hartley",EMEA,Mexico,External Project,XCD,"Cooper, Yvonne",973727,"Nash, Jeremy",6306486,Tax,Released


In [19]:
# Make dir if it does not exist

if not os.path.exists('./dummyData'):
    os.makedirs('./dummyData')


# Save the DataFrame to a CSV file
df.to_excel("./dummyData/test_engagementData.xlsx", index=False)


In [20]:
# get list of unique values in a engagement_partner_gui and engagement_manager_gui in a single list
engagement_partner_gui = df['Engagement Partner GUI'].unique().tolist()
engagement_manager_gui = df['Engagement Manager GUI'].unique().tolist()
gui = engagement_partner_gui + engagement_manager_gui
# remove duplicates
gui = list(set(gui))
# print gui size
print(len(gui))
# add faker safe email address to each gui and move to a df
email = []
for i in gui:
    email.append(fake.safe_email())
df_email = pd.DataFrame(list(zip(gui, email)), columns=['GUI', 'Email'])
# sort by gui
df_email = df_email.sort_values(by=['GUI'])
df_email.head()


2000


Unnamed: 0,GUI,Email
1080,2590,charleslee@example.com
1385,5512,jmoran@example.com
1653,7355,faustin@example.org
289,18036,brett42@example.net
1470,19675,rebecca85@example.org


In [21]:
# Save the DataFrame to a CSV file
df_email.to_excel("./dummyData/test_emailList.xlsx", index=False)

# Exception

In [22]:
# list of random engagement_id, append additional columns to the list as a df. Columns are: Engagement ID, Exception Reason, Exception Reason Description, Exception Reason Category, Exception Reason Subcategory, Exception Reason Type, Exception Reason Status, Exception Reason Status Description, Exception Reason Status Category,  Exception Reason Status Type, Exception Reason Status Start Date, Exception Reason Status User, Exception Reason Status User GUI, Exception Reason Status User Email
engagement_id = df['Engagement ID'].sample(n=50).tolist()
exception_reason_category = []
exception_reason_description = []
exception_reason_status = []
exception_reason_start_date = []
exception_reason_status_user = []
exception_reason_status_user_gui = []
exception_reason_status_user_email = []

for i in engagement_id:
    exception_reason_category.append(random.choice (["Tax", "Inventment Code", "Invoice", "Other"]))
    exception_reason_description.append(fake.sentence())
    exception_reason_status.append(random.choice (["Pending EP Approval", "Pending Finance Approval", "Approved", " Finance Rejected", "EP Rejected" "Expired", "New Time"]))
    exception_reason_start_date.append(fake.date_between(start_date='-4w', end_date='today'))
    exception_reason_status_user.append(fake.last_name() + ", " + fake.first_name())
    exception_reason_status_user_gui.append(fake.unique.pystr_format("#######"))
    exception_reason_status_user_email.append(fake.safe_email())

df_exception = pd.DataFrame(list(zip(engagement_id, exception_reason_category, exception_reason_description, exception_reason_status, exception_reason_start_date, exception_reason_status_user, exception_reason_status_user_gui, exception_reason_status_user_email)), columns=['Engagement ID', 'Exception Reason Category', 'Exception Reason Description', 'Exception Reason Status', 'Exception Reason Status Start Date', 'Exception Reason Status User', 'Exception Reason Status User GUI', 'Exception Reason Status User Email'])

df_exception.head(25)


Unnamed: 0,Engagement ID,Exception Reason Category,Exception Reason Description,Exception Reason Status,Exception Reason Status Start Date,Exception Reason Status User,Exception Reason Status User GUI,Exception Reason Status User Email
0,E-72779033,Invoice,Dicta eaque illo aperiam.,New Time,2024-05-29,"Walsh, Hayley",8990136,blakelawrence@example.com
1,E-89546985,Invoice,Veniam dolor similique iure placeat.,Pending EP Approval,2024-05-20,"Davey, Danny",3754268,rachaelkerr@example.net
2,E-92308971,Inventment Code,Neque libero neque explicabo eaque.,Pending Finance Approval,2024-05-18,"Barnett, Michael",383780,mauricejohn@example.com
3,E-63512465,Tax,Impedit reprehenderit vero vitae eius.,EP RejectedExpired,2024-06-08,"Woods, Grace",3162637,pforster@example.com
4,E-45166318,Tax,Ipsum saepe rerum quod vitae consequatur.,Finance Rejected,2024-06-02,"Morris, Stuart",6008136,dannyholmes@example.net
5,E-97511634,Inventment Code,Culpa est illum id voluptas.,New Time,2024-05-19,"Robinson, Beth",606173,djohnson@example.org
6,E-00232464,Other,Voluptatibus quia voluptatibus soluta perspici...,Approved,2024-05-29,"Macdonald, Toby",7296100,stokeslucy@example.org
7,E-60169040,Inventment Code,Maiores repellendus commodi fuga similique con...,New Time,2024-06-08,"Miah, Lawrence",4095173,ljones@example.com
8,E-60336264,Inventment Code,Voluptates incidunt atque ullam nobis ut volup...,Pending EP Approval,2024-05-23,"Harris, Abdul",7560188,qmatthews@example.org
9,E-51319716,Inventment Code,Nam natus deserunt dolor.,New Time,2024-05-27,"Jones, Simon",4099534,dmitchell@example.org


In [23]:

df_exception.to_excel("./dummyData/test_exceptionData.xlsx", index=False)


In [24]:
# Sample 25 unique engagement IDs
engagement_id = df['Engagement ID'].sample(n=25, random_state=1).tolist()

# Create a list to hold delegate information
delegate_list = []

# Generate delegate information for each engagement ID
for eid in engagement_id:
    num_delegates = random.randint(1, 3)  # Randomly choose 1, 2, or 3 delegates
    for i in range(1, num_delegates + 1):
        delegate_name = fake.last_name() + ", " + fake.first_name()
        delegate_gui = fake.unique.pystr_format("#######")
        delegate_email = fake.safe_email()
        delegate_list.append({
            'Engagement ID': eid,
            'Delegate Number': i,
            'Delegate Name': delegate_name,
            'Delegate GUI': delegate_gui,
            'Delegate Email': delegate_email
        })

# Create a DataFrame from the delegate list
df_long = pd.DataFrame(delegate_list)

# Sort the DataFrame by Engagement ID and Delegate Number
df_long = df_long.sort_values(by=['Engagement ID', 'Delegate Number'])

df_long.reset_index(drop=True, inplace=True)

df_long.head(25)

Unnamed: 0,Engagement ID,Delegate Number,Delegate Name,Delegate GUI,Delegate Email
0,E-06560448,1,"Clayton, Paige",5503484,griffithsdennis@example.com
1,E-12889799,1,"Taylor, Donald",5298214,elaine03@example.com
2,E-12889799,2,"Phillips, Danielle",8276723,douglasdavis@example.com
3,E-26907631,1,"Ellis, Guy",1255919,joanne93@example.org
4,E-26907631,2,"Fletcher, Daniel",4579913,fhancock@example.com
5,E-27755999,1,"Akhtar, Garry",2124014,wendybrown@example.net
6,E-27755999,2,"Ford, Antony",9267791,robert65@example.org
7,E-27755999,3,"Barton, Carolyn",5931594,abdul05@example.net
8,E-29324616,1,"Ali, Jill",8990806,kyle55@example.org
9,E-29324616,2,"Hunter, Marion",4595618,bnewton@example.org


In [25]:
# Save the DataFrame to an Excel file
df_long.to_excel("./dummyData/test_delegateData.xlsx", index=False)