In [33]:
import pandas as pd
from faker import Faker
Faker.seed(999)
import random


In [34]:
# Initialize Faker
fake = Faker('en_GB')

# Define the number of rows you want to generate
num_rows = 1000

# Define the columns
columns = [
    "Engagement ID",
    "Creation Date",
    "Release Date",
    "Last Time Charged Date",
    "Last Expenses Charged Date",
    "Last Active ETC-P Date",
    "Engagement",
    "Client",
    "Engagement Partner",
    "Engagement Partner GUI",
    "Engagement Manager",
    "Engagement Manager GUI",
    "Engagement Partner Service Line",
    "Engagement Status",
]


In [35]:

# Create a list to hold the data
data = []

# Generate dummy data
for _ in range(num_rows):
    engagement_id = fake.unique.pystr_format("E-########")
    creation_date = fake.date_between(start_date='-1y', end_date='today')
    release_date = fake.date_between(start_date=creation_date, end_date='today')
    last_time_charged_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    last_expenses_charged_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    last_active_etcp_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    engagement = fake.bs().title()
    client = fake.company()
    engagement_partner = fake.last_name() + ", " + fake.first_name()
    engagement_partner_gui = fake.unique.pystr_format("#######")
    engagement_manager = fake.last_name() + ", " + fake.first_name()
    engagement_manager_gui = fake.unique.pystr_format("#######")
    engagement_partner_service_line = random.choice(["CBS & Elim", "Assurance", "Consulting", "Tax", "SaT"])
    engagement_status = random.choice(["Released", "Active", "Pending"])

    data.append([
        engagement_id,
        creation_date,
        release_date,
        last_time_charged_date,
        last_expenses_charged_date,
        last_active_etcp_date,
        engagement,
        client,
        engagement_partner,
        engagement_partner_gui,
        engagement_manager,
        engagement_manager_gui,
        engagement_partner_service_line,
        engagement_status,
    ])

# Create a DataFrame
df = pd.DataFrame(data, columns=columns)

df.head()

Unnamed: 0,Engagement ID,Creation Date,Release Date,Last Time Charged Date,Last Expenses Charged Date,Last Active ETC-P Date,Engagement,Client,Engagement Partner,Engagement Partner GUI,Engagement Manager,Engagement Manager GUI,Engagement Partner Service Line,Engagement Status
0,E-19987725,2024-02-03,2024-04-27,2024-06-04,,2024-05-27,E-Enable Dynamic Eyeballs,Simpson-Manning,"Marsh, Patricia",3614038,"Martin, Frances",5218975,Consulting,Released
1,E-27700313,2023-11-07,2024-05-20,2024-06-09,2024-05-28,,Redefine Frictionless Bandwidth,Pickering PLC,"Fisher, Ronald",3310308,"Johnson, Sean",3795710,Tax,Released
2,E-68169104,2023-10-30,2024-01-08,2024-05-07,2024-01-16,2024-04-14,Redefine Plug-And-Play Paradigms,Nicholls Inc,"Lynch, Tony",1187526,"Mann, Carol",2780849,Consulting,Released
3,E-81470279,2023-09-22,2024-04-15,,,,Revolutionize Magnetic Convergence,Moore PLC,"Taylor, Janet",6785925,"Parker, Robin",7453085,SaT,Active
4,E-70544691,2023-10-14,2023-10-23,,,2024-01-18,Empower Wireless Web Services,Carpenter-Parker,"Smith, Ellie",989648,"Cox, Hannah",5295391,CBS & Elim,Released


In [36]:
# Save the DataFrame to a CSV file
df.to_excel("./dummyData/test_engagementData.xlsx", index=False)


In [37]:
# get list of unique values in a engagement_partner_gui and engagement_manager_gui in a single list
engagement_partner_gui = df['Engagement Partner GUI'].unique().tolist()
engagement_manager_gui = df['Engagement Manager GUI'].unique().tolist()
gui = engagement_partner_gui + engagement_manager_gui
# remove duplicates
gui = list(set(gui))
# print gui size
print(len(gui))
# add faker safe email address to each gui and move to a df
email = []
for i in gui:
    email.append(fake.safe_email())
df_email = pd.DataFrame(list(zip(gui, email)), columns=['GUI', 'Email'])
# sort by gui
df_email = df_email.sort_values(by=['GUI'])
df_email.head()


2000


Unnamed: 0,GUI,Email
1449,1677,damiangibbons@example.com
800,16180,hughkhan@example.com
59,17767,xwatson@example.net
1901,31483,andrew02@example.com
1770,32381,kellyvanessa@example.net


In [38]:
# Save the DataFrame to a CSV file
df_email.to_excel("./dummyData/test_emailList.xlsx", index=False)

# Exception

In [39]:
# list of random engagement_id, append additional columns to the list as a df. Columns are: Engagement ID, Exception Reason, Exception Reason Description, Exception Reason Category, Exception Reason Subcategory, Exception Reason Type, Exception Reason Status, Exception Reason Status Description, Exception Reason Status Category,  Exception Reason Status Type, Exception Reason Status Start Date, Exception Reason Status User, Exception Reason Status User GUI, Exception Reason Status User Email
engagement_id = df['Engagement ID'].sample(n=25).tolist()
exception_reason_category = []
exception_reason_description = []
exception_reason_status = []
exception_reason_start_date = []
exception_reason_status_user = []
exception_reason_status_user_gui = []
exception_reason_status_user_email = []

for i in engagement_id:
    exception_reason_category.append(random.choice (["Tax", "Inventment Code", "Invoice", "Other"]))
    exception_reason_description.append(fake.sentence())
    exception_reason_status.append(random.choice (["Pending EP Approval", "Pending Finance Approval", "Approved", " Finance Rejected", "EP Rejected" "Expired", "New Time"]))
    exception_reason_start_date.append(fake.date_between(start_date='-4w', end_date='today'))
    exception_reason_status_user.append(fake.last_name() + ", " + fake.first_name())
    exception_reason_status_user_gui.append(fake.unique.pystr_format("#######"))
    exception_reason_status_user_email.append(fake.safe_email())

df_exception = pd.DataFrame(list(zip(engagement_id, exception_reason_category, exception_reason_description, exception_reason_status, exception_reason_start_date, exception_reason_status_user, exception_reason_status_user_gui, exception_reason_status_user_email)), columns=['Engagement ID', 'Exception Reason Category', 'Exception Reason Description', 'Exception Reason Status', 'Exception Reason Status Start Date', 'Exception Reason Status User', 'Exception Reason Status User GUI', 'Exception Reason Status User Email'])

df_exception.head(25)

df_exception.to_excel("./dummyData/test_exceptionData.xlsx", index=False)


In [41]:
# create delegate table 
engagement_id = df['Engagement ID'].sample(n=25).tolist()
delegate_1_name = []
delegate_1_gui = []
delegate_1_email = []
delegate_2_name = []
delegate_2_gui = []
delegate_2_email = []
delegate_3_name = []
delegate_3_gui = []
delegate_3_email = []

for i in engagement_id:
    delegate_1_name.append(fake.last_name() + ", " + fake.first_name())
    delegate_1_gui.append(fake.unique.pystr_format("#######"))
    delegate_1_email.append(fake.safe_email())
    delegate_2_name.append(fake.last_name() + ", " + fake.first_name())
    delegate_2_gui.append(fake.unique.pystr_format("#######"))
    delegate_2_email.append(fake.safe_email())
    delegate_3_name.append(fake.last_name() + ", " + fake.first_name())
    delegate_3_gui.append(fake.unique.pystr_format("#######"))
    delegate_3_email.append(fake.safe_email())

df_delegate = pd.DataFrame(list(zip(engagement_id, delegate_1_name, delegate_1_gui, delegate_1_email, delegate_2_name, delegate_2_gui, delegate_2_email, delegate_3_name, delegate_3_gui, delegate_3_email)), columns=['Engagement ID', 'Delegate 1 Name', 'Delegate 1 GUI', 'Delegate 1 Email', 'Delegate 2 Name', 'Delegate 2 GUI', 'Delegate 2 Email', 'Delegate 3 Name', 'Delegate 3 GUI', 'Delegate 3 Email'])


# unpivot the data: name, email and gui 
df_delegate = pd.melt(df_delegate, id_vars=['Engagement ID'], value_vars=['Delegate 1 Name', 'Delegate 1 GUI', 'Delegate 1 Email', 'Delegate 2 Name', 'Delegate 2 GUI', 'Delegate 2 Email', 'Delegate 3 Name', 'Delegate 3 GUI', 'Delegate 3 Email'], var_name='Attribute', value_name='Value')
df_delegate = df_delegate.sort_values(by=['Engagement ID', 'Attribute'])


# Save the DataFrame to a excel file
df_delegate.to_excel("./dummyData/test_delegateData.xlsx", index=False)

df_delegate.head(25)

Unnamed: 0,Engagement ID,Attribute,Value
65,E-01268348,Delegate 1 Email,pholt@example.org
40,E-01268348,Delegate 1 GUI,0215125
15,E-01268348,Delegate 1 Name,"Singh, Geraldine"
140,E-01268348,Delegate 2 Email,haynesowen@example.net
115,E-01268348,Delegate 2 GUI,0021629
90,E-01268348,Delegate 2 Name,"Warner, Ashley"
215,E-01268348,Delegate 3 Email,ashley68@example.net
190,E-01268348,Delegate 3 GUI,4795426
165,E-01268348,Delegate 3 Name,"Singh, Ronald"
74,E-04338406,Delegate 1 Email,kieranmorgan@example.com
