In [20]:
import pandas as pd
from faker import Faker
Faker.seed(999)
import random


In [21]:
# Initialize Faker
fake = Faker('en_GB')

# Define the number of rows you want to generate
num_rows = 1000

# Define the columns
columns = [
    "Engagement ID",
    "Creation Date",
    "Release Date",
    "Last Time Charged Date",
    "Last Expenses Charged Date",
    "Last Active ETC-P Date",
    "Engagement",
    "Client",
    "Engagement Partner",
    "Engagement Partner GUI",
    "Engagement Manager",
    "Engagement Manager GUI",
    "Engagement Partner Service Line",
    "Engagement Status",
]


In [22]:

# Create a list to hold the data
data = []

# Generate dummy data
for _ in range(num_rows):
    engagement_id = fake.unique.pystr_format("E-########")
    creation_date = fake.date_between(start_date='-1y', end_date='today')
    release_date = fake.date_between(start_date=creation_date, end_date='today')
    last_time_charged_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    last_expenses_charged_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    last_active_etcp_date = fake.date_between(start_date=release_date, end_date='today') if random.choice([True, False]) else None
    engagement = fake.bs().title()
    client = fake.company()
    engagement_partner = fake.last_name() + ", " + fake.first_name()
    engagement_partner_gui = fake.unique.pystr_format("#######")
    engagement_manager = fake.last_name() + ", " + fake.first_name()
    engagement_manager_gui = fake.unique.pystr_format("#######")
    engagement_partner_service_line = random.choice(["CBS & Elim", "Assurance", "Consulting", "Tax", "SaT"])
    engagement_status = random.choice(["Released", "Active", "Pending"])

    data.append([
        engagement_id,
        creation_date,
        release_date,
        last_time_charged_date,
        last_expenses_charged_date,
        last_active_etcp_date,
        engagement,
        client,
        engagement_partner,
        engagement_partner_gui,
        engagement_manager,
        engagement_manager_gui,
        engagement_partner_service_line,
        engagement_status,
    ])

# Create a DataFrame
df = pd.DataFrame(data, columns=columns)

df.head()

Unnamed: 0,Engagement ID,Creation Date,Release Date,Last Time Charged Date,Last Expenses Charged Date,Last Active ETC-P Date,Engagement,Client,Engagement Partner,Engagement Partner GUI,Engagement Manager,Engagement Manager GUI,Engagement Partner Service Line,Engagement Status
0,E-19987725,2024-02-03,2024-04-27,2024-06-04,,2024-05-27,E-Enable Dynamic Eyeballs,Simpson-Manning,"Marsh, Patricia",3614038,"Martin, Frances",5218975,SaT,Released
1,E-27700313,2023-11-07,2024-05-20,2024-06-09,,,Seize Revolutionary Communities,Sharp-Pickering,"Blackburn, Ronald",3310308,"Johnson, Sean",3795710,CBS & Elim,Active
2,E-68169104,2023-10-30,2024-01-08,2024-05-07,2024-01-16,2024-04-14,Redefine Plug-And-Play Paradigms,Nicholls Inc,"Lynch, Tony",1187526,"Mann, Carol",2780849,Tax,Pending
3,E-81470279,2023-09-22,2024-04-15,2024-05-30,,2024-05-05,Maximize Holistic Deliverables,Black Group,"Knight, Molly",2503745,"Smith, Shannon",5705446,Tax,Active
4,E-91560664,2023-11-05,2024-03-30,2024-04-12,,2024-04-02,Incentivize Proactive Metrics,"Bradshaw, Harris and Dennis","Morgan, Danny",2953918,"Phillips, Danny",693247,CBS & Elim,Pending


In [23]:
# Save the DataFrame to a CSV file
df.to_excel("./dummyData/test_engagementData.xlsx", index=False)


In [24]:
# get list of unique values in a engagement_partner_gui and engagement_manager_gui in a single list
engagement_partner_gui = df['Engagement Partner GUI'].unique().tolist()
engagement_manager_gui = df['Engagement Manager GUI'].unique().tolist()
gui = engagement_partner_gui + engagement_manager_gui
# remove duplicates
gui = list(set(gui))
# print gui size
print(len(gui))
# add faker safe email address to each gui and move to a df
email = []
for i in gui:
    email.append(fake.safe_email())
df_email = pd.DataFrame(list(zip(gui, email)), columns=['GUI', 'Email'])
# sort by gui
df_email = df_email.sort_values(by=['GUI'])
df_email.head()


2000


Unnamed: 0,GUI,Email
251,1215,johnsonolivia@example.net
1720,1527,albertbruce@example.net
1408,1677,jodie28@example.net
1726,8711,daleelaine@example.org
1420,9146,donna53@example.net


In [25]:
# Save the DataFrame to a CSV file
df_email.to_excel("./dummyData/test_emailList.xlsx", index=False)

# Exception

In [26]:
# list of 10 random engagement_id, append additional columns to the list as a df. Columns are: Engagement ID, Exception Reason, Exception Reason Description, Exception Reason Category, Exception Reason Subcategory, Exception Reason Type, Exception Reason Status, Exception Reason Status Description, Exception Reason Status Category,  Exception Reason Status Type, Exception Reason Status Start Date, Exception Reason Status User, Exception Reason Status User GUI, Exception Reason Status User Email
engagement_id = df['Engagement ID'].sample(n=25).tolist()
exception_reason_category = []
exception_reason_description = []
exception_reason_status = []
exception_reason_start_date = []
exception_reason_status_user = []
exception_reason_status_user_gui = []
exception_reason_status_user_email = []

for i in engagement_id:
    exception_reason_category.append(random.choice (["Tax", "Inventment Code", "Invoice", "Other"]))
    exception_reason_description.append(fake.sentence())
    exception_reason_status.append(random.choice (["Pending EP Approval", "Pending Finance Approval", "Approved", " Finance Rejected", "EP Rejected" "Expired", "New Time"]))
    exception_reason_start_date.append(fake.date_between(start_date='-4w', end_date='today'))
    exception_reason_status_user.append(fake.last_name() + ", " + fake.first_name())
    exception_reason_status_user_gui.append(fake.unique.pystr_format("#######"))
    exception_reason_status_user_email.append(fake.safe_email())

df_exception = pd.DataFrame(list(zip(engagement_id, exception_reason_category, exception_reason_description, exception_reason_status, exception_reason_start_date, exception_reason_status_user, exception_reason_status_user_gui, exception_reason_status_user_email)), columns=['Engagement ID', 'Exception Reason Category', 'Exception Reason Description', 'Exception Reason Status', 'Exception Reason Status Start Date', 'Exception Reason Status User', 'Exception Reason Status User GUI', 'Exception Reason Status User Email'])

df_exception.head(25)


Unnamed: 0,Engagement ID,Exception Reason Category,Exception Reason Description,Exception Reason Status,Exception Reason Status Start Date,Exception Reason Status User,Exception Reason Status User GUI,Exception Reason Status User Email
0,E-83615405,Inventment Code,Velit rerum similique.,New Time,2024-05-16,"Evans, Clifford",2764092,debracarr@example.com
1,E-71315475,Invoice,Dolorum repellendus dolorum velit accusamus un...,New Time,2024-06-06,"Higgins, Charles",133399,bellrobert@example.org
2,E-65028909,Other,Nisi repudiandae sapiente repudiandae magni fu...,Finance Rejected,2024-06-05,"Moss, Russell",8500961,reedterry@example.net
3,E-01653935,Other,Alias et cumque vel.,Approved,2024-05-22,"Mills, Douglas",9890731,lewis42@example.com
4,E-27151824,Tax,Omnis ut natus fugit.,Approved,2024-05-21,"Burgess, Jennifer",5451638,cartwrightleon@example.org
5,E-79494737,Tax,Recusandae qui possimus consequatur.,New Time,2024-06-02,"Harrison, Harry",916183,catherinejordan@example.net
6,E-98318991,Inventment Code,Occaecati at asperiores repellat.,Pending EP Approval,2024-06-09,"Atkinson, Tina",7884645,qgray@example.com
7,E-19172275,Invoice,Ipsum cum molestias.,Approved,2024-06-11,"Hughes, Nathan",5253922,westcarly@example.com
8,E-95166209,Tax,Alias corporis amet ex blanditiis officiis tem...,Finance Rejected,2024-05-20,"Parker, Denise",7055026,coatescameron@example.com
9,E-23782535,Tax,Omnis a quis temporibus beatae neque tempora.,EP RejectedExpired,2024-06-01,"Stephens, Jayne",2412509,hjarvis@example.org
