# Create job_logs

This code creates a CSV file named 'job_logs.csv' (written to 'demo_data/data/'). 'Id', 'Key', and 'ProcessName' share the same value within each row. 'ReleaseId' is a random 32-digit number. There are 150 unique IDs in total, and the number of entries per ID is random, between 500 and 1,000. 'StartTime' is a random timestamp between 01.01.2021 and 31.12.2022; 'EndTime' is 'StartTime' plus a randomly drawn duration and is capped at the end of that range. 'DateDiff' is the difference in seconds between 'StartTime' and 'EndTime'. The average of this difference varies per ID and is chosen randomly between 30 and 2,400 seconds.

Please note that generating a large amount of data might take some time.

In [3]:
import pandas as pd
import random
import csv
from datetime import datetime, timedelta

# Pick a random point in time inside the closed interval [start, end]
def random_date(start, end):
    """Return ``start`` shifted forward by a random whole number of seconds.

    The offset is drawn with ``random.randint`` from 0 up to and including
    the length of the interval in seconds (truncated to an integer), so for
    ``start <= end`` the result lies within ``[start, end]``.
    """
    span_seconds = int((end - start).total_seconds())
    offset = timedelta(seconds=random.randint(0, span_seconds))
    return start + offset

# Produce one simulated run: when it started, when it ended, and how long it took
def random_timedelta(avg_seconds, start_date, end_date):
    """Return a ``(start, end, diff)`` tuple for one randomly generated run.

    ``start`` comes from ``random_date(start_date, end_date)``. The run length
    is drawn from a normal distribution centred on ``avg_seconds`` with a
    standard deviation of one tenth of that average. ``end`` is capped at
    ``end_date``, and ``diff`` is ``end - start`` expressed in seconds.
    """
    begin = random_date(start_date, end_date)

    # Duration ~ N(avg_seconds, avg_seconds / 10)
    spread = avg_seconds / 10
    duration = timedelta(seconds=random.gauss(avg_seconds, spread))

    # A run may not finish after the end of the allowed range
    finish = min(begin + duration, end_date)

    return begin, finish, (finish - begin).total_seconds()

import os

# Define the date range.
# strptime without a time component yields midnight, so the range ends at
# 00:00 on 31.12.2022.
date_format = "%d.%m.%Y"
start_date = datetime.strptime("01.01.2021", date_format)
end_date = datetime.strptime("31.12.2022", date_format)

# Destination of the generated log file
output_path = 'demo_data/data/job_logs.csv'

data = []

# Create 150 IDs (1 through 150)
for i in range(1, 151):
    # Random number of entries for this ID
    entries = random.randint(500, 1000)
    # Random average difference (in seconds) for this ID
    avg_seconds = random.randint(30, 2400)
    for _ in range(entries):
        start, end, diff = random_timedelta(avg_seconds, start_date, end_date)
        # 32 digit random number, built one decimal digit at a time so that
        # leading zeros are preserved
        release_id = ''.join(str(random.randint(0, 9)) for _ in range(32))
        # Id, Key and ProcessName all carry the same value
        data.append([i, i, i, release_id, start, end, diff])

# Make sure the target directory exists; open() would otherwise raise
# FileNotFoundError on a fresh checkout
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Write data to a CSV file
with open(output_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Id", "Key", "ProcessName", "ReleaseId", "StartTime", "EndTime", "DateDiff"])
    writer.writerows(data)


# Create weekly_releases
This script creates a CSV file named 'weekly_releases.csv'. Each row in the file (excluding the header) has the same value in all four columns, and these values range from 1 to 150. Every row represents an RPA job that runs weekly.

In [4]:
import csv

import os

# Specify the filename
filename = "demo_data/data/weekly_releases.csv"

# Specify the fieldnames
fieldnames = ["Id", "TenantId", "Key", "Name"]

# Make sure the target directory exists; open() would otherwise raise
# FileNotFoundError when the folder has not been created yet
output_dir = os.path.dirname(filename)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Open the file in write mode
with open(filename, "w", newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the header
    writer.writeheader()

    # Write one row per release (1 through 150); every column carries the
    # same value
    for i in range(1, 151):
        writer.writerow({name: i for name in fieldnames})
