## Generator for Dummy Data

In [1]:
from faker import Faker
from faker.providers import DynamicProvider
from faker_vehicle import VehicleProvider
from collections import defaultdict
import pandas as pd
import random
from dateutil import parser

In [2]:
rows = 10000

fake = Faker("de_DE")
fake.add_provider(VehicleProvider)

department_provider = DynamicProvider(
     provider_name="department_names",
     elements= ["IT", "Controlling", "Sales", "Consulting", "Marketing"],
)
fake.add_provider(department_provider)

### Entry for each Employee

In [3]:
main_data = defaultdict(list)
gender=["m", "w"]

for i in range(rows):
    main_data["PerNR"].append(i)
    main_data["Name"].append(fake.first_name() +" "+ fake.last_name())
    
    temp_address = fake.address().split("\n")
    main_data["Strasse"].append(temp_address[0])
    main_data["Ort"].append(temp_address[1])
    
    main_data["Telefon"].append(fake.phone_number())
    main_data["Abteilung"].append(fake.department_names())
    main_data["Geschlecht"].append(gender[random.randint(0,1)])
    main_data["eingestellt"].append(fake.date())
    main_data["KFZ1"].append(fake.vehicle_make_model())
    main_data["KFZ2"].append(fake.vehicle_make_model())
    
main_df = pd.DataFrame(main_data)
main_df.to_excel("mitarbeiter.xlsx", index=False)
main_df.head()

Unnamed: 0,PerNR,Name,Strasse,Ort,Telefon,Abteilung,Geschlecht,eingestellt,KFZ1,KFZ2
0,0,Pirmin Stiffel,Drewesgasse 4/2,12392 Tirschenreuth,+49(0)7113986524,IT,m,1983-09-27,Rolls-Royce Phantom,Lexus LX
1,1,Alina Jacob,Jäntschweg 2,43861 Fürstenfeldbruck,+49(0)2563 892959,Marketing,w,2008-01-26,Nissan Xterra,Jaguar E-PACE
2,2,Cord Ernst,Aribert-Beer-Gasse 4/2,33545 Kitzingen,+49 (0) 8080 586652,Marketing,w,2018-08-01,Bentley Brooklands,Honda Odyssey
3,3,Linus Steckel,Mosemannplatz 46,17403 Schwandorf,0349829006,Controlling,w,2013-01-16,Chevrolet Silverado 3500 Extended Cab,Land Rover Range Rover
4,4,Herrmann Kostolzin,Mara-Röhrdanz-Straße 21,67002 Parchim,06976 29178,Marketing,w,1993-07-01,Chevrolet Express 3500 Passenger,Mercedes-Benz E-Class


### Saving existing Names to create Relations

In [4]:
uniques = main_df["Name"].unique()

existing_names_provider = DynamicProvider(
     provider_name="existing_names",
     elements= list(uniques),
)
fake.add_provider(existing_names_provider)



### Project

In [5]:
project_data = defaultdict(list)
fake.unique.clear()

for i in range(rows):
    project_data["ProNr"].append(i)
    project_data["Bezeichnung"].append(fake.company())
    date = fake.date()
    project_data["Start"].append(date)
    project_data["Ende"].append(fake.date_between_dates(parser.parse(date)))
    
    total_value = random.randint(100, 1000)
    project_data["Auftragswert"].append(total_value)
    project_data["bezahlt"].append(random.randint(0, total_value))
    
    project_data["Leiter"].append(fake.existing_names())
    
    
project_df = pd.DataFrame(project_data)
project_df.to_excel("projekt.xlsx", index=False)
project_df.head()

Unnamed: 0,ProNr,Bezeichnung,Start,Ende,Auftragswert,bezahlt,Leiter
0,0,Carsten Mangold Stiftung & Co. KGaA,2018-03-05,2020-11-26,562,174,Götz Boucsein
1,1,Gertz Beyer Stiftung & Co. KG,1970-10-16,1991-09-21,237,81,Ekrem Roskoth
2,2,Ruppersberger Täsche GmbH & Co. KG,1996-11-12,2005-05-07,736,409,Kaspar Junken
3,3,Geisel Hendriks AG,1974-07-12,1988-05-30,801,759,Slobodan Kühnert
4,4,Haase,2006-09-11,2015-08-30,333,53,Fritz Lehmann


### Departments

In [6]:
department_data = defaultdict(list)

for i in range(5):
    department_data["AbtNR"].append(i)
    department_data["Bezeichnung"].append(fake.unique.department_names())
    department_data["Abteilungsleiter"].append(fake.unique.existing_names())
    department_data["Etat"].append(random.randint(10000, 1000000))
    
department_df = pd.DataFrame(department_data)
department_df.to_excel("Abteilung.xlsx", index=False)
department_df

Unnamed: 0,AbtNR,Bezeichnung,Abteilungsleiter,Etat
0,0,Sales,Hans-Werner Franke,887712
1,1,IT,Hedda Bloch,522625
2,2,Marketing,Wibke Niemeier,260962
3,3,Controlling,Else Scheuermann,65854
4,4,Consulting,Tilly Heinz,401196


### ProjektMitarbeiter

In [7]:
project_employee_data = defaultdict(list)

for i in range(rows):
    project_employee_data["Projekt"].append(random.randint(0, len(main_df)))
    project_employee_data["Mitarbeiter"].append(main_df["Name"][i])
    project_employee_data["Zeitanteil"].append(random.randint(0, 200))
    
project_employee_df = pd.DataFrame(project_employee_data)
project_employee_df.to_excel("projektzeiten.xlsx", index=False)
project_employee_df.head()

Unnamed: 0,Projekt,Mitarbeiter,Zeitanteil
0,6690,Pirmin Stiffel,120
1,5684,Alina Jacob,172
2,3624,Cord Ernst,4
3,4794,Linus Steckel,106
4,9424,Herrmann Kostolzin,116
