## Generator for Dummy Data

In [1]:
from faker import Faker
from faker.providers import DynamicProvider
from faker_vehicle import VehicleProvider
from collections import defaultdict
import pandas as pd
import random
from dateutil import parser

In [2]:
rows = 100

fake = Faker("de_DE")
fake.add_provider(VehicleProvider)

department_provider = DynamicProvider(
     provider_name="department_names",
     elements= ["IT", "Controlling", "Sales", "Consulting", "Marketing"],
)
fake.add_provider(department_provider)

### Entry for each Employee

In [3]:
main_data = defaultdict(list)
gender=["m", "w"]

for i in range(rows):
    main_data["PerNR"].append(i)
    main_data["Name"].append(fake.first_name() +" "+ fake.last_name())
    
    temp_address = fake.address().split("\n")
    main_data["Strasse"].append(temp_address[0])
    main_data["Ort"].append(temp_address[1])
    
    main_data["Telefon"].append(fake.phone_number())
    main_data["Abteilung"].append(fake.department_names())
    main_data["Geschlecht"].append(gender[random.randint(0,1)])
    main_data["eingestellt"].append(fake.date())
    main_data["KFZ1"].append(fake.vehicle_make_model())
    main_data["KFZ2"].append(fake.vehicle_make_model())
    
main_df = pd.DataFrame(main_data)
main_df.to_excel("mitarbeiter.xlsx", index=False)
main_df.head()

Unnamed: 0,PerNR,Name,Strasse,Ort,Telefon,Abteilung,Geschlecht,eingestellt,KFZ1,KFZ2
0,0,Hansgeorg Hermighausen,Vittorio-Bien-Ring 5/1,10211 Mellrichstadt,+49 (0) 7620 864509,Controlling,w,1979-11-18,Lexus ES,Dodge Grand Caravan Passenger
1,1,Saban Kuhl,Niklas-Gerlach-Allee 404,88055 Roding,+49(0)5335 53031,Consulting,w,1975-08-04,Toyota Yaris,Chevrolet S10 Regular Cab
2,2,Raisa Kusch,Anne-Rose-Ullmann-Ring 95,41234 Amberg,06618460995,Controlling,w,2010-02-27,Porsche Cayenne,Lexus GS
3,3,Madeleine Freudenberger,Bolanderstr. 6/0,19597 Dinkelsbühl,+49(0) 659723859,Consulting,w,2019-09-14,Mercedes-Benz G-Class,Chevrolet Silverado 3500 HD Regular Cab
4,4,Halil Seifert,Heinz-Jürgen-Lange-Weg 3/0,40593 Heinsberg,+49(0)8983 19434,Sales,w,1977-07-07,GMC Suburban 1500,Toyota FJ Cruiser


### Saving existing Names to create Relations

In [4]:
uniques = main_df["Name"].unique()

existing_names_provider = DynamicProvider(
     provider_name="existing_names",
     elements= list(uniques),
)
fake.add_provider(existing_names_provider)



### Project

In [5]:
project_data = defaultdict(list)
fake.unique.clear()

for i in range(rows):
    project_data["ProNr"].append(i)
    project_data["Bezeichnung"].append(fake.company())
    date = fake.date()
    project_data["Start"].append(date)
    project_data["Ende"].append(fake.date_between_dates(parser.parse(date)))
    
    total_value = random.randint(100, 1000)
    project_data["Auftragswert"].append(total_value)
    project_data["bezahlt"].append(random.randint(0, total_value))
    
    project_data["Leiter"].append(fake.existing_names())
    
    
project_df = pd.DataFrame(project_data)
project_df.to_excel("projekt.xlsx", index=False)
project_df.head()

Unnamed: 0,ProNr,Bezeichnung,Start,Ende,Auftragswert,bezahlt,Leiter
0,0,Mülichen Lindau GmbH,1996-04-09,1998-01-28,359,324,Reza Köhler
1,1,Gutknecht GmbH,2003-09-30,2019-11-22,189,51,Bernd Hörle
2,2,Rörricht Stiftung & Co. KGaA,2000-11-12,2003-01-27,114,80,Florian Schwital
3,3,Gieß,2001-12-09,2004-04-03,663,548,Monja Kambs
4,4,Weimer Kuhl AG,1991-04-15,2001-04-12,542,338,Ismet Faust


### Departments

In [6]:
department_data = defaultdict(list)

for i in range(5):
    department_data["AbtNR"].append(i)
    department_data["Bezeichnung"].append(fake.unique.department_names())
    department_data["Abteilungsleiter"].append(fake.unique.existing_names())
    department_data["Etat"].append(random.randint(10000, 1000000))
    
department_df = pd.DataFrame(department_data)
department_df.to_excel("Abteilung.xlsx", index=False)
department_df

Unnamed: 0,AbtNR,Bezeichnung,Abteilungsleiter,Etat
0,0,Consulting,Friedlinde Dörr,446727
1,1,IT,Sigfried Eberth,373764
2,2,Marketing,Justus Becker,260938
3,3,Controlling,Ignaz Juncken,121620
4,4,Sales,Ehrhard Hermann,144434


### ProjektMitarbeiter

In [7]:
project_employee_data = defaultdict(list)

for i in range(rows):
    project_employee_data["Projekt"].append(random.randint(0, len(main_df)))
    project_employee_data["Mitarbeiter"].append(main_df["Name"][i])
    project_employee_data["Zeitanteil"].append(random.randint(0, 200))
    
project_employee_df = pd.DataFrame(project_employee_data)
project_employee_df.to_excel("projektzeiten.xlsx", index=False)
project_employee_df.head()

Unnamed: 0,Projekt,Mitarbeiter,Zeitanteil
0,9,Hansgeorg Hermighausen,87
1,38,Saban Kuhl,200
2,31,Raisa Kusch,23
3,100,Madeleine Freudenberger,61
4,17,Halil Seifert,27
