In [10]:
# Membuat Data Employees BeeTech 

from pymongo import MongoClient
import pandas as pd
import random
import string

# Koneksi MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["beetech_db"]
collection = db["employees"]

# Data generator otomatis 
departments = ["Software", "Hardware", "IT Support", "Cybersecurity", "AI Research"]
first_names = ["Alex", "Blake", "Casey", "Drew", "Elliot", "Finley", "Gray", "Harper", "Jordan", "Kai",
               "Logan", "Morgan", "Noel", "Parker", "Quinn", "Reese", "Sawyer", "Taylor", "Vern", "Wyatt"]
last_names = ["Anderson", "Brown", "Carter", "Davis", "Evans", "Garcia", "Harris", "Johnson", "King",
              "Lewis", "Miller", "Nelson", "Perez", "Roberts", "Scott", "Turner", "Walker", "Young", "Zimmerman"]

employees = []

for i in range(50):
    name = f"{random.choice(first_names)} {random.choice(last_names)}"
    department = random.choice(departments)
    age = random.randint(21, 45)
    salary = random.randint(4000, 10000) * 1000  # ribuan
    employees.append({
        "name": name,
        "department": department,
        "age": age,
        "salary": salary
    })

collection.insert_many(employees)
print("50 data acak karyawan BeeTech Corp berhasil dimasukkan!")

# Tampilkan 10 data pertama
df = pd.DataFrame(list(collection.find({}, {"_id": 0}))).head(15)
display(df)


50 data acak karyawan BeeTech Corp berhasil dimasukkan!


Unnamed: 0,name,department,age,salary
0,Finley Carter,IT Support,21,7875000
1,Casey Nelson,Hardware,25,8939000
2,Sawyer Carter,AI Research,39,4711000
3,Casey Evans,Cybersecurity,44,5268000
4,Alex Miller,Hardware,28,7335000
5,Noel Perez,Software,27,5962000
6,Drew Johnson,AI Research,40,7902000
7,Vern Harris,IT Support,21,8093000
8,Parker Carter,AI Research,45,8645000
9,Sawyer Harris,Hardware,44,7065000


In [11]:
# Tugas 1: 5 Karyawan dengan Gaji Tertinggi per Departemen

pipeline_top5 = [
    {'$sort': {'department': 1, 'salary': -1}},
    {'$group': {
        '_id': '$department',
        'top_employees': {'$push': {'name': '$name', 'salary': '$salary'}}
    }},
    {'$project': {'top_5': {'$slice': ['$top_employees', 5]}}}
]

results_top5 = list(collection.aggregate(pipeline_top5))

print("5 Karyawan dengan Gaji Tertinggi di Tiap Departemen:\n")
for dept in results_top5:
    print(f"Departemen: {dept['_id']}")
    for emp in dept['top_5']:
        print(f" - {emp['name']} : Rp.{emp['salary']:,}")
    print()


5 Karyawan dengan Gaji Tertinggi di Tiap Departemen:

Departemen: Cybersecurity
 - Taylor Lewis : Rp.9,996,000
 - Vern Turner : Rp.9,453,000
 - Quinn Young : Rp.8,164,000
 - Vern Zimmerman : Rp.7,630,000
 - Elliot Evans : Rp.7,501,000

Departemen: Software
 - Taylor Perez : Rp.9,748,000
 - Taylor Perez : Rp.7,533,000
 - Logan Garcia : Rp.7,408,000
 - Elliot Walker : Rp.6,966,000
 - Noel Perez : Rp.5,962,000

Departemen: IT Support
 - Vern Harris : Rp.8,093,000
 - Finley Carter : Rp.7,875,000
 - Logan Walker : Rp.7,334,000
 - Kai Scott : Rp.7,212,000
 - Casey Walker : Rp.6,583,000

Departemen: AI Research
 - Quinn Nelson : Rp.9,783,000
 - Parker Zimmerman : Rp.9,101,000
 - Parker Carter : Rp.8,645,000
 - Drew Johnson : Rp.7,902,000
 - Vern Nelson : Rp.5,899,000

Departemen: Hardware
 - Casey Nelson : Rp.8,939,000
 - Elliot Harris : Rp.8,747,000
 - Gray Carter : Rp.8,011,000
 - Finley Lewis : Rp.8,004,000
 - Reese Walker : Rp.7,676,000



In [12]:
# Tugas 2: Hapus Karyawan di Bawah 25 Tahun
import pandas as pd

deleted = collection.delete_many({'age': {'$lt': 25}})
print(f"{deleted.deleted_count} karyawan usia <25 tahun berhasil dihapus.")

data_staff = list(collection.find())
df = pd.DataFrame(data_staff)
display(df)


8 karyawan usia <25 tahun berhasil dihapus.


Unnamed: 0,_id,name,department,age,salary
0,690b4d157456813de068d816,Casey Nelson,Hardware,25,8939000
1,690b4d157456813de068d817,Sawyer Carter,AI Research,39,4711000
2,690b4d157456813de068d818,Casey Evans,Cybersecurity,44,5268000
3,690b4d157456813de068d819,Alex Miller,Hardware,28,7335000
4,690b4d157456813de068d81a,Noel Perez,Software,27,5962000
5,690b4d157456813de068d81b,Drew Johnson,AI Research,40,7902000
6,690b4d157456813de068d81d,Parker Carter,AI Research,45,8645000
7,690b4d157456813de068d81e,Sawyer Harris,Hardware,44,7065000
8,690b4d157456813de068d81f,Logan Carter,IT Support,43,5197000
9,690b4d157456813de068d820,Quinn Young,Cybersecurity,27,8164000


In [13]:
# Tugas 3: Total Gaji dan Rata-rata Umur per Departemen

pipeline_report = [
    {'$group': {
        '_id': '$department',
        'total_salary': {'$sum': '$salary'},
        'average_age': {'$avg': '$age'}
    }},
    {'$sort': {'_id': 1}}
]

report = list(collection.aggregate(pipeline_report))

df_report = pd.DataFrame(report)
df_report.rename(columns={'_id': 'Department', 'total_salary': 'Total Salary', 'average_age': 'Average Age'}, inplace=True)

print("Laporan Total Gaji & Rata-rata Umur per Departemen:")
display(df_report)


Laporan Total Gaji & Rata-rata Umur per Departemen:


Unnamed: 0,Department,Total Salary,Average Age
0,AI Research,51160000,40.285714
1,Cybersecurity,67374000,35.5
2,Hardware,73008000,33.8
3,IT Support,53412000,36.777778
4,Software,36597000,32.0
