In [1]:
import pandas as pd
import math

def count_employees(employees: pd.DataFrame) -> pd.DataFrame:
    # Filter the employees who have reports (i.e., the managers)
    # .notna() is used to select rows where the 'reports_to' column is not null
    # This is necessary because we only want to consider employees who report to someone
    reports = employees[employees['reports_to'].notna()]  # .notna() returns a boolean Series indicating whether each value is not missing
    
    # Create a new DataFrame with the count of reports and average age for each manager
    # We use the groupby function to group the rows by the 'reports_to' column
    # This means that we will calculate the average age and count of reports for each manager separately
    # The agg function is used to apply one or more aggregation functions to the grouped data
    # In this case, we use 'mean' to calculate the average age and 'count' to count the number of reports
    # The agg function returns a DataFrame with the aggregated values
    manager_reports = reports.groupby('reports_to').agg({
        'age': 'mean',  # calculate the average age of the reports for each manager
        'employee_id': 'count'  # count the number of reports for each manager
    }).reset_index()  # reset_index is used to reset the index of the resulting DataFrame
    
    # Rename the columns
    manager_reports = manager_reports.rename(columns={
        'reports_to': 'employee_id',  # rename the 'reports_to' column to 'employee_id'
        'age': 'average_age',  # rename the 'age' column to 'average_age'
        'employee_id': 'reports_count'  # rename the 'employee_id' column to 'reports_count'
    })
    
    # Round the average age to the nearest integer
    # We use the apply function to apply a lambda function to each value in the 'average_age' column
    # The lambda function takes a value x and returns math.floor(x + 0.5)
    # This is equivalent to rounding x to the nearest integer
    # The reason we add 0.5 before taking the floor is to ensure that values like 38.5 are rounded up to 39
    # This is because math.floor(38.5) would return 38, but math.floor(38.5 + 0.5) returns 39
    # The apply function returns a new Series with the rounded values
    manager_reports['average_age'] = manager_reports['average_age'].apply(
        lambda x: math.floor(x + 0.5)  # this is equivalent to rounding x to the nearest integer
        # Here's a detailed explanation of how this works:
        # When x is a whole number (e.g. 38), x + 0.5 will be 38.5
        # When x is a decimal number (e.g. 38.5), x + 0.5 will be 39.0
        # When x is a decimal number (e.g. 38.4), x + 0.5 will be 38.9
        # So, when we take the floor of x + 0.5, we get:
        #   - 38 when x is 38
        #   - 39 when x is 38.5 or greater
        #   - 38 when x is 38.4 or less
        # This is equivalent to rounding x to the nearest integer
    )
    
    # Merge the manager reports with the original employees DataFrame to get the manager names
    result = pd.merge(manager_reports, employees[['employee_id', 'name']], on='employee_id')
    
    # Select the desired columns and sort by employee_id
    result = result[['employee_id', 'name', 'reports_count', 'average_age']].sort_values('employee_id')
    
    return result

# Example usage:
data = {
    'employee_id': [9, 6, 4, 2],
    'name': ['Hercy', 'Alice', 'Bob', 'Winston'],
    'reports_to': [None, 9, 9, None],
    'age': [43, 41, 36, 37]
}
employees = pd.DataFrame(data)

result = count_employees(employees)
print(result)


   employee_id   name  reports_count  average_age
0          9.0  Hercy              2           39
