#### Testing

In [None]:
# Basic imports
import numpy as np
import pandas as pd

In [21]:
# Toy employee table used by every example cell below.
# Built row-by-row (one tuple per employee); joining dates start as ISO
# strings and are converted to datetimes in the chained assign step.
data = pd.DataFrame(
    [
        (101, 'Alice', 'Sales', 70000, '2020-01-15', True, 88),
        (102, 'Bob', 'Engineering', 85000, '2019-03-10', False, 92),
        (103, 'Charlie', 'Engineering', 80000, '2021-06-23', False, 79),
        (104, 'Diana', 'HR', 60000, '2018-11-05', True, 85),
        (105, 'Eve', 'Sales', 72000, '2022-02-20', True, 91),
        (106, 'Frank', 'HR', 61000, '2020-12-01', False, 76),
        (107, 'Grace', 'Marketing', 65000, '2021-09-30', True, 83),
    ],
    columns=['employee_id', 'name', 'department', 'salary',
             'joining_date', 'remote', 'performance_score'],
).assign(joining_date=lambda frame: pd.to_datetime(frame['joining_date']))

data.head()

Unnamed: 0,employee_id,name,department,salary,joining_date,remote,performance_score
0,101,Alice,Sales,70000,2020-01-15,True,88
1,102,Bob,Engineering,85000,2019-03-10,False,92
2,103,Charlie,Engineering,80000,2021-06-23,False,79
3,104,Diana,HR,60000,2018-11-05,True,85
4,105,Eve,Sales,72000,2022-02-20,True,91


In [22]:
# Names of the employees in the Sales department, de-duplicated, as a plain list
data.loc[data['department'] == 'Sales', 'name'].unique().tolist()

['Alice', 'Eve']

In [23]:
# Five highest-paid employees: order rows from largest to smallest salary
(
    data
    .sort_values('salary', ascending=False)
    .head(5)
)

Unnamed: 0,employee_id,name,department,salary,joining_date,remote,performance_score
1,102,Bob,Engineering,85000,2019-03-10,False,92
2,103,Charlie,Engineering,80000,2021-06-23,False,79
4,105,Eve,Sales,72000,2022-02-20,True,91
0,101,Alice,Sales,70000,2020-01-15,True,88
6,107,Grace,Marketing,65000,2021-09-30,True,83


In [24]:
# Mean salary per department, returned as a flat two-column frame
(
    data
    .groupby('department')['salary']
    .mean()
    .reset_index()
)

Unnamed: 0,department,salary
0,Engineering,82500.0
1,HR,60500.0
2,Marketing,65000.0
3,Sales,71000.0


In [31]:
# Employees who joined in 2021 or later.
# The lower bound is inclusive so that someone who joined exactly on
# 2021-01-01 is kept; a strict '>' comparison would silently drop them.
data.loc[data['joining_date'] >= pd.Timestamp('2021-01-01'), ['name', 'joining_date']]

Unnamed: 0,name,joining_date
2,Charlie,2021-06-23
4,Eve,2022-02-20
6,Grace,2021-09-30


In [32]:
# Remote workers with high performance (score strictly above 80).
# 'remote' is already a boolean column, so it serves as the mask directly;
# comparing it to True is redundant.
data[data['remote'] & (data['performance_score'] > 80)]

Unnamed: 0,employee_id,name,department,salary,joining_date,remote,performance_score
0,101,Alice,Sales,70000,2020-01-15,True,88
3,104,Diana,HR,60000,2018-11-05,True,85
4,105,Eve,Sales,72000,2022-02-20,True,91
6,107,Grace,Marketing,65000,2021-09-30,True,83


In [34]:
# Create new column: whole years each employee has completed since joining.
# Calendar-based: take the difference in years, then subtract one when this
# year's anniversary has not happened yet. Dividing the elapsed days by 365
# ignores leap days and can roll over to the next year before the real
# anniversary.
# NOTE: the result depends on the date the cell is run, so it changes over time.
today = pd.Timestamp.today().normalize()  # midnight today, no time-of-day part
joined = data['joining_date']
anniversary_passed = (
    (joined.dt.month < today.month)
    | ((joined.dt.month == today.month) & (joined.dt.day <= today.day))
)
data['years_since_joined'] = (
    today.year - joined.dt.year - (~anniversary_passed).astype(int)
)

In [38]:
# Per-department summary: median salary and best performance score
(
    data
    .groupby('department')
    .agg(
        median_salary=pd.NamedAgg(column='salary', aggfunc='median'),
        max_performance=pd.NamedAgg(column='performance_score', aggfunc='max'),
    )
    .reset_index()
)

Unnamed: 0,department,median_salary,max_performance
0,Engineering,82500.0,92
1,HR,60500.0,85
2,Marketing,65000.0,83
3,Sales,71000.0,91
