In [1]:
### Covered in this section are the following concepts:
# Pandas Iteration

### Library Imports
import pandas as pd


#### Pandas Iteration

- **`DataFrame.iterrows()`** - Iterate over DataFrame rows as (index, Series) pairs.
  - **Use Case:** Useful for iterating through rows and performing operations row by row, although it can be slower for large DataFrames.

- **`DataFrame.itertuples()`** - Iterate over DataFrame rows as named tuples, which can be more efficient than `iterrows()`.
  - **Use Case:** Preferred over `iterrows()` for performance reasons when you need to iterate over DataFrame rows and access multiple columns.

- **`DataFrame.apply()`** - Apply a function along an axis of the DataFrame (either rows or columns).
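  - **Use Case:** Convenient for applying a function to each row or column without writing an explicit loop, making the code more concise than manual iteration. Note that row-wise `apply(axis=1)` still calls the function once per row in Python, so a vectorized column expression is faster when one exists.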


In [2]:
### Sample DataFrame
# Column-oriented sample data: one list per column, five employees in total.
data = dict(
    Employee=['John', 'Jane', 'Tom', 'Lucy', 'Sam'],
    Hours_Worked=[40, 35, 50, 30, 45],
    Hourly_Rate=[15, 20, 25, 22, 18],
    Department=['HR', 'Finance', 'IT', 'Finance', 'HR'],
)

# Each dict key becomes a column; rows get the default 0..4 integer index.
df = pd.DataFrame(data)
print(df)

  Employee  Hours_Worked  Hourly_Rate Department
0     John            40           15         HR
1     Jane            35           20    Finance
2      Tom            50           25         IT
3     Lucy            30           22    Finance
4      Sam            45           18         HR


In [4]:
### Iterrows problem
# Build a Total_Pay column with iterrows(): each employee's pay is Hours_Worked * Hourly_Rate.
# Every value is written back one cell at a time with .at, keyed on the row's index label.
for idx, employee in df.iterrows():
    hours = employee['Hours_Worked']
    rate = employee['Hourly_Rate']
    df.at[idx, 'Total_Pay'] = hours * rate

print(df)

  Employee  Hours_Worked  Hourly_Rate Department  Total_Pay
0     John            40           15         HR      600.0
1     Jane            35           20    Finance      700.0
2      Tom            50           25         IT     1250.0
3     Lucy            30           22    Finance      660.0
4      Sam            45           18         HR      810.0


In [None]:
### Iterrows Problem
''' 
Problem: Calculate the bonus for each employee based on their department and hours worked, and create a new column Bonus using iterrows():
    Employees in HR get a bonus of 10% of their total pay if they worked more than 40 hours.
    Employees in Finance get a bonus of 15% of their total pay if they worked more than 30 hours.
    Employees in IT get a flat bonus of $200 regardless of hours worked.
'''
# Create base function 
def bonus_program(df):
    """Write a ``Bonus`` column onto ``df`` in place, row by row via ``iterrows()``.

    Bonus rules (a row belongs to one department, so at most one rule applies):
      - HR: 10% of ``Total_Pay`` when ``Hours_Worked`` is greater than 40.
      - Finance: 15% of ``Total_Pay`` when ``Hours_Worked`` is greater than 30.
      - IT: flat 200 regardless of hours worked.
      - Any other row: 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Department``, ``Hours_Worked`` and ``Total_Pay`` columns.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with ``Bonus`` set for every row.
    """
    # Seed the column first so rows that match no rule get 0.0 instead of NaN,
    # stale values from an earlier run are overwritten, and the column exists
    # even when the frame is empty or no row qualifies.
    df['Bonus'] = 0.0

    for index, row in df.iterrows():
        # Cases for bonus structure
        if row['Hours_Worked'] > 40 and row['Department'] == 'HR':
            df.at[index, 'Bonus'] = 0.1 * row['Total_Pay']

        elif row['Hours_Worked'] > 30 and row['Department'] == 'Finance':
            df.at[index, 'Bonus'] = 0.15 * row['Total_Pay']

        elif row['Department'] == 'IT':
            df.at[index, 'Bonus'] = 200

    return df

# Call the iterrows-based helper directly (DataFrame.apply is not involved here).
# bonus_program writes the Bonus column onto df itself and returns that same object,
# so bonus_df and df refer to one DataFrame and printing df shows the new column.
bonus_df = bonus_program(df)
print(df)

In [9]:
### Itertuples Problem
# Walk the rows as named tuples and report name and total pay for every
# Finance employee who worked more than 30 hours.
for row in df.itertuples():
    # Guard clause: skip anyone outside Finance or not above the 30-hour mark.
    if row.Department != 'Finance' or not row.Hours_Worked > 30:
        continue
    print(f'Employee: {row.Employee}, Total Pay: {row.Total_Pay}')


Employee: Jane, Total Pay: 700.0


In [23]:
### Apply Problem
''' 
Problem: Calculate the bonus for each employee based on their department and hours worked, and create a new column Bonus using apply():
    Employees in HR get a bonus of 10% of their total pay if they worked more than 40 hours.
    Employees in Finance get a bonus of 15% of their total pay if they worked more than 30 hours.
    Employees in IT get a flat bonus of $200 regardless of hours worked.
'''
# Using apply to calculate Bonus
def calculate_bonus(row):
    """Return the bonus for a single employee row.

    ``row`` is any mapping-like object (e.g. a pandas Series passed by
    ``DataFrame.apply(axis=1)``) exposing ``Hours_Worked``, ``Hourly_Rate``
    and ``Department``. Total pay is recomputed here as hours * rate.

    Rules:
      - IT: flat 200 regardless of hours.
      - HR: 10% of total pay when more than 40 hours were worked.
      - Finance: 15% of total pay when more than 30 hours were worked.
      - Otherwise: 0.
    """
    hours = row['Hours_Worked']
    gross = hours * row['Hourly_Rate']
    dept = row['Department']

    # Departments are mutually exclusive, so the guards can be checked in any order.
    if dept == 'IT':
        return 200
    if dept == 'HR' and hours > 40:
        return gross * 0.10
    if dept == 'Finance' and hours > 30:
        return gross * 0.15
    return 0

# axis=1 hands each row to calculate_bonus; the result is one bonus value per row.
bonus_by_row = df.apply(calculate_bonus, axis=1)
df['Bonus'] = bonus_by_row
print(df)


  Employee  Hours_Worked  Hourly_Rate Department  Total_Pay  Bonus
0     John            40           15         HR      600.0    0.0
1     Jane            35           20    Finance      700.0  105.0
2      Tom            50           25         IT     1250.0  200.0
3     Lucy            30           22    Finance      660.0    0.0
4      Sam            45           18         HR      810.0   81.0
