import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Sample employee records used by the analysis below: one entry per employee,
# with the five lists aligned by position (20 employees in total).
data = {
    'EmployeeID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                   11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
    'Age': [23, 28, 37, 23, 55, 32, 58, 46, 53, 58,
            29, 46, 49, 57, 53, 57, 43, 29, 23, 50],
    'Department': ['Finance', 'Finance', 'HR', 'HR', 'IT',
                   'Sales', 'Finance', 'Finance', 'HR', 'HR',
                   'HR', 'HR', 'IT', 'Sales', 'IT',
                   'HR', 'HR', 'Sales', 'IT', 'IT'],
    'Experience': [8, 2, 8, 23, 29, 10, 6, 34, 2, 17,
                   13, 14, 20, 32, 33, 4, 18, 20, 14, 28],
    'Salary': [93563, 41742, 56905, 138397, 96879,
               123436, 94781, 144637, 131361, 46377,
               107468, 105752, 122125, 79949, 69121,
               83010, 96227, 143220, 134907, 140206],
}

# Tabular view of the records; every analysis and plot starts from this frame.
df = pd.DataFrame(data)

def analyze_employee_data(df):
    """Print a multi-section salary report and return an enriched copy of *df*.

    The report is written to stdout and covers overall salary statistics,
    per-department, per-age-group and per-experience-group summaries, the
    experience/salary correlation, the top earners, a department by
    experience-group pivot, and two derived ratios.

    Args:
        df: DataFrame with the columns 'EmployeeID', 'Age', 'Department',
            'Experience' and 'Salary'.

    Returns:
        A copy of *df* with four extra columns: 'Age Group',
        'Experience Group', 'Exp-Age Ratio' and 'Salary per Year Exp'.
        The frame passed in is left untouched.

    Notes:
        Ages outside 20-60 and experience outside 0-35 years fall outside
        the bins and get a missing (NaN) group.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()

    print("Employee Salary Analysis\n" + "=" * 25 + "\n")

    print("1. Basic Statistics:")
    print(f"Total number of employees: {len(df)}")
    print(f"Average salary: ${df['Salary'].mean():.2f}")
    print(f"Median salary: ${df['Salary'].median():.2f}")
    print(f"Highest salary: ${df['Salary'].max():.2f}")
    print(f"Lowest salary: ${df['Salary'].min():.2f}")
    print(f"Salary standard deviation: ${df['Salary'].std():.2f}\n")

    print("2. Department Analysis:")
    dept_analysis = df.groupby('Department').agg({
        'EmployeeID': 'count',
        'Salary': ['mean', 'median', 'min', 'max', 'std']
    }).round(2)

    # The dict-style agg yields two-level column labels; flatten them.
    dept_analysis.columns = ['Count', 'Avg Salary', 'Median Salary', 'Min Salary', 'Max Salary', 'Std Dev']
    print(dept_analysis)
    print()

    print("3. Age Group Analysis:")
    # include_lowest=True puts age 20 itself into '20-30'; by default the
    # first interval is open on the left and would leave it ungrouped.
    df['Age Group'] = pd.cut(df['Age'], bins=[20, 30, 40, 50, 60],
                             labels=['20-30', '31-40', '41-50', '51-60'],
                             include_lowest=True)

    # observed=False keeps empty bins in the table (the long-standing
    # behaviour) without relying on the deprecated implicit default.
    age_analysis = df.groupby('Age Group', observed=False).agg({
        'EmployeeID': 'count',
        'Salary': ['mean', 'median']
    }).round(2)

    age_analysis.columns = ['Count', 'Avg Salary', 'Median Salary']
    print(age_analysis)
    print()

    print("4. Experience vs Salary Correlation:")
    correlation = df['Experience'].corr(df['Salary'])
    print(f"Correlation coefficient: {correlation:.2f}")
    print(f"This indicates a {'strong' if abs(correlation) > 0.7 else 'moderate' if abs(correlation) > 0.3 else 'weak'} "
          f"{'positive' if correlation > 0 else 'negative'} correlation between experience and salary.\n")

    print("5. Experience Group Analysis:")
    # include_lowest=True so employees with 0 years land in '0-5 yrs'.
    df['Experience Group'] = pd.cut(df['Experience'], bins=[0, 5, 10, 20, 35],
                                    labels=['0-5 yrs', '6-10 yrs', '11-20 yrs', '21-35 yrs'],
                                    include_lowest=True)

    exp_analysis = df.groupby('Experience Group', observed=False).agg({
        'EmployeeID': 'count',
        'Salary': ['mean', 'median', 'min', 'max']
    }).round(2)

    exp_analysis.columns = ['Count', 'Avg Salary', 'Median Salary', 'Min Salary', 'Max Salary']
    print(exp_analysis)
    print()

    print("6. Top 5 Highest Paid Employees:")
    top_earners = df.sort_values(by='Salary', ascending=False).head(5)
    print(top_earners[['EmployeeID', 'Age', 'Department', 'Experience', 'Salary']])
    print()

    print("7. Average Salary by Department and Experience Group:")
    pivot = pd.pivot_table(df, values='Salary', index='Department',
                           columns='Experience Group', aggfunc='mean',
                           observed=False).round(2)
    print(pivot)
    print()

    print("8. Additional Insights:")
    df['Exp-Age Ratio'] = (df['Experience'] / df['Age']).round(2)
    fast_climbers = df.sort_values(by='Exp-Age Ratio', ascending=False).head(3)
    print("Top 3 employees with highest experience-to-age ratio (fast climbers):")
    print(fast_climbers[['EmployeeID', 'Age', 'Department', 'Experience', 'Exp-Age Ratio', 'Salary']])
    print()

    # Mask zero experience to NaN before dividing: otherwise the ratio is
    # inf and those rows would wrongly head the ranking. NaN sorts last.
    years_worked = df['Experience'].where(df['Experience'] > 0)
    df['Salary per Year Exp'] = (df['Salary'] / years_worked).round(2)
    best_value = df.sort_values(by='Salary per Year Exp', ascending=False).head(3)
    print("Top 3 employees with highest salary per year of experience:")
    print(best_value[['EmployeeID', 'Department', 'Experience', 'Salary', 'Salary per Year Exp']])
    print()

    return df

# Run the report; the returned frame carries the derived columns
# (e.g. 'Age Group') that the plotting step below reads.
analyzed_df = analyze_employee_data(df)

def create_visualizations(df):
    """Render three salary charts from *df* and save each one as a PNG.

    Files written to the current working directory:
        - salary_by_department.png: box plot of salary per department
        - salary_vs_experience.png: salary against experience with a
          regression line
        - salary_by_age_department.png: bar chart of salary per age group,
          split by department

    Args:
        df: DataFrame with the columns 'Department', 'Salary', 'Experience'
            and 'Age Group'.

    Raises:
        ValueError: if any of those columns is missing. The check runs
            before anything is drawn so no partial set of files is written.
    """
    required = ('Department', 'Salary', 'Experience', 'Age Group')
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise ValueError(f"create_visualizations: missing required column(s): {missing}")

    sns.set(style="whitegrid")

    fig = plt.figure(figsize=(12, 6))
    sns.boxplot(x='Department', y='Salary', data=df)
    plt.title('Salary Distribution by Department')
    plt.ylabel('Salary ($)')
    plt.tight_layout()
    plt.savefig('salary_by_department.png')
    # Close once saved; pyplot otherwise keeps every figure alive.
    plt.close(fig)

    fig = plt.figure(figsize=(12, 6))
    sns.regplot(x='Experience', y='Salary', data=df, scatter_kws={'alpha': 0.6}, line_kws={'color': 'red'})
    plt.title('Salary vs. Years of Experience')
    plt.xlabel('Years of Experience')
    plt.ylabel('Salary ($)')
    plt.tight_layout()
    plt.savefig('salary_vs_experience.png')
    plt.close(fig)

    fig = plt.figure(figsize=(12, 6))
    # errorbar=None draws the bars without error bars.
    sns.barplot(x='Age Group', y='Salary', hue='Department', data=df, errorbar=None)
    plt.title('Average Salary by Age Group and Department')
    plt.ylabel('Average Salary ($)')
    plt.tight_layout()
    plt.savefig('salary_by_age_department.png')
    plt.close(fig)

create_visualizations(analyzed_df)

# Tell the user which chart files create_visualizations wrote, one per line.
print(
    "Analysis complete. Visualizations have been saved to:\n"
    "- salary_by_department.png\n"
    "- salary_vs_experience.png\n"
    "- salary_by_age_department.png"
)