# Step 1: Import Libraries

In [94]:
import numpy as np

# Step 2: Simulate Dataset

In [95]:
# Seed NumPy's global generator so every run draws the same sample
np.random.seed(42)

# Candidate values for each categorical field
job_titles = np.array([
    'Data Analyst', 'Data Scientist', 'Software Engineer', 'DevOps Engineer',
    'UI/UX Designer', 'Product Manager', 'Cybersecurity Analyst',
    'Mobile App Developer', 'AI/ML Engineer', 'Cloud Architect',
    'Full-Stack Developer', 'Digital Marketer', 'Project Manager',
    'Business Analyst', 'Big Data Engineer', 'Blockchain Developer',
    'Software Architect', 'IT Project Manager', 'Information Security Manager',
    'Machine Learning Architect',
])
cities = np.array(['Lagos', 'Abuja', 'Port Harcourt', 'Enugu', 'Ibadan'])
employment_types = np.array(['Remote', 'On-site', 'Hybrid'])
experience_levels = np.array(['0-2 yrs', '3-5 yrs', '6-10 yrs', '10+ yrs'])

n = 200  # number of simulated records

# Draw one value per record for each field. The order of these calls fixes
# how the seeded random stream is consumed, so it must not be rearranged.
job_role = np.random.choice(job_titles, size=n)
city = np.random.choice(cities, size=n)
emp_type = np.random.choice(employment_types, size=n)
experience = np.random.choice(experience_levels, size=n)

# (low, high) monthly salary bounds in NGN per job role, entry to senior level
salary_ranges = {
    'Data Analyst': (200_000, 800_000),
    'Data Scientist': (500_000, 2_000_000),
    'Software Engineer': (300_000, 1_500_000),
    'DevOps Engineer': (400_000, 1_250_000),
    'UI/UX Designer': (150_000, 1_200_000),
    'Product Manager': (300_000, 2_500_000),
    'Cybersecurity Analyst': (200_000, 2_000_000),
    'Mobile App Developer': (300_000, 1_000_000),
    'AI/ML Engineer': (500_000, 2_000_000),
    'Cloud Architect': (1_250_000, 2_080_000),
    'Full-Stack Developer': (300_000, 1_000_000),
    'Digital Marketer': (100_000, 1_000_000),
    'Project Manager': (400_000, 1_500_000),
    'Business Analyst': (300_000, 1_000_000),
    'Big Data Engineer': (250_000, 1_000_000),
    'Blockchain Developer': (400_000, 1_250_000),
    'Software Architect': (500_000, 1_500_000),
    'IT Project Manager': (500_000, 1_500_000),
    'Information Security Manager': (600_000, 1_500_000),
    'Machine Learning Architect': (500_000, 1_500_000),
}

# One salary per record, drawn from that record's role range.
# np.random.randint excludes the upper bound, so `high` itself is never drawn.
salary = np.array([
    np.random.randint(low, high)
    for low, high in (salary_ranges[role] for role in job_role)
])


# Step 3: Basic Summary Statistics

In [96]:
# Headline figures for the simulated salaries (NGN)
mean_salary = salary.mean()
top_salary = salary.max()
bottom_salary = salary.min()

print(f"Average Salary: ₦{mean_salary:,.2f}")
print(f"Highest Salary: ₦{top_salary:,}")
print(f"Lowest Salary: ₦{bottom_salary:,}")

Average Salary: ₦918,560.54
Highest Salary: ₦2,149,396
Lowest Salary: ₦101,062


# Step 4: Average Salary by Job Role

In [97]:
# Average salary for every job role that occurs in the sample.
# Grouping on the observed labels (job_role) rather than the full catalogue
# (job_titles) guarantees each mask selects at least one record, so np.mean
# never runs on an empty slice (which would print nan and emit a
# RuntimeWarning). np.unique returns sorted values, so output is alphabetical.
for title in np.unique(job_role):
    avg_sal = np.mean(salary[job_role == title])
    print(f"{title}: Average Salary = ₦{avg_sal:,.2f}")

AI/ML Engineer: Average Salary = ₦1,358,239.45
Big Data Engineer: Average Salary = ₦662,221.15
Blockchain Developer: Average Salary = ₦767,985.43
Business Analyst: Average Salary = ₦760,689.33
Cloud Architect: Average Salary = ₦1,751,636.14
Cybersecurity Analyst: Average Salary = ₦982,083.20
Data Analyst: Average Salary = ₦525,781.54
Data Scientist: Average Salary = ₦1,335,692.42
DevOps Engineer: Average Salary = ₦814,235.20
Digital Marketer: Average Salary = ₦635,208.57
Full-Stack Developer: Average Salary = ₦560,209.00
IT Project Manager: Average Salary = ₦1,078,145.60
Information Security Manager: Average Salary = ₦1,055,738.00
Machine Learning Architect: Average Salary = ₦817,187.30
Mobile App Developer: Average Salary = ₦595,727.27
Product Manager: Average Salary = ₦1,424,346.75
Project Manager: Average Salary = ₦1,106,210.88
Software Architect: Average Salary = ₦941,002.27
Software Engineer: Average Salary = ₦792,471.92
UI/UX Designer: Average Salary = ₦912,705.90


# Step 5: Experience Level Breakdown

In [98]:
# Average salary per experience level present in the sample.
# Iterating over the observed values (experience) instead of the catalogue
# (experience_levels) avoids taking the mean of an empty slice when a level
# happens to have no records. np.unique sorts its result as strings, which is
# why '10+ yrs' is listed before '3-5 yrs'.
for level in np.unique(experience):
    exp_sal = np.mean(salary[experience == level])
    print(f"{level}: Avg Salary = ₦{exp_sal:,.2f}")

0-2 yrs: Avg Salary = ₦891,465.81
10+ yrs: Avg Salary = ₦961,619.16
3-5 yrs: Avg Salary = ₦976,242.09
6-10 yrs: Avg Salary = ₦846,980.89


### Ordered by experience level

In [99]:
# Experience bands from most junior to most senior; listed by hand because
# sorting the labels as strings would not give this order
ordered_experience_levels = ['0-2 yrs', '3-5 yrs', '6-10 yrs', '10+ yrs']

print("📊 Average Salary by Experience Level in Nigeria Tech Jobs\n")

for band in ordered_experience_levels:
    in_band = experience == band
    band_avg = salary[in_band].mean()
    # Left-align the label in 8 characters so the colons line up
    print(f"{band:<8}: Average Salary = ₦{band_avg:,.2f}")

📊 Average Salary by Experience Level in Nigeria Tech Jobs

0-2 yrs : Average Salary = ₦891,465.81
3-5 yrs : Average Salary = ₦976,242.09
6-10 yrs: Average Salary = ₦846,980.89
10+ yrs : Average Salary = ₦961,619.16


### Ordered by average salary (descending)

In [100]:
# Step 1: Collect (experience level, average salary) pairs.
# Only levels that occur in the sample are included: a level with no records
# would yield a nan average, and nan values make the sort order unreliable.
exp_salaries = [
    (level, np.mean(salary[experience == level]))
    for level in np.unique(experience)
]

# Step 2: Sort the list in descending order by average salary
exp_salaries.sort(key=lambda pair: pair[1], reverse=True)

# Step 3: Display results
print("Average Salary by Experience Level (Descending):")
for level, sal in exp_salaries:
    print(f"{level}: Avg Salary = ₦{sal:,.2f}")


Average Salary by Experience Level (Descending):
3-5 yrs: Avg Salary = ₦976,242.09
10+ yrs: Avg Salary = ₦961,619.16
0-2 yrs: Avg Salary = ₦891,465.81
6-10 yrs: Avg Salary = ₦846,980.89


# Step 6: City-Based Analysis

In [101]:
# Average salary per city present in the sample.
# Grouping on the observed column (city) rather than the catalogue (cities)
# ensures every mask selects at least one record, so np.mean is never applied
# to an empty slice (nan plus RuntimeWarning).
for city_name in np.unique(city):
    city_sal = np.mean(salary[city == city_name])
    print(f"{city_name}: Avg Salary = ₦{city_sal:,.2f}")

Abuja: Avg Salary = ₦817,733.36
Enugu: Avg Salary = ₦919,468.86
Ibadan: Avg Salary = ₦858,999.53
Lagos: Avg Salary = ₦985,689.24
Port Harcourt: Avg Salary = ₦1,008,152.80


# Step 7: Employment Type Analysis

In [102]:
# Mean salary for each employment type that occurs in the sample
for work_mode in np.unique(emp_type):
    of_this_mode = emp_type == work_mode
    mode_avg = salary[of_this_mode].mean()
    print(f"{work_mode}: Avg Salary = ₦{mode_avg:,.2f}")

Hybrid: Avg Salary = ₦920,124.82
On-site: Avg Salary = ₦942,099.41
Remote: Avg Salary = ₦900,766.50


# Step 8: Filter High-Paying Remote Roles

In [103]:
# Boolean mask selecting remote records that pay strictly more than ₦500,000
is_remote = emp_type == 'Remote'
pays_over_500k = salary > 500000
high_remote = pays_over_500k & is_remote

# The two arrays are index-aligned: role i earns salary i
print("High-paying Remote Roles:")
print(job_role[high_remote])
print(salary[high_remote])

High-paying Remote Roles:
['Machine Learning Architect' 'Mobile App Developer' 'DevOps Engineer'
 'Data Analyst' 'Digital Marketer' 'Digital Marketer'
 'Machine Learning Architect' 'Software Engineer' 'Cybersecurity Analyst'
 'IT Project Manager' 'IT Project Manager' 'AI/ML Engineer'
 'Cybersecurity Analyst' 'Business Analyst' 'DevOps Engineer'
 'Data Scientist' 'DevOps Engineer' 'Blockchain Developer'
 'Big Data Engineer' 'Business Analyst' 'Mobile App Developer'
 'Project Manager' 'IT Project Manager' 'Project Manager'
 'Project Manager' 'Digital Marketer' 'Full-Stack Developer'
 'Software Architect' 'Software Engineer' 'Data Analyst' 'UI/UX Designer'
 'Cybersecurity Analyst' 'Cybersecurity Analyst' 'Digital Marketer'
 'Data Scientist' 'Data Analyst' 'UI/UX Designer' 'UI/UX Designer'
 'Big Data Engineer' 'Software Engineer' 'UI/UX Designer'
 'Business Analyst' 'AI/ML Engineer' 'Cloud Architect' 'UI/UX Designer'
 'Big Data Engineer' 'Information Security Manager' 'Project Manager'
 'P

# Step 9: Find Roles With at Least One Salary Above the Overall Average

In [104]:
# Mean salary across every record in the sample
overall_avg = salary.mean()

# A role is listed as soon as ONE of its records exceeds the overall mean;
# this does not compare per-role averages against the overall mean.
earns_above_avg = salary > overall_avg
above_avg = job_role[earns_above_avg]
unique_roles = np.unique(above_avg)  # de-duplicated and sorted

print("Roles with Salaries Above Industry Average:")
print(unique_roles)

Roles with Salaries Above Industry Average:
['AI/ML Engineer' 'Big Data Engineer' 'Blockchain Developer'
 'Business Analyst' 'Cloud Architect' 'Cybersecurity Analyst'
 'Data Scientist' 'DevOps Engineer' 'Digital Marketer'
 'IT Project Manager' 'Information Security Manager'
 'Machine Learning Architect' 'Mobile App Developer' 'Product Manager'
 'Project Manager' 'Software Architect' 'Software Engineer'
 'UI/UX Designer']


# Step 10: Identify Job Role with Highest Spread in Salaries

In [105]:
# Spread (max - min salary) for every job role that occurs in the sample.
# Grouping on the observed labels (job_role) rather than the catalogue
# (job_titles) matters here: np.max / np.min raise ValueError on an empty
# array, so a role with no records would crash the cell.
print("\nSalary Range per Role:")
for role in np.unique(job_role):
    role_salaries = salary[job_role == role]
    spread = np.max(role_salaries) - np.min(role_salaries)
    print(f"{role}: Spread = ₦{spread:,}")


Salary Range per Role:
AI/ML Engineer: Spread = ₦965,954
Big Data Engineer: Spread = ₦717,022
Blockchain Developer: Spread = ₦721,639
Business Analyst: Spread = ₦297,012
Cloud Architect: Spread = ₦759,791
Cybersecurity Analyst: Spread = ₦1,610,389
Data Analyst: Spread = ₦520,438
Data Scientist: Spread = ₦1,401,423
DevOps Engineer: Spread = ₦764,939
Digital Marketer: Spread = ₦888,451
Full-Stack Developer: Spread = ₦570,818
IT Project Manager: Spread = ₦538,077
Information Security Manager: Spread = ₦886,135
Machine Learning Architect: Spread = ₦834,586
Mobile App Developer: Spread = ₦623,814
Product Manager: Spread = ₦1,567,124
Project Manager: Spread = ₦878,123
Software Architect: Spread = ₦866,345
Software Engineer: Spread = ₦773,808
UI/UX Designer: Spread = ₦665,535


# Step 11: Comparing Salary vs. Experience using Percentiles

This helps identify how salaries vary across experience levels in terms of statistical spread (25th, 50th, 75th percentiles).

In [106]:
# Quartiles of salary within each experience level present in the sample.
# Iterating over the observed values (experience) instead of the catalogue
# (experience_levels) guarantees np.percentile never receives an empty array.
print("Salary Percentiles by Experience Level:")
for level in np.unique(experience):
    sal_by_exp = salary[experience == level]
    # One call computes all three percentiles; 50th is the median
    p25, p50, p75 = np.percentile(sal_by_exp, [25, 50, 75])
    print(f"{level} → 25th: ₦{p25:,.0f}, 50th (median): ₦{p50:,.0f}, 75th: ₦{p75:,.0f}")

Salary Percentiles by Experience Level:
0-2 yrs → 25th: ₦586,717, 50th (median): ₦810,736, 75th: ₦1,113,157
10+ yrs → 25th: ₦639,924, 50th (median): ₦907,689, 75th: ₦1,193,759
3-5 yrs → 25th: ₦512,920, 50th (median): ₦950,199, 75th: ₦1,333,255
6-10 yrs → 25th: ₦600,188, 50th (median): ₦728,861, 75th: ₦1,070,912


# Step 12: Correlation Between Job Role and Salary using Group Averages

This reveals which job roles command higher or lower average salaries.

In [107]:
# (role, mean salary) for each role in the sample, highest-paying role first
job_avg_salaries = sorted(
    (
        (role_name, np.mean(salary[job_role == role_name]))
        for role_name in np.unique(job_role)
    ),
    key=lambda pair: pair[1],
    reverse=True,
)

print("\nAverage Salary by Job Role (Descending):")
for role_name, role_avg in job_avg_salaries:
    print(f"{role_name}: ₦{role_avg:,.0f}")


Average Salary by Job Role (Descending):
Cloud Architect: ₦1,751,636
Product Manager: ₦1,424,347
AI/ML Engineer: ₦1,358,239
Data Scientist: ₦1,335,692
Project Manager: ₦1,106,211
IT Project Manager: ₦1,078,146
Information Security Manager: ₦1,055,738
Cybersecurity Analyst: ₦982,083
Software Architect: ₦941,002
UI/UX Designer: ₦912,706
Machine Learning Architect: ₦817,187
DevOps Engineer: ₦814,235
Software Engineer: ₦792,472
Blockchain Developer: ₦767,985
Business Analyst: ₦760,689
Big Data Engineer: ₦662,221
Digital Marketer: ₦635,209
Mobile App Developer: ₦595,727
Full-Stack Developer: ₦560,209
Data Analyst: ₦525,782


# Step 13: Simulating Gender Breakdown and Computing Gender Pay Gap

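Let's randomly assign gender, then compute average salaries by gender and the gap. Because gender is drawn at random, independently of salary, any gap reported here is sampling noise rather than a real effect.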

In [108]:
# Assign a gender label to each of the n records; the draw does not use salary
genders = np.random.choice(['Male', 'Female'], size=n)

# Mean salary within each gender group
is_male = genders == 'Male'
is_female = genders == 'Female'
male_avg = salary[is_male].mean()
female_avg = salary[is_female].mean()

# Gap in NGN, and as a percentage of the male average
pay_gap = male_avg - female_avg
pay_gap_percent = pay_gap / male_avg * 100

print("\nGender Pay Analysis:")
print(f"Average Salary (Male): ₦{male_avg:,.2f}")
print(f"Average Salary (Female): ₦{female_avg:,.2f}")
print(f"Pay Gap: ₦{pay_gap:,.2f} ({pay_gap_percent:.2f}% difference)")


Gender Pay Analysis:
Average Salary (Male): ₦952,775.62
Average Salary (Female): ₦880,743.87
Pay Gap: ₦72,031.75 (7.56% difference)


# Step 14: Classify Salaries into Categories (Low, Mid, High)

This helps segment salaries for better interpretation and possible targeting strategies.

In [109]:
# Band edges in NGN (tunable): below low_thresh is 'Low',
# low_thresh up to and including high_thresh is 'Mid', anything above is 'High'
low_thresh = 300000
high_thresh = 600000

# np.select takes the first matching condition per element, so the second
# condition only applies to salaries that are not already 'Low'
salary_categories = np.select(
    [salary < low_thresh, salary <= high_thresh],
    ['Low', 'Mid'],
    default='High',
)

# Tally records per category (np.unique returns the labels sorted)
unique, counts = np.unique(salary_categories, return_counts=True)

print("\nSalary Category Distribution:")
for label, tally in zip(unique, counts):
    print(f"{label}: {tally} jobs")


Salary Category Distribution:
High: 146 jobs
Low: 6 jobs
Mid: 48 jobs
