In [1]:
%run set_theme.ipynb

In [2]:
import pandas as pd
import plotly.express as px
import plotly.colors as pc

In [3]:
# Load the data set from its Parquet file.
df = pd.read_parquet('../data/SO_2014_2022.pq')

# Keep only the rows whose salary lies strictly between 0 and 250000.
df = df.loc[df['Salary'].gt(0) & df['Salary'].lt(250000)]

df.head()

Unnamed: 0,Year,Salary,JobSat,YearsCode,YearsCodePro,Age,Education,OrgSize,LastNewJob,Employment,RespondentType,JobSeek,Gender,Student,Country,CodingActivities,DevType,LearnCodeFrom,LangPresent
0,2022,69318.0,,10,5,25-34,master,500 to 999 employees,,fulltime,dev,,male,no,Germany,School or academic work,Data scientist or machine learning specialist;...,"Books / Physical media;School (i.e., Universit...",C;C++;Java;JavaScript;MATLAB;Python;Scala;SQL;...
6,2022,27652.0,,18,10,25-34,bachelor,"1,000 to 4,999 employees",,fulltime,dev,,male,no,Colombia,Hobby,"Developer, full-stack;Developer, back-end",Books / Physical media;Other online resources ...,Bash/Shell/PowerShell;Elixir;HTML/CSS;JavaScri...
9,2022,15431.0,,5,5,25-34,bachelor,20 to 99 employees,,fulltime,dev,,male,no,Ghana,Freelance/contract work,"Developer, back-end",On the job training;Coding Bootcamp,JavaScript;Ruby
13,2022,47352.0,,7,7,45-54,master,10 to 19 employees,,fulltime,non-dev,,male,no,Belgium,Hobby,"Developer, back-end;Educator or academic;Datab...",Books / Physical media;On the job training;Col...,Delphi;SQL
22,2022,78084.0,,25,25,45-54,bachelor,500 to 999 employees,,fulltime,non-dev,,male,no,Canada,Hobby;Contribute to open-source projects,"Engineer, site reliability;Security professional",Books / Physical media;Other online resources ...,Bash/Shell/PowerShell;C;JavaScript;Perl;PHP;Py...


In [4]:
def _mean_salary_by_country(frame: pd.DataFrame, gender: str, column: str) -> pd.DataFrame:
    """Return the mean salary per country for one gender, stored under `column`."""
    return (
        frame[frame['Gender'] == gender]
        .groupby(['Country'], as_index=False)
        .agg({'Salary': 'mean'})
        .rename(columns={'Salary': column})
    )


def get_salary_gap_by_age(age_range: str, data: 'pd.DataFrame | None' = None) -> pd.DataFrame:
    """Compute the per-country salary gap between men and women for one age range.

    Parameters
    ----------
    age_range:
        Value of the ``Age`` column to select (compared with ``==``).
    data:
        Frame with ``Gender``, ``Age``, ``Country`` and ``Salary`` columns.
        When omitted, the notebook-level ``df`` is used, which keeps existing
        one-argument calls working unchanged.

    Returns
    -------
    pd.DataFrame
        One row per country that appears in both the male and the female
        aggregation (inner merge), with the columns ``Country``,
        ``SalaryMen``, ``SalaryWomen``, ``SalaryGap`` (absolute difference of
        the means) and ``SalaryGapPercent`` (signed difference relative to the
        male mean, in percent; positive when the male mean is higher).
    """
    if data is None:
        # Fall back to the frame loaded earlier in the notebook.
        data = df

    in_age_range = data[data['Age'] == age_range]

    # Mean salary per country, computed separately for each gender.
    salary_men_df = _mean_salary_by_country(in_age_range, 'male', 'SalaryMen')
    salary_women_df = _mean_salary_by_country(in_age_range, 'female', 'SalaryWomen')

    # Inner merge: only countries present in both aggregations are kept.
    new_df = pd.merge(salary_men_df, salary_women_df, on='Country')

    difference = new_df['SalaryMen'] - new_df['SalaryWomen']
    new_df['SalaryGap'] = difference.abs()
    new_df['SalaryGapPercent'] = difference / new_df['SalaryMen'] * 100

    return new_df


def make_title(age_range: str) -> str:
    """Build the figure title for the given age range.

    The age range is inserted verbatim between the parentheses of the title,
    followed by the words "years old".
    """
    title = f'Global salary gap distribution between men and women ({age_range} years old)'
    return title

In [48]:
# Gather all age ranges.
age_bins = df['Age'].unique().dropna().sort_values()

# Get the salary gap dataframe, given the first age range,
# which is the slider's first value.
salary_gap_df = get_salary_gap_by_age(age_bins[0])

# Plot the initial map.
fig = px.choropleth(
    salary_gap_df,
    locations='Country',
    locationmode='country names',
    color_continuous_scale=pc.make_colorscale(['#ff1df1', '#fff', '#352bff']),
    range_color=[-100, 100],
    color='SalaryGapPercent',
    hover_data={'SalaryGapPercent': False},
    labels={'SalaryGapPercent': 'Salary Gap'},
    title=make_title(age_bins[0]),
    width=790, # precise width of the desktop container width
    height=600
)

# Some additional map config.
fig.update_geos(showcountries=True, showcoastlines=False)
fig.update_layout(geo={'showocean': True, 'oceancolor': '#a8d5f2'})

# Create the slider: one step per age range.
steps = []
for age_range in age_bins:
    # Use a separate name so the initial `salary_gap_df` is not overwritten.
    step_gap_df = get_salary_gap_by_age(age_range)
    steps.append(dict(
        method='update',
        args=[
            # Restyle `locations` together with `z`: each age range yields its
            # own merged frame, so its rows are not guaranteed to match the
            # countries (or their order) of the initial trace. Updating `z`
            # alone could attach values to the wrong countries.
            {
                'locations': [step_gap_df['Country'].tolist()],
                'z': [step_gap_df['SalaryGapPercent'].tolist()],
            },
            {'title': make_title(age_range)},
        ],
        label=str(age_range),
    ))

fig.update_layout(
    sliders=[{
        'active': 0,
        'currentvalue': {'prefix': 'Age: '},
        'steps': steps
    }],
    margin={'t': 80, 'r': 20, 'b': 80, 'l': 20},
    # Horizontal colour bar near the top, labelled by direction of the gap
    # instead of raw percentages.
    coloraxis_colorbar={
        'x': 0.5,
        'y': 0.97,
        'orientation': 'h',
        'len': 0.5,
        'thickness': 10,
        'title': '',
        'tickvals': [-100, 0, 100],
        'ticktext': ['female-favoured', 'neutral', 'male-favoured'],
    }
)

# Same hover text for every trace: country name plus the gap in percent.
fig.update_traces(hovertemplate='<b>%{location}</b><br>Salary gap: %{z:d}%')

fig.show()

In [7]:
%%html
<style>
/* Slider rail: solid fill, no outline, 7px tall.
   NOTE(review): these selectors presumably target the SVG elements of the
   Plotly slider rendered by the figure above; confirm against the output. */
.slider-container .slider-rail-rect {
    fill: #d8bea1 !important;
    stroke-width: 0 !important;
    height: 7px !important;
}
/* Slider grip: dark fill, no outline, soft drop shadow around it. */
.slider-grip-rect {
    fill: #343a42 !important;
    stroke-width: 0 !important;
    filter: drop-shadow(0 0 3px rgba(0, 0, 0, 0.3));
}
</style>