In [4]:
# Restore the `df2023` variable that was saved earlier with IPython's `%store`
# magic; every cell below reads from it (presumably a pandas DataFrame, since
# the later cells call `.groupby` on it — confirm against the producing notebook).
%store -r df2023

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px


## Salary Analysis

In [3]:
import plotly.express as px

# Mean salary per education level, flattened back into a two-column frame.
avg_salary = (
    df2023
    .groupby('EdLevel')['SalaryUSD']
    .mean()
    .reset_index()
)

# One bar per education level, with human-readable axis titles.
fig_bar = px.bar(
    avg_salary,
    x='EdLevel',
    y='SalaryUSD',
    title="Average Salary by Education Level",
    labels={'SalaryUSD': 'Average Salary', 'EdLevel': 'Education Level'},
)

# Centre the enlarged title and tighten the margins. `update_layout` returns
# the figure, so keeping it as the last expression renders the chart.
fig_bar.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)


In [4]:
# Mean salary per remote-work mode, flattened back into a two-column frame.
avg_salary = df2023.groupby('RemoteWork')['SalaryUSD'].mean().reset_index()

# The x-axis shows the RemoteWork column, so it is labelled 'Remote Work'.
# (It was previously mislabelled 'Education Level', a leftover from the
# education-level chart this cell was copied from.)
fig_bar = px.bar(avg_salary, x='RemoteWork', y='SalaryUSD',
                  title="Average Salary by RemoteWork",
                  labels={'SalaryUSD': 'Average Salary', 'RemoteWork': 'Remote Work'})

# Centre the enlarged title and tighten the margins. `update_layout` returns
# the figure, so keeping it as the last expression renders the chart.
fig_bar.update_layout(title_x=0.5, title_font=dict(size=24), margin=dict(t=60, b=20, l=20, r=20))


In [5]:
# Mean salary per ProfessionalTech value, flattened back into a two-column frame.
avg_salary = (
    df2023
    .groupby('ProfessionalTech')['SalaryUSD']
    .mean()
    .reset_index()
)

# One bar per ProfessionalTech value, with human-readable axis titles.
fig_bar = px.bar(
    avg_salary,
    x='ProfessionalTech',
    y='SalaryUSD',
    title="Average Salary by ProfessionalTech",
    labels={'SalaryUSD': 'Average Salary', 'ProfessionalTech': 'Profession'},
)

# Centre the enlarged title and tighten the margins. `update_layout` returns
# the figure, so keeping it as the last expression renders the chart.
fig_bar.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)

In [6]:
# Mean salary per industry, flattened back into a two-column frame.
avg_salary = (
    df2023
    .groupby('Industry')['SalaryUSD']
    .mean()
    .reset_index()
)

# One bar per industry, with human-readable axis titles.
fig_bar = px.bar(
    avg_salary,
    x='Industry',
    y='SalaryUSD',
    title="Average Salary by Industry",
    labels={'SalaryUSD': 'Average Salary', 'Industry': 'Industry'},
)

# This chart uses a larger title font and wider margins than the other salary
# bar charts. `update_layout` returns the figure, so keeping it as the last
# expression renders the chart.
fig_bar.update_layout(
    title_x=0.5,
    title_font={'size': 34},
    margin={'t': 80, 'b': 30, 'l': 50, 'r': 30},
)

In [7]:
def prepare_data_for_treemap(data, column, salary='SalaryUSD'):
    """Sum `salary` for every (`column`, 'Country') pair.

    Parameters
    ----------
    data : DataFrame containing `column`, 'Country' and `salary` columns.
    column : name of the column to group by alongside 'Country'.
    salary : name of the column to total (default 'SalaryUSD').

    Returns
    -------
    DataFrame with the columns `column`, 'Country' and `salary`, one row per
    group, where `salary` holds the group total.
    """
    group_keys = [column, 'Country']
    totals = data.groupby(group_keys)[salary].sum()
    # Move the group keys out of the index so they become ordinary columns.
    return totals.reset_index()
def plot_salary_treemap(data, column, salary='SalaryUSD'):
    """Show a Country -> `column` treemap of summed salary.

    Only the 20 countries with the largest summed `salary` are kept. Tile
    size and tile colour both encode the summed salary of each
    (country, `column`) pair.

    Parameters
    ----------
    data : DataFrame containing 'Country', `column` and `salary` columns.
    column : name of the column nested beneath each country in the treemap.
    salary : name of the column to total (default 'SalaryUSD').

    Returns
    -------
    None. The figure is displayed through `fig.show()`.
    """
    # Rank countries by their salary total and keep rows from the top 20 only.
    country_totals = data.groupby('Country')[salary].sum()
    top_countries = country_totals.nlargest(20).index
    in_top_countries = data['Country'].isin(top_countries)
    df_sum = prepare_data_for_treemap(data[in_top_countries], column, salary)

    # Use Viridis when `column` has more distinct values than the Plasma
    # scale has entries; otherwise use Plasma.
    plasma = px.colors.sequential.Plasma
    n_categories = len(df_sum[column].unique())
    colormap = px.colors.sequential.Viridis if n_categories > len(plasma) else plasma

    fig = px.treemap(
        df_sum,
        path=['Country', column],
        values=salary,
        color=salary,
        title=f"Treemap of Salary Distribution by {column} in top 20 countries",
        labels={salary: 'Total Salary'},
        color_continuous_scale=colormap,
    )

    # Centre the enlarged title, tighten the margins and title the colour bar.
    fig.update_layout(
        title_x=0.5,
        title_font={'size': 24},
        margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
        coloraxis_colorbar={'title': 'Total Salary'},
    )
    fig.show()
# Treemap of summed salary by remote-work mode within the top-20 countries.
plot_salary_treemap(df2023, column='RemoteWork')


In [8]:
# Treemap of summed salary by education level within the top-20 countries.
plot_salary_treemap(df2023, column='EdLevel')

### Final Salary Analysis
##### The population earning an average of 100k or more holds only a Bachelor's degree and opts for remote work, the work mode encouraged in top-performing countries. Professionally, this population is distributed across developer-tool services such as continuous integration and delivery, as well as microservices, mostly providing these services in the finance, healthcare, advertising and higher-education industries. The US has the highest pay range and is also the country where remote work is encouraged the most.

## Learning

In [17]:
import plotly.express as px

# Count the rows for every (language, online learning resource) pair.
df_counts = (
    df2023
    .groupby(['LanguageHaveWorkedWith', 'LearnCodeOnline'])
    .size()
    .reset_index(name='Count')
)

# Keep only the 30 most frequent pairs so the treemap stays readable.
top_30_combinations = df_counts.nlargest(30, 'Count')
custom_color_scale = px.colors.sequential.Blues

# Tile size and tile colour both encode the pair count.
fig_treemap = px.treemap(
    top_30_combinations,
    path=['LanguageHaveWorkedWith', 'LearnCodeOnline'],
    values='Count',
    title="Treemap of Learning Code",
    labels={'Count': 'Count'},
    color='Count',
    color_continuous_scale=custom_color_scale,
)
fig_treemap.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)
fig_treemap.show()

In [9]:
import plotly.express as px

# Count the rows for every (language, course/certification platform) pair.
df_counts = (
    df2023
    .groupby(['LanguageHaveWorkedWith', 'LearnCodeCoursesCert'])
    .size()
    .reset_index(name='Count')
)

# Keep only the 20 most frequent pairs so the treemap stays readable.
top_20_combinations = df_counts.nlargest(20, 'Count')

# Tile size encodes the pair count; no continuous colour scale is applied.
fig_treemap = px.treemap(
    top_20_combinations,
    path=['LanguageHaveWorkedWith', 'LearnCodeCoursesCert'],
    values='Count',
    title="Treemap of Programming Languages and Learning Platforms",
    labels={'Count': 'Count'},
)
fig_treemap.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)
fig_treemap.show()

In [10]:
import plotly.express as px

# Count the rows for every (database, course/certification platform) pair.
df_counts = (
    df2023
    .groupby(['DatabaseHaveWorkedWith', 'LearnCodeCoursesCert'])
    .size()
    .reset_index(name='Count')
)

# Keep only the 20 most frequent pairs so the treemap stays readable.
top_20_combinations = df_counts.nlargest(20, 'Count')

# Tile size encodes the pair count; no continuous colour scale is applied.
fig_treemap = px.treemap(
    top_20_combinations,
    path=['DatabaseHaveWorkedWith', 'LearnCodeCoursesCert'],
    values='Count',
    title="Treemap of Database Languages and Learning Platforms",
    labels={'Count': 'Count'},
)
fig_treemap.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)
fig_treemap.show()

In [18]:
import plotly.express as px

# Count the rows for every (database, online learning resource) pair.
df_counts = (
    df2023
    .groupby(['DatabaseHaveWorkedWith', 'LearnCodeOnline'])
    .size()
    .reset_index(name='Count')
)

# Keep only the 20 most frequent pairs so the treemap stays readable.
top_20_combinations = df_counts.nlargest(20, 'Count')
custom_color_scale = px.colors.sequential.Blues

# Tile size and tile colour both encode the pair count.
fig_treemap = px.treemap(
    top_20_combinations,
    path=['DatabaseHaveWorkedWith', 'LearnCodeOnline'],
    values='Count',
    title="Treemap of Learning Database",
    labels={'Count': 'Count'},
    color='Count',
    color_continuous_scale=custom_color_scale,
)
fig_treemap.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)
fig_treemap.show()


In [19]:
import plotly.express as px

# Count the rows for every (online learning resource, course/certification
# platform) pair.
df_counts = (
    df2023
    .groupby(['LearnCodeOnline', 'LearnCodeCoursesCert'])
    .size()
    .reset_index(name='Count')
)

# Keep only the 20 most frequent pairs so the treemap stays readable.
top_20_combinations = df_counts.nlargest(20, 'Count')
custom_color_scale = px.colors.sequential.Blues

# Tile size and tile colour both encode the pair count.
fig_treemap = px.treemap(
    top_20_combinations,
    path=['LearnCodeOnline', 'LearnCodeCoursesCert'],
    values='Count',
    title="Treemap of Learning Code",
    labels={'Count': 'Count'},
    color='Count',
    color_continuous_scale=custom_color_scale,
)
fig_treemap.update_layout(
    title_x=0.5,
    title_font={'size': 24},
    margin={'t': 60, 'b': 20, 'l': 20, 'r': 20},
)
fig_treemap.show()

### Final Analysis 
##### Online learning is the first choice of developers, with Udemy a popular choice among e-learning platforms. A considerable share of respondents, however, chose platforms labelled "Other", i.e. platforms that are not listed in the survey form. Pluralsight also stands out as a good choice for learning, especially for Microsoft SQL Server.