In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

In [2]:
# Load the two analysis tables from CSV files stored next to the notebook.
# NOTE(review): the preview of `df` shows an 'Unnamed: 0' column, which suggests
# the CSV was written together with its index -- consider index_col=0 once it is
# confirmed that no later cell relies on that column.
df = pd.read_csv('./analysis_df.csv', sep=',')
emp_df = pd.read_csv('./analysis_df_employee.csv', sep=',')

In [3]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,Age,Gender,Salary,Title,Formal Education,Learning Platforms,Coding Experience,ML Experience,Important Activities,Languages,...,Published Papers,ML Research,ML Serve,Ethical AI tools,Industry Type,Data Storage Products,Data Products,ML Monitor Tools,Country,Year
0,18-21,Man,1528.0,Student,Bachelor's degree,Coursera -- -1,< 1 year,< 1 year,Build and/or run a machine learning service th...,Python -- Java -- C/C++ -- -1,...,,,,,I am a student,,,,Viet Nam,2018
1,18-21,Man,1528.0,Student,Some college/university study without<br>earni...,Udacity -- Coursera -- edX -- Udemy -- -1,< 1 year,< 1 year,Analyze and understand data to influence produ...,Python -- -1,...,,,,,Computers/Technology,,,,Viet Nam,2018
2,18-21,Man,,Student,Bachelor's degree,Coursera -- -1,1-5 years,1-2 years,Build and/or run a machine learning service th...,Python -- -1,...,,,,,Computers/Technology,,,,Viet Nam,2018
3,22-24,Man,1528.0,Student,Master's degree,Coursera -- DataQuest -- -1,< 1 year,< 1 year,Do research that advances the state of the art...,Python -- R -- Bash -- -1,...,,,,,I am a student,,,,Viet Nam,2018
4,22-24,Man,1528.0,Data Scientist,Bachelor's degree,Coursera -- -1,1-5 years,< 1 year,Analyze and understand data to influence produ...,R -- SQL -- -1,...,,,,,Insurance/Risk Assessment,,,,Viet Nam,2018


In [76]:
# --- Data -------------------------------------------------------------------
# Number of rows per coding-experience bracket (drives the pie chart).
coding_exp_counts = df['Coding Experience'].value_counts()

# Mean salary per bracket, sorted ascending so the bars rise left to right.
salary_by_experience = df.groupby('Coding Experience')['Salary']
avg_salary = salary_by_experience.mean().sort_values()

# --- Figure skeleton: one row, bar chart on the left, pie on the right -------
subplot_specs = [[{"type": "bar"}, {"type": "pie"}]]
fig = sp.make_subplots(
    rows=1,
    cols=2,
    specs=subplot_specs,
    column_widths=[0.5, 0.2],
    row_heights=[2],
    subplot_titles=('Average Salary by Coding Experience', 'Coding Experience Distribution'),
)

# --- Traces -----------------------------------------------------------------
salary_bars = go.Bar(
    x=avg_salary.index,
    y=avg_salary.values,
    name='avg_salary',
    marker_color='red',
)
experience_pie = go.Pie(
    labels=coding_exp_counts.index,
    values=coding_exp_counts.values,
    name='coding_exp',
)

# Place each trace in its subplot column (both live in row 1).
for trace, column in ((salary_bars, 1), (experience_pie, 2)):
    fig.add_trace(trace, row=1, col=column)

# --- Layout -----------------------------------------------------------------
layout_options = dict(
    title='Coding Experience and Salary',
    xaxis_title="Experience",
    yaxis_title="salary",
    grid=dict(rows=1, columns=2),
    legend_title="Coding Experience",
    template='plotly_white',
)
fig.update_layout(**layout_options)

fig.show()

In [175]:
# Gender share (%) within each job title, drawn as grouped bars.

# These three answers are pooled into a single 'Others' category.
other_gender_labels = ['Nonbinary', 'Prefer to self-describe', 'Prefer not to say']

# Pool the labels BEFORE counting. Relabelling after the rates are computed
# leaves up to three separate 'Others' rows per title, which plot as several
# bars at the same x position inside one trace instead of one summed rate.
# `replace` returns a new Series, so `df` itself is not modified.
gender_grouped = df['Gender'].replace(other_gender_labels, 'Others')

# Rows per (Title, Gender) pair and rows per Title.
# The per-title total counts every row of that title, including rows whose
# Gender is missing, so the shares of one title can sum to less than 100.
title_gender_counts = df.groupby(['Title', gender_grouped]).size()
title_totals = df.groupby('Title').size()

# Divide each pair count by its title total, matching on the 'Title' level.
rate_df = (
    title_gender_counts
    .div(title_totals, level='Title')
    .mul(100)
    .reset_index(name='Rate')
    .sort_values(by='Rate')
)

fig = go.Figure()

# One bar trace per gender; sort=False keeps genders in order of first
# appearance in the rate-sorted frame and keeps the row order within each one.
for gender, gender_rates in rate_df.groupby('Gender', sort=False):
    fig.add_trace(
        go.Bar(
            x=gender_rates['Title'],
            y=gender_rates['Rate'],
            name=gender
        )
    )

fig.update_layout(
    title={
        'text': 'Gender Rate by Title',
        'x': 0.5,
        'y': 0.95
    },
    xaxis_title='Title',
    yaxis_title='Rate (%)',
    template='plotly_white',
    barmode='group',
    legend=dict(
        orientation="h",
        yanchor="middle",
        y=1.05,
        xanchor="center",
        x=0.95
    ),
    bargap=0.2,
    # Fixed canvas size: wide enough to fit one bar group per title.
    autosize=False,
    width=1800,
    height=600,
    margin=dict(l=50, r=50, t=50, b=50),
    # Treat titles as discrete categories rather than inferring an axis type.
    xaxis=dict(type='category'),
    hovermode='x'
)

fig.show()