In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter, OrderedDict
from pywaffle import Waffle
import pathlib


In [None]:
# Load the input table, then keep only the rows whose State is not one of the excluded statuses
df = pd.read_csv('./input/input.csv')
df = df[~df['State'].isin(['Refused', 'Reminder sent', 'excluded'])]

# Make sure the output directory exists before any figure is written to it
out_dir = pathlib.Path('./output')
out_dir.mkdir(parents=True, exist_ok=True)

# Grey background for every plot (use sns.set_theme() if seaborn version 0.11.0 or above)
sns.set(style="darkgrid")

## Histograms

In [None]:
def generate_company_size_plot(df):
    """Plot the number of interviewees per company-size bin and save the figure.

    The 'Company size' column of ``df`` is rewritten in place: the answers
    '51-100' and '100-500' are merged into '51-500', '5000-10000' is relabelled
    '500-10000', and the column is then converted to a categorical with a fixed
    category order. Values outside the five listed bins become NaN in that
    conversion.

    Args:
        df: DataFrame with a 'Company size' column; modified in place.

    Side effects:
        Saves ``company_size.pdf`` under the module-level ``out_dir`` and shows
        the figure.
    """
    column = 'Company size'

    # Uniformizing values. A single .loc assignment is used instead of
    # df[column][mask] = ...: chained assignment writes to a temporary object
    # under pandas copy-on-write, which would silently leave df unchanged.
    df.loc[df[column].isin(['51-100', '100-500']), column] = '51-500'
    df.loc[df[column] == '5000-10000', column] = '500-10000'

    # Sorting bins: the category order is the intended left-to-right bar order.
    df[column] = pd.Categorical(df[column], ['1-10', '11-50', '51-500', '500-10000', '10001+'])

    # Create the plot
    ax = sns.histplot(data=df, x=column)
    ax.set_ylabel('# interviewees')

    plt.savefig(out_dir/'company_size.pdf', bbox_inches='tight')
    plt.show()


# Render and save the company-size histogram for the filtered rows in df.
generate_company_size_plot(df)

In [None]:
def generate_experience_g_plot(df):
    """Plot the number of interviewees per general-experience bin and save it.

    The 'Experience (general)' column of ``df`` is converted in place to a
    categorical with the bin order '0-2', '3-5', '6-9', '10+'.

    Args:
        df: DataFrame with an 'Experience (general)' column; modified in place.

    Side effects:
        Saves ``experience_general.pdf`` under the module-level ``out_dir`` and
        shows the figure.
    """
    column = 'Experience (general)'
    bin_order = ['0-2', '3-5', '6-9', '10+']

    # Fix the bin order through the categorical's category order.
    df[column] = pd.Categorical(df[column], categories=bin_order)

    # Draw the histogram, then label the y-axis and pin its range to 0..12.
    axes = sns.histplot(x=column, data=df)
    axes.set(ylabel='# interviewees', ylim=(0, 12))

    target = out_dir.joinpath('experience_general.pdf')
    plt.savefig(target, bbox_inches='tight')
    plt.show()

# Render and save the general-experience histogram for the filtered rows in df.
generate_experience_g_plot(df)

In [None]:
def generate_experience_ml_plot(df):
    """Plot the number of interviewees per ML-experience bin and save it.

    The 'Experience (ML)' column of ``df`` is converted in place to a
    categorical with the bin order '0-2', '3-5', '6-9', '10+'.

    Args:
        df: DataFrame with an 'Experience (ML)' column; modified in place.

    Side effects:
        Saves ``experience_ml.pdf`` under the module-level ``out_dir`` and
        shows the figure.
    """
    column = 'Experience (ML)'
    bin_order = ['0-2', '3-5', '6-9', '10+']

    # Fix the bin order through the categorical's category order.
    df[column] = pd.Categorical(df[column], categories=bin_order)

    # Draw the histogram, then label the y-axis and pin its range to 0..15.
    axes = sns.histplot(x=column, data=df)
    axes.set(ylabel='# interviewees', ylim=(0, 15))

    target = out_dir.joinpath('experience_ml.pdf')
    plt.savefig(target, bbox_inches='tight')
    plt.show()

# Render and save the ML-experience histogram for the filtered rows in df.
generate_experience_ml_plot(df)

## Pie and waffle charts

In [None]:
def generate_waffle_plot(df, col_name, file_name=None):
    """Draw a waffle chart of the value frequencies found in one column.

    Missing values are dropped, the remaining values are counted, and the
    categories are passed to the chart most frequent first. Each legend label
    is the value followed by its share of the counted rows, formatted as a
    whole percent.

    Args:
        df: DataFrame holding the column to summarise.
        col_name: Name of the column whose values are counted.
        file_name: Optional base name without extension. When truthy, the
            figure is saved as ``<file_name>.pdf`` under the module-level
            ``out_dir``; otherwise nothing is written.
    """
    counts = OrderedDict(Counter(df[col_name].dropna().tolist()).most_common())
    total = sum(counts.values())
    labels = [f'{value} ({100 * (n / total):.0f}%)' for value, n in counts.items()]

    # Request exactly one colour per category. Slicing the default palette
    # (sns.color_palette('colorblind')[:k]) can never yield more colours than
    # the palette holds, so a column with many distinct values would get fewer
    # colours than categories; with n_colors the palette cycles instead.
    palette = sns.color_palette('colorblind', n_colors=len(counts))

    plt.figure(
        FigureClass=Waffle,
        rows=4,
        values=counts,
        labels=labels,
        colors=palette,
        legend={'loc': 'upper left', 'bbox_to_anchor': (1, 0.9), 'fontsize': 10},
        icons='user', icon_size=21,
        icon_legend=True,
    )

    if file_name:
        plt.savefig(out_dir/f'{file_name}.pdf', bbox_inches='tight')
    

### Sector

In [None]:

def generate_sector_pie_plot(df):
    """Show a donut-style pie chart of the 'Sector' value frequencies.

    Missing values are dropped and the remaining sectors are counted; the
    wedges are passed to the pie most frequent first. The figure is only
    shown, it is not written to disk.

    Args:
        df: DataFrame with a 'Sector' column.
    """
    sector_counts = Counter(df['Sector'].dropna().tolist()).most_common()
    sector_names = [name for name, _ in sector_counts]
    sector_sizes = [size for _, size in sector_counts]

    figure, axes = plt.subplots()
    axes.pie(
        sector_sizes,
        labels=sector_names,
        colors=sns.color_palette(),
        explode=[.05] * len(sector_sizes),
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )

    # A white disc over the centre turns the pie into a ring.
    axes.add_artist(plt.Circle((0, 0), 0.70, fc='white'))

    # Equal aspect ratio ensures that the pie is drawn as a circle.
    axes.axis('equal')
    figure.tight_layout()
    plt.show()

# Show the sector pie chart for the filtered rows in df (nothing is saved to disk).
generate_sector_pie_plot(df)

In [None]:
def generate_sector_waffle_plot(df):
    """Draw the waffle chart for the 'Sector' column.

    Delegates to ``generate_waffle_plot`` with ``file_name='sector_waffle'``.

    Args:
        df: DataFrame with a 'Sector' column.
    """
    generate_waffle_plot(df, col_name='Sector', file_name='sector_waffle')


generate_sector_waffle_plot(df)

### Where

In [None]:
# NOTE(review): despite its name this function plots the 'Where' column, and it
# rebinds a function name already used earlier in the notebook for the 'Sector'
# chart. Consider a dedicated name once callers can be updated.
def generate_sector_waffle_plot(df):
    """Draw the waffle chart for the 'Where' column.

    Delegates to ``generate_waffle_plot`` with ``file_name='world_region'``.

    Args:
        df: DataFrame with a 'Where' column.
    """
    generate_waffle_plot(df, col_name='Where', file_name='world_region')


generate_sector_waffle_plot(df)

### Job role

In [None]:
# NOTE(review): despite its name this function plots the 'Job title' column, and
# it rebinds a function name already used earlier in the notebook. Consider a
# dedicated name once callers can be updated.
def generate_sector_waffle_plot(df):
    """Draw the waffle chart for the 'Job title' column.

    Delegates to ``generate_waffle_plot`` with ``file_name='job_role'``.

    Args:
        df: DataFrame with a 'Job title' column.
    """
    generate_waffle_plot(df, col_name='Job title', file_name='job_role')


generate_sector_waffle_plot(df)