# Kaggle Survey 2021

In [None]:
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import numpy as np

# I was planning to make the plots interactive and searching for a way to do that.
# Using widgets and adding a dropdown or a button to update the graphs was an idea from
# https://www.kaggle.com/wildbenji/graph-generator-widget-grouping-options.
# Still, it didn't work out well when the notebook is saved.
# import ipywidgets as widgets
# from IPython.display import display, clear_output
# from ipywidgets import Dropdown, Output

# Right now, I am using plotly to make plots interactive,
# It is an idea from https://www.kaggle.com/toomuchsauce/mental-health-in-tech-plotly-interactive-viz
import plotly.graph_objects as go

In [None]:
# Load the raw survey export; low_memory=False makes pandas read the file in one pass.
df = pd.read_csv(
    '../input/kaggle-survey-2021/kaggle_survey_2021_responses.csv',
    low_memory=False,
)
# The first row carries the question texts, so keep it separately ...
descriptions = df.iloc[0]
# ... and leave only the actual responses in the dataframe.
df = df.iloc[1:]

In [None]:
# Preview the first rows of the responses.
df.head()

In [None]:
# Preview the last rows of the responses.
df.tail()

In [None]:
# Show every column label of the response table.
print(f'List of columns:\n{df.columns}')

# Number of people who participated in the survey

In [None]:
# Rows and columns of the response table (the description row is already removed).
number_of_people, number_of_questions_answered = df.shape
print('Number of people participated to the survey:', number_of_people)
print('Number of questions answered:', number_of_questions_answered)

In [None]:
# Keep only the responses whose country answer (Q3) is Turkey.
df_Turkey = df.loc[df['Q3'] == 'Turkey']
number_of_turkish_people = len(df_Turkey)
print('Number of Turkish people participated to the survey:', number_of_turkish_people)

# Country

In [None]:
# Column 3 holds the country answer; list its distinct values alphabetically.
country_names = sorted(df.iloc[:, 3].unique())
print(f'List of countries:\n{country_names}')

In [None]:
# Find the survey country names that have no counterpart on the geopandas map.
countries = df[df.columns[3]].copy()
# NOTE(review): `gpd.datasets` was deprecated and later removed from geopandas;
# on a recent geopandas this path has to point at a local Natural Earth file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Build the lookup set once instead of scanning the whole column per country.
map_names = set(world['name'])
countries_not_matching = [name for name in df.Q3.value_counts().index
                          if name not in map_names]
print(countries_not_matching)

In [None]:
# Rename the countries whose survey spelling differs from the map spelling.
countries = countries.replace({
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'Iran, Islamic Republic of...'                        : 'Iran',
    'Czech Republic'                                      : 'Czechia',
    'Viet Nam'                                            : 'Vietnam'})

# Count respondents per country once, then drop the answers that cannot be drawn.
# errors='ignore' keeps the cell working when one of these answers is absent.
countries_not_matching = ['Other', 'Singapore', 'Hong Kong (S.A.R.)', 'I do not wish to disclose my location']
country_counts = countries.value_counts().drop(countries_not_matching, errors='ignore')
x = country_counts.values
y = country_counts.index

# Reading the geopandas data
# NOTE(review): `gpd.datasets` was deprecated and later removed from geopandas;
# on a recent geopandas this path has to point at a local Natural Earth file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# One row per country with its number of respondents
column = 'kaggle_population'
country_data = pd.DataFrame({'Country': y, column: x})

# Left-join the counts onto the map (countries without respondents get NaN) and plot
world = world.set_index('name').join(country_data.set_index('Country')).reset_index()
fig, ax = plt.subplots(figsize=(24,16))

world.plot(column=column, ax=ax, legend=True, legend_kwds={'label': "Kaggle population by country", 'orientation': "horizontal", 'shrink': 0.3})
ax.set_axis_off();

In [None]:
def show_pie_chart(my_info, question, values, labels, text):
    """Show a pie chart with a dropdown that highlights one slice at a time.

    One pie trace is created per label; each trace pulls out "its" slice and
    carries its own title. The dropdown toggles which trace is visible.

    Parameters:
        my_info:  label selected initially; must be present in `labels`.
        question: text shown on the first line of the pie title.
        values:   numpy array with one count per label (`.sum()` is called on it).
        labels:   pandas Index of slice labels (`get_loc` and elementwise `==`
                  are used on it).
        text:     sequence with one title suffix per label.

    Raises:
        KeyError: if `my_info` is not in `labels`.
    """
    my_index = labels.get_loc(my_info)
    total = values.sum()               # loop invariant, computed once
    trace_count = len(labels)

    buttons = []
    fig = go.Figure()
    for i, label in enumerate(labels):
        percent = 100. * values[i] / total
        fig.add_trace(go.Pie(
                 values    = values,
                 labels    = labels,
                 pull      = (labels == label) * 0.15,
                 title     = dict(text = '{0}<br>{1:1.2f}% '.format(question, percent) + text[i], font = dict(size=18)),
                 hoverinfo ='label+percent',
                 visible   = bool(i == my_index)))   # only the selected trace starts visible

        # The `update` method takes [trace changes, layout changes]. The dict
        # that used to be passed as a third item sat in the slot reserved for
        # trace indices and had no effect, so it is dropped.
        buttons.append({'label'  : label,
                        'method' : 'update',
                        'args'   : [{'visible' : [j == i for j in range(trace_count)]},
                                    {'title'   : label}]})

    fig.update_layout(margin=dict(t=0, b=0, l=0, r=0), updatemenus=[dict(type='dropdown', buttons=buttons, active=my_index)])
    fig.update_traces(textposition='inside', textinfo='label+percent')
    fig.show()

In [None]:
my_country = 'Turkey'
question = descriptions.iloc[3]

# Arrange long labels so that they can fit to the plot.
# Later cells filter `df` by these shortened names, so the renaming is written
# back to `df` explicitly instead of relying on an in-place replace of a column
# view reaching the dataframe (which depends on the pandas version).
short_country_names = {'United Kingdom of Great Britain and Northern Ireland': 'UK',
                       'I do not wish to disclose my location'               : 'No Answer',
                       'Iran, Islamic Republic of...'                        : 'Iran',
                       'United Arab Emirates'                                : 'UAE',
                       'United States of America'                            : 'USA',
                       'Viet Nam'                                            : 'Vietnam'}
df = df.assign(Q3=df['Q3'].replace(short_country_names))
countries = df['Q3']

country_counts = countries.value_counts()
values = country_counts.values  # Kaggler population of countries
labels = country_counts.index   # Countries

show_pie_chart(my_country, question, values, labels, ['of Kagglers are from {0}.'.format(label) for label in labels])

# Age

In [None]:
def show_bar_chart_with_mean(my_info, sorted_mean, text, mean=None):
    """Show per-group means as bars with a reference line and a dropdown.

    One bar trace is created per group; each trace highlights "its" bar. The
    dropdown toggles the visible trace and rewrites the title to say how far
    that group is from the reference mean.

    Parameters:
        my_info:     group label selected initially; must occur in `sorted_mean`.
        sorted_mean: sequence of `[label, value]` pairs in plotting order.
        text:        four words used in the title: unit, word used when the
                     group is below the reference, word used otherwise, suffix.
        mean:        reference mean to compare against. When omitted, the
                     notebook-level `global_mean` is used, as before.

    Raises:
        ValueError: if `my_info` is not one of the labels.
    """
    reference = global_mean if mean is None else mean
    sorted_labels = [item[0] for item in sorted_mean]
    sorted_values = [item[1] for item in sorted_mean]
    bar_count = len(sorted_labels)
    my_index = sorted_labels.index(my_info)

    def headline(label, value):
        # Single place that words the comparison for both the buttons and the initial title.
        diff = value - reference
        return 'Kagglers from {0} are {1:1.2f} {2} {3} {4} than the global average.'.format(
            label, abs(diff), text[0], text[1] if diff < 0 else text[2], text[3])

    buttons = []
    fig = go.Figure()
    for i, label in enumerate(sorted_labels):
        fig.add_trace(go.Bar(
            x       = sorted_labels,
            y       = sorted_values,
            marker  = dict(color = ["#2471A3" if name == label else "#BBBBBB" for name in sorted_labels]),
            visible = (i == my_index)))   # only the selected trace starts visible

        # `update` takes [trace changes, layout changes]; the former third item
        # occupied the trace-indices slot and had no effect, so it is dropped.
        buttons.append({
            'label'  : label,
            'method' : 'update',
            'args'   : [{'visible' : [j == i for j in range(bar_count)]},
                        {'title'   : headline(label, sorted_values[i])}]})

    fig.update_layout(
        updatemenus = [dict(type='dropdown', x = 0.99, y = 1.0, buttons=buttons, active=my_index)],
        title       = dict(text=headline(my_info, sorted_values[my_index]),
                          font = dict(size=18)),
        margin      = dict(t=50, b=0, l=0, r=0),
        # Horizontal line at the reference mean spanning all bars
        shapes      = [dict(
            type ='line',
            yref ='y', y0 = reference, y1 = reference,
            xref ='x', x0 = -1, x1 = bar_count)])

    fig.add_annotation(x=bar_count*.90, y=reference+1, text="Global Mean: {0:1.2f}".format(reference), showarrow=False)
    fig.show()

In [None]:
# Alphabetically ordered country names that occur in the answers
country_labels = sorted(countries.value_counts().index.tolist())

# Age groups that occur in the answers, in string order
age_ranges = sorted(df.Q1.value_counts().index.tolist())

def get_range_mean(ranges):
    """Turn range labels into one representative number each.

    'a-b' becomes the midpoint of a and b; any other label is parsed as a
    single number after removing '+' (so '70+' becomes 70.0).
    """
    def representative(label):
        bounds = label.split('-')
        if len(bounds) > 1:
            return (float(bounds[0]) + float(bounds[1])) / 2
        return float(bounds[0].replace('+', ''))

    return [representative(label) for label in ranges]
# One representative age per age group, in the same order as age_ranges
age_ranges_mean_values = get_range_mean(age_ranges)

def find_mean(df, question, ranges, ranges_mean_values):
    """Return the mean of a range-coded answer over the rows that answered it.

    Parameters:
        df:                 dataframe holding the respondents.
        question:           the answer column, aligned with `df`.
        ranges:             the answer labels to count.
        ranges_mean_values: the number each label stands for, same order as `ranges`.

    Returns:
        The count-weighted average of `ranges_mean_values`. Rows whose answer
        is missing (or not in `ranges`) are left out of the average instead of
        pulling it towards zero. NaN is returned when no row matches.
    """
    weighted_sum = 0.0
    answered = 0
    for value, range_mean in zip(ranges, ranges_mean_values):
        count = len(df[question == value])
        weighted_sum += count * range_mean
        answered += count
    if answered == 0:
        return float('nan')
    return weighted_sum / answered

# Mean age over all respondents
global_mean = find_mean(df, df.Q1, age_ranges, age_ranges_mean_values)

# Mean age per country. The row mask is built from `countries`, the series the
# labels in `country_labels` were taken from, so labels and filter always agree,
# and it is built once per country instead of twice.
country_mean = []
for country in country_labels:
    in_country = countries == country
    country_mean.append([country, find_mean(df[in_country], df.Q1[in_country], age_ranges, age_ranges_mean_values)])

sorted_country_mean = sorted(country_mean, key=lambda x: x[1], reverse=True)
show_bar_chart_with_mean(my_country, sorted_country_mean, ['age', 'younger', 'older', ''])

In [None]:
my_age = '22-24'
question = descriptions.iloc[1]
age_counts = df.Q1.value_counts()
values = age_counts.values  # Ages of Kagglers
labels = age_counts.index   # Age ranges

show_pie_chart(my_age, question, values, labels, ['of Kagglers are in your age.'] * len(labels))

In [None]:
age_counts = df_Turkey.Q1.value_counts()
values = age_counts.values  # Ages of Turkish Kagglers
labels = age_counts.index   # Age ranges

show_pie_chart(my_age, question, values, labels, ['of Turkish Kagglers are in your age.'] * len(labels))

# Gender

In [None]:
def show_bar_chart(my_info, question, x, y, text, axes, showpercent=False):
    """Show a horizontal bar chart with a dropdown that highlights one bar.

    One bar trace is created per label; each trace colours "its" bar. The
    dropdown toggles the visible trace and rewrites the title.

    Parameters:
        my_info:     label selected initially; must be present in `y`.
        question:    first line of the title.
        x:           numpy array of bar lengths (`.sum()` is called on it).
        y:           pandas Index of bar labels (`get_loc` is called on it).
        text:        sequence with one title second line per label.
        axes:        pair of axis titles: (x axis, y axis).
        showpercent: label bars with their share of `x.sum()` instead of the raw value.

    Raises:
        KeyError: if `my_info` is not in `y`.
    """
    my_index = y.get_loc(my_info)
    bar_count = len(y)

    # The bar captions are the same for every trace, so build them once.
    if showpercent:
        total = x.sum()
        bar_text = ['{0:1.2f}%'.format(100. * value / total) for value in x]
    else:
        bar_text = x

    buttons = []
    fig = go.Figure()
    for i, label in enumerate(y):
        fig.add_trace(go.Bar(
                 x           = x,
                 y           = y,
                 orientation = "h",
                 text        = bar_text,
                 marker      = dict(color = ["#2471A3" if name == label else "#BBBBBB" for name in y]),
                 visible     = bool(i == my_index)))   # only the selected trace starts visible

        # `update` takes [trace changes, layout changes]; the former third item
        # occupied the trace-indices slot and had no effect, so it is dropped.
        buttons.append({'label'  : label,
                        'method' : 'update',
                        'args'   : [{'visible' : [j == i for j in range(bar_count)]},
                                    {'title'   : '{0}<br>{1}'.format(question, text[i])}]})

    fig.update_layout(updatemenus = [dict(type='dropdown', x = 1.0, y = 1.0, buttons=buttons, active=my_index)],
                      title       = dict(text  = '{0}<br>{1}'.format(question, text[my_index]), font = dict(size=18)),
                      xaxis       = dict(title = axes[0]),
                      yaxis       = dict(title = axes[1]))
    fig.show()

In [None]:
my_gender = 'Man'
question = descriptions.iloc[2]
gender_counts = df.Q2.value_counts()
x = gender_counts.values       # Number of Kagglers
y = gender_counts.index        # Gender
axes = ['Number of Kagglers', 'Genders']

# Only the part of the description up to the question mark is used as title
show_bar_chart(my_gender, question[:question.index('?') + 1], x, y,
               ['Genders of Kagglers.' for _ in y], axes)

In [None]:
gender_counts = df_Turkey.Q2.value_counts()
x = gender_counts.values  # Number of Turkish Kagglers
y = gender_counts.index   # Gender

show_bar_chart(my_gender, question[:question.index('?') + 1], x, y,
               ['Genders of Turkish Kagglers.' for _ in y], axes)

# Population Pyramid

In [None]:
def show_population_pyramid(df, title, tickvals, ticktext):
    """Show a population pyramid of age groups (Q1) split by gender (Q2).

    Genders are taken in order of frequency and drawn alternately to the
    right (positive x) and to the left (negative x) of the axis.

    Parameters:
        df:       dataframe with the columns Q1 (age group) and Q2 (gender).
        title:    figure title.
        tickvals: x positions of the axis ticks.
        ticktext: captions for those ticks (lets the left side show positive numbers).
    """
    # Data
    genders    = df.Q2.value_counts().index.tolist()
    age_groups = sorted(df.Q1.value_counts().index.tolist())

    fig = go.Figure()
    for position, gender in enumerate(genders):
        direction = 1 if position % 2 == 0 else -1
        # One pass per gender: count its respondents per age group, 0 where absent.
        counts = (df.Q1[df.Q2 == gender]
                  .value_counts()
                  .reindex(age_groups, fill_value=0)
                  .tolist())
        fig.add_trace(
            go.Bar(
                x           = [count * direction for count in counts],
                y           = age_groups,
                text        = counts,
                hoverinfo   = 'skip',
                orientation = 'h',
                name        = gender))

    fig.update_layout(margin       = dict(t=50, b=0, l=0, r=0),
                      title        = title,
                      barmode      = 'relative',
                      bargap       = 0.0,
                      bargroupgap  = 0,
                      xaxis        = dict(
                          tickvals = tickvals,
                          ticktext = ticktext,
                          title    = 'Population'),
                      yaxis        = dict(
                          title    = 'Age group'))

    fig.show()

In [None]:
# Tick captions are the absolute values, so the left side reads as positive counts
pyramid_ticks = [-1000, 0, 1000, 2000, 3000, 4000]
show_population_pyramid(df, 'Population Pyramid of Kagglers',
                        pyramid_ticks, [str(abs(tick)) for tick in pyramid_ticks])

In [None]:
# Tick captions are the absolute values, so the left side reads as positive counts
pyramid_ticks = [-20, 0, 20, 40, 60, 80]
show_population_pyramid(df_Turkey, 'Population Pyramid of Turkish Kagglers',
                        pyramid_ticks, [str(abs(tick)) for tick in pyramid_ticks])

# Background

In [None]:
my_background = "Bachelor’s degree"
question = descriptions.iloc[4]
# Break the longest answer over two lines so it fits next to the bars
background = df.Q4.replace({'Some college/university study without earning a bachelor’s degree':
                            'Some college/university study<br>without earning a bachelor’s degree'})
background_counts = background.value_counts()
x = background_counts.values       # Number of Kagglers
y = background_counts.index        # Background
axes = ['Number of Kagglers', 'Background']

show_bar_chart(my_background, question[:question.index('?') + 1], x, y,
               ['Background of Kagglers.' for _ in y], axes, True)

In [None]:
# Break the longest answer over two lines so it fits next to the bars
background = df_Turkey.Q4.replace({'Some college/university study without earning a bachelor’s degree':
                                   'Some college/university study<br>without earning a bachelor’s degree'})
background_counts = background.value_counts()
x = background_counts.values       # Number of Turkish Kagglers
# The labels must come from the same counts as x; reusing the labels of the
# global chart would pair the Turkish numbers with the wrong answers.
y = background_counts.index        # Background

show_bar_chart(my_background, question[0:question.index('?')+1], x, y, ['Background of Turkish Kagglers.'] * len(y), axes, True)

# Job

In [None]:
my_job = 'Software Engineer'
question = descriptions.iloc[5]
job_counts = df.Q5.value_counts()
values = job_counts.values  # Number of Kagglers
labels = job_counts.index   # Jobs

# Only the part of the description before the colon is used as title
show_pie_chart(my_job, question[:question.index(':')], values, labels,
               ['of Kagglers are doing same job as you.'] * len(labels))

In [None]:
job_counts = df_Turkey.Q5.value_counts()
values = job_counts.values  # Number of Kagglers
labels = job_counts.index   # Jobs

show_pie_chart(my_job, question[:question.index(':')], values, labels,
               ['of Turkish Kagglers are doing same job as you.'] * len(labels))

# Experience

In [None]:
# Get the experience groups and translate them into numbers
ranges = df.Q6.value_counts().index
# The two non-numeric answers are mapped by their text rather than by their
# position in the frequency ranking, which can change with the data.
# NOTE(review): '< 1 years' counts as 1 year, as before; 0.5 may be intended.
experience_overrides = {'I have never written code': '0',
                        '< 1 years'                : '1'}
experience_ranges = [experience_overrides.get(experience, experience.replace(' years', ''))
                     for experience in ranges]
experience_ranges_mean_values = get_range_mean(experience_ranges)

global_mean = find_mean(df, df.Q6, ranges, experience_ranges_mean_values)

# The row mask is built from `countries`, the series the labels in
# `country_labels` were taken from, so labels and filter always agree.
country_mean = []
for country in country_labels:
    in_country = countries == country
    country_mean.append([country, find_mean(df[in_country], df.Q6[in_country], ranges, experience_ranges_mean_values)])

sorted_country_mean = sorted(country_mean, key=lambda x: x[1], reverse=True)
show_bar_chart_with_mean(my_country, sorted_country_mean, ['year', 'less', 'more', 'experienced'])

In [None]:
# NOTE: `my_job` is reused here for the experience answer; the next cell reads it too.
my_job = '1-3 years'
question = descriptions.iloc[6]
experience_counts = df.Q6.value_counts()
values = experience_counts.values  # Number of Kagglers
labels = experience_counts.index   # Experience

show_pie_chart(my_job, question, values, labels,
               ['of Kagglers have the same experience as you.'] * len(labels))

In [None]:
experience_counts = df_Turkey.Q6.value_counts()
values = experience_counts.values  # Number of Kagglers
labels = experience_counts.index   # Experience

show_pie_chart(my_job, question, values, labels,
               ['of Turkish Kagglers have the same experience as you.'] * len(labels))

# Programming Language

In [None]:
def get_x_y(df, lower_range, upper_range):
    """Count the answers of a multiple-choice question spread over several columns.

    Each column in the positional range [lower_range, upper_range] is treated
    as one choice: its non-missing cells are counted and its first non-missing
    value (stripped) is used as the label.

    Returns:
        (counts, labels): a numpy array and a pandas Index, ordered by count
        descending (ties by label descending).

    A column without any answer gets the count 0 and its column name as
    label, instead of failing while looking for a label.
    """
    pairs = []
    for position in range(lower_range, upper_range + 1):
        column = df.iloc[:, position]
        answers = column.dropna()
        label = answers.iloc[0].strip() if len(answers) else str(column.name)
        pairs.append((len(answers), label))

    pairs.sort(reverse=True)
    counts = [count for count, _ in pairs]
    labels = [label for _, label in pairs]
    return np.array(counts), pd.Index(labels)

In [None]:
my_programming_language = 'C++'
question = descriptions.iloc[7]     # Question 7-19
x, y = get_x_y(df, 7, 19)           # Number of Kagglers, Programming language choices
axes = ['Number of Kagglers', 'Programming Language']

# Title text fixed: "languge" -> "language"
show_bar_chart(my_programming_language, question[0:question.index('?')+1], x, y, ['Programming language choice of Kagglers'] * len(y), axes, True)

In [None]:
x, y = get_x_y(df_Turkey, 7, 19)    # Number of Kagglers, Programming language choices

# Title text fixed: "languge" -> "language"
show_bar_chart(my_programming_language, question[0:question.index('?')+1], x, y, ['Programming language choice of Turkish Kagglers'] * len(y), axes, True)

In [None]:
my_recommendation = 'Python'
question = descriptions.iloc[20]
recommendation_counts = df.Q8.value_counts()
values = recommendation_counts.values  # Number of Kagglers
labels = recommendation_counts.index   # Recommendation

show_pie_chart(my_recommendation, question[:question.index('?') + 1], values, labels,
               ['of Kagglers recommending the same programming language as you.'] * len(labels))

In [None]:
recommendation_counts = df_Turkey.Q8.value_counts()
values = recommendation_counts.values  # Number of Kagglers
labels = recommendation_counts.index   # Recommendation

show_pie_chart(my_recommendation, question[:question.index('?') + 1], values, labels,
               ['of Turkish Kagglers recommending the same programming language as you.'] * len(labels))

# IDE

In [None]:
def replace_text(y, text, to):
    """Return a new pandas Index in which every occurrence of `text` inside a label is replaced by `to`."""
    renamed = []
    for item in y.to_numpy():
        renamed.append(item.replace(text, to))
    return pd.Index(renamed)

In [None]:
# NOTE: `my_programming_language` is reused here for the IDE answer; the next cell reads it too.
my_programming_language = 'Visual Studio'
question = descriptions.iloc[33]     # Question 21-33
x, y = get_x_y(df, 21, 33)           # Number of Kagglers, IDE choices
# Shorten the long labels, in this order
for long_label, short_label in [('Jupyter (JupyterLab, Jupyter Notebooks, etc)', 'Jupyter'),
                                ('Visual Studio Code (VSCode)', 'VSCode')]:
    y = replace_text(y, long_label, short_label)
axes = ['Number of Kagglers', 'Integrated Development Environment']

show_bar_chart(my_programming_language, question[:question.index('?') + 1], x, y,
               ['IDE choice of Kagglers' for _ in y], axes, True)

In [None]:
x, y2 = get_x_y(df_Turkey, 21, 33)           # Number of Kagglers, IDE choices
# x is ordered by the Turkish counts, so it has to be plotted against the
# Turkish labels (y2), shortened the same way as in the global chart. The
# global labels `y` follow a different order.
y2 = replace_text(y2, 'Jupyter (JupyterLab, Jupyter Notebooks, etc)', 'Jupyter')
y2 = replace_text(y2, 'Visual Studio Code (VSCode)', 'VSCode')

show_bar_chart(my_programming_language, question[0:question.index('?')+1], x, y2, ['IDE choice of Turkish Kagglers'] * len(y2), axes, True)

# Hosted Notebooks

In [None]:
my_hosted_notebook = 'Kaggle Notebooks'
question = descriptions.iloc[34]     # Question 34-50
x, y = get_x_y(df, 34, 50)           # Number of Kagglers, Hosted Notebook choices
# Shorten the one label that is too long for the axis
y = replace_text(y, 'Google Cloud Notebooks (AI Platform / Vertex AI)', 'Google Cloud Notebooks')
axes = ['Number of Kagglers', 'Hosted Notebook']

show_bar_chart(my_hosted_notebook, question[:question.index('?') + 1], x, y,
               ['Hosted Notebook choice of Kagglers' for _ in y], axes, True)

In [None]:
x, y2 = get_x_y(df_Turkey, 34, 50)           # Number of Kagglers, Hosted Notebook choices
# x is ordered by the Turkish counts, so it has to be plotted against the
# Turkish labels (y2); the global labels `y` follow a different order.
y2 = replace_text(y2, 'Google Cloud Notebooks (AI Platform / Vertex AI)', 'Google Cloud Notebooks')

show_bar_chart(my_hosted_notebook, question[0:question.index('?')+1], x, y2, ['Hosted Notebook choice of Turkish Kagglers'] * len(y2), axes, True)

# Computing Platform

In [None]:
my_computing_platform = 'A personal computer / desktop'
question = descriptions.iloc[51]
# Drop the parenthesised examples from the two longest answers
computing_platform = df.Q11.replace({
    'A cloud computing platform (AWS, Azure, GCP, hosted notebooks, etc)': 'A cloud computing platform',
    'A deep learning workstation (NVIDIA GTX, LambdaLabs, etc)': 'A deep learning workstation'})
platform_counts = computing_platform.value_counts()
values = platform_counts.values  # Number of Kagglers
labels = platform_counts.index   # Computing Platform

show_pie_chart(my_computing_platform, question[:question.index('?') + 1], values, labels,
               ['of Kagglers are using same type of computing platform as you.'] * len(labels))

In [None]:
# Drop the parenthesised examples from the two longest answers
computing_platform = df_Turkey.Q11.replace({
    'A cloud computing platform (AWS, Azure, GCP, hosted notebooks, etc)': 'A cloud computing platform',
    'A deep learning workstation (NVIDIA GTX, LambdaLabs, etc)': 'A deep learning workstation'})
platform_counts = computing_platform.value_counts()
values = platform_counts.values  # Number of Kagglers
labels = platform_counts.index   # Computing Platform

show_pie_chart(my_computing_platform, question[:question.index('?') + 1], values, labels,
               ['of Turkish Kagglers are using same type of computing platform as you.'] * len(labels))

# Visualization Libraries

In [None]:
my_visualization_library = 'Plotly / Plotly Express'
question = descriptions.iloc[59]     # Question 59-70
x, y = get_x_y(df, 59, 70)           # Number of Kagglers, Visualization Library choices
axes = ['Number of Kagglers', 'Visualization Library']

show_bar_chart(my_visualization_library, question[:question.index('?') + 1], x, y,
               ['Visualization Library choice of Kagglers' for _ in y], axes, True)

In [None]:
x, y = get_x_y(df_Turkey, 59, 70)           # Number of Kagglers, Visualization Library choices

show_bar_chart(my_visualization_library, question[:question.index('?') + 1], x, y,
               ['Visualization Library choice of Turkish Kagglers' for _ in y], axes, True)

# Machine Learning Frameworks

In [None]:
my_ml_framework = 'Scikit-learn'
question = descriptions.iloc[72]      # Question 72-89
values, labels = get_x_y(df, 72, 89)  # Number of Kagglers, Machine Learning Framework choices
axes = ['Number of Kagglers', 'Machine Learning Framework']

show_pie_chart(my_ml_framework, question[:question.index('?') + 1], values, labels,
               ['of Kagglers are using same machine learning framework as you.'] * len(labels))

In [None]:
values, labels = get_x_y(df_Turkey, 72, 89)           # Number of Kagglers, Machine Learning Framework choices

show_pie_chart(my_ml_framework, question[:question.index('?') + 1], values, labels,
               ['of Turkish Kagglers are using same machine learning framework as you.'] * len(labels))

# Workplace

In [None]:
my_industry = 'Other'
question = descriptions.iloc[115]
industry_counts = df.Q20.value_counts()
values = industry_counts.values  # Number of Kagglers
labels = industry_counts.index   # Industry

show_pie_chart(my_industry, question[:question.index('?') + 1], values, labels,
               ['of Kagglers are working in the same industry as you.'] * len(labels))

In [None]:
industry_counts = df_Turkey.Q20.value_counts()
values = industry_counts.values  # Number of Kagglers
labels = industry_counts.index   # Industry

show_pie_chart(my_industry, question[:question.index('?') + 1], values, labels,
               ['of Turkish Kagglers are working in the same industry as you.'] * len(labels))

# Number of employees

In [None]:
my_company_size = '0-49 employees'
question = descriptions.iloc[116]
company_size_counts = df.Q21.value_counts()
values = company_size_counts.values       # Number of Kagglers
labels = company_size_counts.index        # Company size

show_pie_chart(my_company_size, question[:question.index('?') + 1], values, labels,
               ['of Kagglers have same number of colleagues with you.'] * len(labels))

In [None]:
company_size_counts = df_Turkey.Q21.value_counts()
values = company_size_counts.values       # Number of Kagglers
labels = company_size_counts.index        # Company size

show_pie_chart(my_company_size, question[:question.index('?') + 1], values, labels,
               ['of Turkish Kagglers have same number of colleagues with you.'] * len(labels))

# Machine Learning adoption

In [None]:
# NOTE: `my_gender` is reused here for the ML adoption answer; the next cell reads it too.
my_gender = 'No (we do not use ML methods)'
question = descriptions.iloc[118]
adoption_counts = df.Q23.value_counts()
x = adoption_counts.values       # Number of Kagglers
y = adoption_counts.index        # Machine Learning Adoption
# Drop the parenthesised explanations so the labels fit next to the bars
for long_label, short_label in [
        ('We are exploring ML methods (and may one day put a model into production)', 'We are exploring ML methods'),
        ('We have well established ML methods (i.e., models in production for more than 2 years)', 'We have well established ML methods'),
        ('We recently started using ML methods (i.e., models in production for less than 2 years)', 'We recently started using ML methods'),
        ('We use ML methods for generating insights (but do not put working models into production)', 'We use ML methods for generating insights')]:
    y = replace_text(y, long_label, short_label)
axes = ['Number of Kagglers', 'Machine learning methods']

show_bar_chart(my_gender, question[:question.index('?') + 1], x, y,
               ["Machine learning methods Kagglers' employers are incorporating." for _ in y], axes, True)

In [None]:
adoption_counts = df_Turkey.Q23.value_counts()
x = adoption_counts.values       # Number of Turkish Kagglers
# The labels must come from the same counts as x; reusing the labels of the
# global chart would pair the Turkish numbers with the wrong answers.
y = adoption_counts.index        # Machine Learning Adoption
y = replace_text(y, 'We are exploring ML methods (and may one day put a model into production)', 'We are exploring ML methods')
y = replace_text(y, 'We have well established ML methods (i.e., models in production for more than 2 years)', 'We have well established ML methods')
y = replace_text(y, 'We recently started using ML methods (i.e., models in production for less than 2 years)', 'We recently started using ML methods')
y = replace_text(y, 'We use ML methods for generating insights (but do not put working models into production)', 'We use ML methods for generating insights')

show_bar_chart(my_gender, question[0:question.index('?')+1], x, y, ["Machine learning methods Turkish Kagglers' employers are incorporating."] * len(y), axes, True)

# Role at work

In [None]:
my_role = 'None of these'
question = 'Which activities make up an important part of your role at work?' # Question 119-126
x, y = get_x_y(df, 119, 126)  # Number of Kagglers, Roles
# Shorten or line-break the long role descriptions, in this order
role_label_overrides = [
    ('None of these activities are an important part of my role at work',
     'None of these'),
    ('Do research that advances the state of the art of machine learning',
     'Do research that advances<br>the state of the art of machine learning'),
    ('Experimentation and iteration to improve existing ML models',
     'Experimentation and iteration<br>to improve existing ML models'),
    ('Build and/or run a machine learning service that operationally improves my product or workflows',
     'Build and/or run a machine learning service<br>that operationally improves my product or workflows'),
    ('Build prototypes to explore applying machine learning to new areas',
     'Build prototypes to explore applying<br>machine learning to new areas'),
    ('Build and/or run the data infrastructure that my business uses for storing, analyzing, and operationalizing data',
     'Build and/or run the data infrastructure that my business<br>uses for storing, analyzing, and operationalizing data'),
    ('Analyze and understand data to influence product or business decisions',
     'Analyze and understand data<br>to influence product or business decisions'),
]
for long_label, short_label in role_label_overrides:
    y = replace_text(y, long_label, short_label)
axes = ['Number of Kagglers', 'Roles at work']

show_bar_chart(my_role, question, x, y,
               ['Kagglers who have the same role at their work.' for _ in y], axes, True)

In [None]:
x, y2 = get_x_y(df_Turkey, 119, 126)  # Number of Turkish Kagglers, Roles
# x is ordered by the Turkish counts, so it has to be plotted against the
# Turkish labels (y2), shortened the same way as in the global chart. The
# global labels `y` follow a different order.
y2 = replace_text(y2, 'None of these activities are an important part of my role at work',
                      'None of these')
y2 = replace_text(y2, 'Do research that advances the state of the art of machine learning',
                      'Do research that advances<br>the state of the art of machine learning')
y2 = replace_text(y2, 'Experimentation and iteration to improve existing ML models',
                      'Experimentation and iteration<br>to improve existing ML models')
y2 = replace_text(y2, 'Build and/or run a machine learning service that operationally improves my product or workflows',
                      'Build and/or run a machine learning service<br>that operationally improves my product or workflows')
y2 = replace_text(y2, 'Build prototypes to explore applying machine learning to new areas',
                      'Build prototypes to explore applying<br>machine learning to new areas')
y2 = replace_text(y2, 'Build and/or run the data infrastructure that my business uses for storing, analyzing, and operationalizing data',
                      'Build and/or run the data infrastructure that my business<br>uses for storing, analyzing, and operationalizing data')
y2 = replace_text(y2, 'Analyze and understand data to influence product or business decisions',
                      'Analyze and understand data<br>to influence product or business decisions')

show_bar_chart(my_role, question, x, y2, ['Turkish Kagglers who have the same role at their work.'] * len(y2), axes, True)

# Data Science Team Size

In [None]:
my_data_science_team = '0'
question = descriptions.iloc[117]
team_size_counts = df.Q22.value_counts()
values = team_size_counts.values       # Number of Kagglers
labels = team_size_counts.index        # Data Science Team Size

show_pie_chart(my_data_science_team, question, values, labels,
               ['of Kagglers have the same data science team size with you.'] * len(labels))

In [None]:
team_size_counts = df_Turkey.Q22.value_counts()
values = team_size_counts.values       # Number of Kagglers
labels = team_size_counts.index        # Data Science Team Size

show_pie_chart(my_data_science_team, question, values, labels,
               ['of Turkish Kagglers have the same data science team size with you.'] * len(labels))

# Acknowledgements

https://www.kaggle.com/mpwolke/kaggle-survey-2021-brazil#Role-at-Work  

https://www.kaggle.com/siddhantsadangi/kaggle-2021-any-country-vs-the-world-v3-0  

https://www.kaggle.com/wildbenji/graph-generator-widget-grouping-options  
https://www.kaggle.com/toomuchsauce/mental-health-in-tech-plotly-interactive-viz  