# How much people on Kaggle earn, by country (2021)

When looking for work, it is not unusual to find job descriptions that state "*Pay range unavailable*", or that the salary is "*competitive*". Not very helpful at all. Here we plot how much people on Kaggle earn by country (in \$USD), making use of the data from the [2021 Kaggle Machine Learning & Data Science Survey](https://www.kaggle.com/c/kaggle-survey-2021).

Furthermore, for each country, when available, I have included the *Big Mac multiplier* from The Economist's ["*The Big Mac index*"](https://www.economist.com/big-mac-index):

> "*The Big Mac index was invented by The Economist in 1986 as a lighthearted guide to whether currencies are at their “correct” level. It is based on the theory of purchasing-power parity (PPP), the notion that in the long run exchange rates should move towards the rate that would equalise the prices of an identical basket of goods and services (in this case, a burger) in any two countries.*"

thus providing a better idea of just how much one \$USD is really worth in each country. 

In [None]:
import numpy as np 
import pandas as pd
from IPython.display import display, Markdown

import seaborn as sns
import matplotlib.pyplot as plt

# Colour scheme: seaborn "darkgrid" style, then custom axes/figure background colours.
sns.set_style("darkgrid")
theme_overrides = {
    'axes.facecolor': 'lightsteelblue',
    'figure.facecolor': 'lavender',
}
sns.set(rc=theme_overrides)
# Fill colour handed to the bar plots.
bar_colour = "Blue"

# Load the 2021 Kaggle survey responses.
kaggle_survey_2021 = pd.read_csv("../input/kaggle-survey-2021/kaggle_survey_2021_responses.csv", low_memory=False)
# Row 0 holds the question text rather than a response, so drop it.
kaggle_survey_2021 = kaggle_survey_2021.drop(0)

# Save memory: keep only the two columns used below
# (Q3 supplies the country list, Q25 is the column that gets plotted).
kaggle_survey_2021_Q3_Q25 = kaggle_survey_2021[["Q3", "Q25"]].copy()

# Create an alphabetical list of countries.
# Placeholder answers are filtered out instead of removed with list.remove(),
# which raises ValueError when an answer is absent; missing values are dropped
# so that sorted() never has to compare a float NaN with a string.
not_a_country = {'I do not wish to disclose my location', 'Other'}
Q3 = [
    answer
    for answer in kaggle_survey_2021_Q3_Q25.Q3.dropna().unique().tolist()
    if answer not in not_a_country
]
countries = sorted(Q3)

# Pay ranges in ascending order, used to order the bars of each plot.
# The lowest range ('$0-999') is deliberately left out.
order = [
    '1,000-1,999', '2,000-2,999', '3,000-3,999', '4,000-4,999', '5,000-7,499',
    '7,500-9,999', '10,000-14,999', '15,000-19,999', '20,000-24,999', '25,000-29,999',
    '30,000-39,999', '40,000-49,999', '50,000-59,999', '60,000-69,999', '70,000-79,999',
    '80,000-89,999', '90,000-99,999', '100,000-124,999', '125,000-149,999', '150,000-199,999',
    '200,000-249,999',
]

# Big Mac section begin
# Load The Economist's Big Mac index and keep only the 2021-07-01 release.
Big_Mac_data = pd.read_csv("../input/the-economists-big-mac-index/output-data/big-mac-full-index.csv")
Big_Mac_data_latest = Big_Mac_data.query("date == '2021-07-01' ").reset_index(drop=True)

# The price of a Big Mac in the USA. Looked up by country name and column name
# rather than by a hard-coded row/column position, so a change in the row order
# of the dataset fails loudly (.item() raises unless exactly one row matches)
# instead of silently picking another country's price.
Big_Mac_in_USA = Big_Mac_data_latest.loc[
    Big_Mac_data_latest["name"] == "United States", "dollar_price"
].item()
Big_Mac_data_latest["multiplier"] = Big_Mac_in_USA / Big_Mac_data_latest["dollar_price"]

# Make a small copy holding just the country name and its rounded multiplier.
multiplier = Big_Mac_data_latest[["name", "multiplier"]].copy()
multiplier['multiplier'] = multiplier['multiplier'].round(decimals=2)
Euro_area = multiplier.query("name == 'Euro area' ").multiplier.item()

# Rename Big Mac index country names to the spellings used in the survey.
# Exact-match replacement: only a cell equal to the key is rewritten.
survey_spelling = {
    'Hong Kong': 'Hong Kong (S.A.R.)',
    'Britain': 'United Kingdom of Great Britain and Northern Ireland',
    'United States': 'United States of America',
    'Vietnam': 'Viet Nam',
}
multiplier["name"] = multiplier["name"].replace(survey_spelling)

# Fill in any missing data.
# Countries without a row of their own that are given the 'Euro area' multiplier.
euro_area_countries = [
    'Austria', 'Belgium', 'Cyprus', 'Estonia', 'Finland', 'France', 'Germany',
    'Greece', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta',
    'Netherlands', 'Portugal', 'Slovakia', 'Slovenia', 'Spain',
]
# Countries for which no multiplier is available; they are reported as "N/A".
no_data_countries = [
    'Algeria', 'Bangladesh', 'Belarus', 'Ecuador', 'Ethiopia', 'Ghana',
    'Iran, Islamic Republic of...', 'Iraq', 'Kazakhstan', 'Kenya', 'Morocco',
    'Nepal', 'Nigeria', 'Tunisia', 'Uganda',
]
extra_rows = pd.DataFrame(
    [{'name': name, 'multiplier': Euro_area} for name in euro_area_countries]
    + [{'name': name, 'multiplier': "N/A"} for name in no_data_countries]
)

# Add these new countries to the dataframe. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
multiplier = pd.concat([multiplier, extra_rows], ignore_index=True)
# Big Mac section end

# Survey column whose answers are counted in each plot.
question_num = 'Q25'

# Build the country -> multiplier lookup once, outside the loop. A plain dict
# with a default means a country that has no row in `multiplier` is shown as
# "N/A" instead of aborting the whole loop with a ValueError from .item().
multiplier_by_country = dict(zip(multiplier["name"], multiplier["multiplier"]))

for country in countries:
    # Section heading followed by the country's Big Mac multiplier.
    display(Markdown('# ' + country))
    big_mac_val = multiplier_by_country.get(country, "N/A")
    display(Markdown('*Big Mac multiplier* = ' + str(big_mac_val)))

    # Extract the responses for this country.
    country_df = kaggle_survey_2021_Q3_Q25.query("Q3 == @country").reset_index(drop=True)

    # One bar per pay range, in the fixed ascending order given by `order`.
    plt.figure(figsize=(15, 5))
    plt.suptitle(country, fontsize=20)
    sns.countplot(x=question_num, data=country_df, color=bar_colour, order=order)
    plt.tick_params(axis='x', rotation=90)
    plt.xlabel("")
    plt.show()