In [None]:
!pip install pycountry-convert

In [None]:
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
import pandas as pd
import numpy as np

In [None]:
# Location of the public results file of the 2019 Stack Overflow developer survey.
SURVEY_RESULTS_CSV = "../input/stack-overflow-developer-survey-results-2019/survey_results_public.csv"

# Read the whole survey into a single DataFrame used by every question below.
df = pd.read_csv(SURVEY_RESULTS_CSV)

In [None]:
# Print a summary of the loaded survey frame (columns, non-null counts, dtypes)
# to see which columns need cleaning before they can be aggregated.
df.info()

## Q1. What is the average age at which developers wrote their first line of code?

In [None]:
# List the distinct raw answers to spot entries that are not plain numbers.
df['Age1stCode'].unique()

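Because this column is stored as strings (dtype `object`) and also contains non-numeric answers, calling `.mean()` on it directly raises an error. We therefore first convert it to float with `pd.to_numeric(..., errors='coerce')`, which turns every value that cannot be parsed as a number into `NaN` so that it is skipped by the mean ( https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_numeric.html ).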

In [None]:
# Parse the answers as numbers; errors='coerce' turns anything unparseable into NaN.
age_first_code = pd.to_numeric(df['Age1stCode'], errors='coerce')

# Store the numeric version back on the frame, then average it (NaN is skipped by mean).
df['Age1stCode'] = age_first_code
average_age = age_first_code.mean()

In [None]:
# Report the mean age rounded to the nearest whole year.
rounded_average_age = round(average_age)
print(f"The average age of people when they write their first code is: {rounded_average_age}.")

## Q2. Deduce the percentage of developers who know Python in each country.

In [None]:
# Peek at the raw answers of the languages-worked-with column.
df['LanguageWorkedWith']

In [None]:
def python(n):
    """Return 1 if a respondent's language answer lists Python, else 0.

    Parameters
    ----------
    n : str or any
        One cell of the languages column: a ';'-separated string of language
        names. Anything that is not a string (e.g. NaN for a skipped
        question) is treated as "does not know Python".

    Returns
    -------
    int
        1 when 'Python' is one of the listed languages, otherwise 0.
    """
    # Missing answers arrive as float NaN; handle them explicitly instead of
    # relying on a bare `except` that would also hide unrelated errors.
    if not isinstance(n, str):
        return 0
    # Compare whole entries rather than substrings so that a language whose
    # name merely contains "Python" is not counted as Python.
    languages = [language.strip() for language in n.split(';')]
    return 1 if 'Python' in languages else 0

In [None]:
# Flag each respondent with 1/0 depending on whether Python is among their languages;
# the mean of this flag per group is the share of Python users in that group.
df['%_knows_python'] = df['LanguageWorkedWith'].map(python)

In [None]:
# Mean of the 1/0 flag per country is the fraction of Python users; scale to percent.
python_fraction_by_country = df.groupby('Country')['%_knows_python'].mean()
python_fraction_by_country * 100

## Q3. Generate a report of the average developer salary by continent.


In [None]:
# Survey country labels mapped to replacement names (presumably ones that the
# pycountry_convert lookups can resolve - confirm against that package).
country_renames = {
    "Congo, Republic of the...": "Congo",
    "Hong Kong (S.A.R.)": "Hong Kong",
    "Libyan Arab Jamahiriya": "Libya",
    "Other Country (Not Listed Above)": "Others",
    "Republic of Korea": "South Korea",
    "The former Yugoslav Republic of Macedonia": "North Macedonia",
    # Per the original note, Timor-Leste is an Asian country that does not show up
    # in pycountry_convert, so it is relabelled as another Asian country.
    # NOTE(review): this also merges Timor-Leste respondents into India in every
    # later per-country aggregation.
    "Timor-Leste": "India",
    "Venezuela, Bolivarian Republic of...": "Venezuela",
}
df.Country = df.Country.replace(country_renames)

In [None]:
# Mean converted compensation per country, flattened into a frame with the
# columns 'Country' and 'ConvertedComp'.
mean_comp_by_country = df.groupby('Country')['ConvertedComp'].mean()
comp_per_country = mean_comp_by_country.reset_index()

In [None]:
# Display the per-country mean of ConvertedComp built in the previous cell.
comp_per_country

In [None]:
def get_continent(col):
    """Map a country name to its continent code, falling back to 'Others'.

    Parameters
    ----------
    col : str or any
        Country name as it appears in the survey's Country column. Values
        that cannot be resolved (unknown names, NaN, ...) are accepted.

    Returns
    -------
    str
        The value returned by ``country_alpha2_to_continent_code`` for the
        country's alpha-2 code, or the literal 'Others' when either lookup
        step fails.
    """
    try:
        # Two-step lookup: country name -> ISO alpha-2 code -> continent code.
        cn_a2_code = country_name_to_country_alpha2(col)
        return country_alpha2_to_continent_code(cn_a2_code)
    except Exception:
        # Any lookup failure means the country is unknown to the library.
        # Catching Exception (not a bare `except`) keeps KeyboardInterrupt and
        # SystemExit working while a long column is being mapped.
        return 'Others'

In [None]:
# Attach the continent of each country to the per-country compensation table.
comp_per_country['Continent'] = comp_per_country['Country'].map(get_continent)

Average Compensation of a Developer per Continent:

In [None]:
# Average over individual respondents rather than over per-country means:
# averaging the country means would give a country with a single respondent the
# same weight as one with thousands, which distorts the continent figure.
(
    df.assign(Continent=df['Country'].map(get_continent))
    .groupby('Continent')['ConvertedComp']
    .mean()
)

## Q4. Based on this survey, what will be the most desired programming language for the year 2020?

In [None]:
# Peek at the raw answers about languages respondents want to use next year.
df['LanguageDesireNextYear']

In [None]:
# Tally how many respondents named each language they want to use next year.
# Unanswered rows are dropped first: stringifying them would otherwise add a
# bogus language called 'nan' to the tally.
counts = (
    df['LanguageDesireNextYear']
    .dropna()
    .str.split(';')   # one list of language names per respondent
    .explode()        # one row per (respondent, language) pair
    .value_counts()
    .to_dict()        # keep `counts` a plain {language: number of users} dict
)

In [None]:
# Turn the {language: tally} dict into a one-column frame indexed by language.
desired_languages = list(counts.keys())
user_totals = list(counts.values())
Language_desired_2020 = pd.DataFrame(
    user_totals,
    index=desired_languages,
    columns=['No. of Users'],
)

In [None]:
# Rank languages by number of users (highest first); after reset_index the
# language name sits in the first column, so cell (0, 0) is the top language.
ranked_languages = Language_desired_2020.sort_values('No. of Users', ascending=False).reset_index()
most_desired_language = ranked_languages.iloc[0, 0]
print(f"The most desired programming language for the year 2020 is: {most_desired_language}.")

## Q5. What is the distribution of people who code as a hobby based on gender and continent?

In [None]:
# Derive each respondent's continent from their country.
df['Continent'] = df['Country'].map(get_continent)

In [None]:
# Encode the Yes/No hobbyist answer as 1/0 so it can be aggregated numerically.
hobbyist_codes = {"Yes": 1, "No": 0}
df['Hobbyist'] = df['Hobbyist'].replace(hobbyist_codes)

In [None]:
def sex(string):
    """Collapse a gender answer into 'Man', 'Woman' or 'Others'.

    Only the exact answers 'Man' and 'Woman' are kept; every other value
    (multiple selections, other identities, missing answers) is returned
    as 'Others'.
    """
    for label in ('Man', 'Woman'):
        if string == label:
            return label
    return "Others"

In [None]:
# Reduce the free-form gender answers to the three buckets produced by sex().
df['Gender'] = df['Gender'].map(sex)

Hobbyist based on Gender and Continent.

In [None]:
# Hobbyist was encoded as 1 (Yes) / 0 (No) in an earlier cell, so the sum per
# group is the number of hobbyists. `.count()` would instead return the number
# of respondents in each group, whether or not they code as a hobby.
df.groupby(['Continent', 'Gender'])['Hobbyist'].sum()

## Q6. Generate a report of developers' job and career satisfaction by gender and continent.

Job satisfaction based on gender and continent

In [None]:
# Number of respondents for every (continent, gender, job-satisfaction answer) combination.
job_sat_groups = df.groupby(['Continent', 'Gender', 'JobSat'])
job_sat_groups['Respondent'].count()

Career satisfaction based on gender and continent

In [None]:
# Number of respondents for every (continent, gender, career-satisfaction answer) combination.
career_sat_groups = df.groupby(['Continent', 'Gender', 'CareerSat'])
career_sat_groups['Respondent'].count()