In [50]:
# Import Statements
%config InlineBackend.figure_format = 'retina'
from helper_functions import *
from scipy.stats import ks_2samp
from subprocess import check_output
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [51]:
# Import the dataset
stack = pd.read_csv("./developer_survey_2018/survey_results_public.csv")
list(stack.columns)

['Respondent',
 'Hobby',
 'OpenSource',
 'Country',
 'Student',
 'Employment',
 'FormalEducation',
 'UndergradMajor',
 'CompanySize',
 'DevType',
 'YearsCoding',
 'YearsCodingProf',
 'JobSatisfaction',
 'CareerSatisfaction',
 'HopeFiveYears',
 'JobSearchStatus',
 'LastNewJob',
 'AssessJob1',
 'AssessJob2',
 'AssessJob3',
 'AssessJob4',
 'AssessJob5',
 'AssessJob6',
 'AssessJob7',
 'AssessJob8',
 'AssessJob9',
 'AssessJob10',
 'AssessBenefits1',
 'AssessBenefits2',
 'AssessBenefits3',
 'AssessBenefits4',
 'AssessBenefits5',
 'AssessBenefits6',
 'AssessBenefits7',
 'AssessBenefits8',
 'AssessBenefits9',
 'AssessBenefits10',
 'AssessBenefits11',
 'JobContactPriorities1',
 'JobContactPriorities2',
 'JobContactPriorities3',
 'JobContactPriorities4',
 'JobContactPriorities5',
 'JobEmailPriorities1',
 'JobEmailPriorities2',
 'JobEmailPriorities3',
 'JobEmailPriorities4',
 'JobEmailPriorities5',
 'JobEmailPriorities6',
 'JobEmailPriorities7',
 'UpdateCV',
 'Currency',
 'Salary',
 'SalaryType',

## Data Preprocessing

Feature Selection - We've only kept the columns that we will be using for the analysis and the recommender system and have removed all the other columns.

Categorize values - Assigned numerical values to categorical values.

In [52]:
kept_columns = ['Respondent', 'Student', 'Hobby', 'Country', 'Employment', 
                'FormalEducation', 'UndergradMajor', 'CompanySize', 'YearsCoding', 'YearsCodingProf', 
                'DevType', 'CareerSatisfaction', 'JobSatisfaction', 'EducationTypes', 'SelfTaughtTypes', 
                'LanguageWorkedWith', 'LanguageDesireNextYear', 'IDE', 'Gender', 'EducationParents', 'RaceEthnicity']

stack = stack[kept_columns]
stack.set_index('Respondent', inplace=True)

In [65]:
satisfaction_strs = {
    'Extremely satisfied': 7,
    'Moderately satisfied': 6,
    'Slightly satisfied': 5,
    'Neither satisfied nor dissatisfied': 4,
    'Slightly dissatisfied': 3,
    'Moderately dissatisfied' : 2,
    'Extremely dissatisfied' : 1
}
satisfy_key = ['JobSatisfaction', 'CareerSatisfaction']
replacers = [
    (satisfy_key, satisfaction_strs)]
def dict_map(dict_to_use):
    def mapper(val):
        if val in dict_to_use:
            return dict_to_use[val]
        else:
            return np.nan

    return mapper

for keys, strs in replacers:
    stack[keys] = stack[keys].applymap(dict_map(strs))

In [66]:
stack.head(5)

Unnamed: 0_level_0,Student,Hobby,Country,Employment,FormalEducation,UndergradMajor,CompanySize,YearsCoding,YearsCodingProf,DevType,CareerSatisfaction,JobSatisfaction,EducationTypes,SelfTaughtTypes,LanguageWorkedWith,LanguageDesireNextYear,IDE,Gender,EducationParents,RaceEthnicity
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,No,Yes,Kenya,Employed part-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Mathematics or statistics,20 to 99 employees,3-5 years,3-5 years,Full-stack developer,7.0,7.0,"Taught yourself a new language, framework, or ...",The official documentation and/or standards fo...,JavaScript;Python;HTML;CSS,JavaScript;Python;HTML;CSS,Komodo;Vim;Visual Studio Code,Male,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Black or of African descent
3,No,Yes,United Kingdom,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A natural science (ex. biology, chemistry, phy...","10,000 or more employees",30 or more years,18-20 years,Database administrator;DevOps specialist;Full-...,4.0,2.0,"Taught yourself a new language, framework, or ...",The official documentation and/or standards fo...,JavaScript;Python;Bash/Shell,Go;Python,IPython / Jupyter;Sublime Text;Vim,Male,"Bachelor’s degree (BA, BS, B.Eng., etc.)",White or of European descent
4,No,Yes,United States,Employed full-time,Associate degree,"Computer science, computer engineering, or sof...",20 to 99 employees,24-26 years,6-8 years,Engineering manager;Full-stack developer,6.0,6.0,,,,,,,,
5,No,No,United States,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",100 to 499 employees,18-20 years,12-14 years,Full-stack developer,3.0,4.0,Completed an industry certification program (e...,The official documentation and/or standards fo...,C#;JavaScript;SQL;TypeScript;HTML;CSS;Bash/Shell,C#;JavaScript;SQL;TypeScript;HTML;CSS;Bash/Shell,Visual Studio;Visual Studio Code,Male,Some college/university study without earning ...,White or of European descent
7,"Yes, part-time",Yes,South Africa,Employed full-time,Some college/university study without earning ...,"Computer science, computer engineering, or sof...","10,000 or more employees",6-8 years,0-2 years,Data or business analyst;Desktop or enterprise...,6.0,5.0,Taken a part-time in-person course in programm...,The official documentation and/or standards fo...,C;C++;Java;Matlab;R;SQL;Bash/Shell,Assembly;C;C++;Matlab;SQL;Bash/Shell,Notepad++;Visual Studio;Visual Studio Code,Male,Some college/university study without earning ...,White or of European descent


In [67]:
# Split the dataframe into Student Developers and Professional Developers
prof_stack = stack[stack.Student == "No"]
stud_stack = stack[stack.Student.str.contains('Yes') == True]

In [77]:
survey_ques = pd.read_csv("./developer_survey_2018/survey_results_schema.csv")
survey_ques.head()

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,Hobby,Do you code as a hobby?
2,OpenSource,Do you contribute to open source projects?
3,Country,In which country do you currently reside?
4,Student,"Are you currently enrolled in a formal, degree..."


In [69]:
stud_stack.head()

Unnamed: 0_level_0,Student,Hobby,Country,Employment,FormalEducation,UndergradMajor,CompanySize,YearsCoding,YearsCodingProf,DevType,CareerSatisfaction,JobSatisfaction,EducationTypes,SelfTaughtTypes,LanguageWorkedWith,LanguageDesireNextYear,IDE,Gender,EducationParents,RaceEthnicity
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
7,"Yes, part-time",Yes,South Africa,Employed full-time,Some college/university study without earning ...,"Computer science, computer engineering, or sof...","10,000 or more employees",6-8 years,0-2 years,Data or business analyst;Desktop or enterprise...,6.0,5.0,Taken a part-time in-person course in programm...,The official documentation and/or standards fo...,C;C++;Java;Matlab;R;SQL;Bash/Shell,Assembly;C;C++;Matlab;SQL;Bash/Shell,Notepad++;Visual Studio;Visual Studio Code,Male,Some college/university study without earning ...,White or of European descent
21,"Yes, full-time",No,Netherlands,Employed full-time,"Secondary school (e.g. American high school, G...",,20 to 99 employees,0-2 years,0-2 years,Back-end developer;Front-end developer;Student,2.0,4.0,Received on-the-job training in software devel...,,Java;JavaScript;PHP;VB.NET;HTML;CSS,,Notepad++;Visual Studio,Male,Associate degree,White or of European descent
26,"Yes, full-time",No,United States,Employed part-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...","1,000 to 4,999 employees",0-2 years,,Student,,,,,,,,,,
29,"Yes, full-time",Yes,India,Employed full-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)",,"10,000 or more employees",0-2 years,3-5 years,Data or business analyst;Data scientist or mac...,7.0,5.0,Contributed to open source software,,C;C++;C#,Python;R,Android Studio;Coda;Xcode,Female,Some college/university study without earning ...,
31,"Yes, full-time",Yes,Chile,Employed part-time,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A business discipline (ex. accounting, finance...",Fewer than 10 employees,3-5 years,,Back-end developer;Front-end developer;Full-st...,,,,,,,,,,


In [70]:
# Selecting only limited features from the above sub data frames and copying important features into 2 new dataframes
important_features_prof = ['Hobby', 'Country', 'FormalEducation', 'UndergradMajor','YearsCoding', 'YearsCodingProf',
                           'DevType', 'CareerSatisfaction', 'JobSatisfaction','LanguageWorkedWith', 
                           'LanguageDesireNextYear']


important_features_stud = ['Hobby', 'Country', 'FormalEducation', 'YearsCoding','LanguageWorkedWith', 
                           'LanguageDesireNextYear']

final_prof_stack = prof_stack[important_features_prof].copy()
final_stud_stack = stud_stack[important_features_stud].copy()

In [71]:
# Student Developers network
preprocessed_dfs_stud = preprocessed(final_stud_stack, ["LanguageWorkedWith", "LanguageDesireNextYear"],
                                     'LanguageDesireNextYear', False)

In [72]:
# Professional Developers network
preprocessed_dfs_prof = preprocessed(final_prof_stack, ["LanguageWorkedWith", "LanguageDesireNextYear", 'DevType'],
                                     'LanguageDesireNextYear', True)

# Predict

In this section, we show an example of a recommendation. Based on the recruiter's requirements - for both students and professionals, it recommends them suitable candidates.

# Student

These are the requirements of the recruiter based on the questionnaire:
- Do you code as a Hobby? : **Yes**
- In which country do you currently reside? : ** United States**
- Which of the following best describes the highest level of formal education that you’ve completed ? : **Bachelor’s degree (BA, BS, B.Eng., etc.)**
- Including any education, for how many years have you been coding ? : **3-5 years**
- Which of the following languages have you done extensive development work in over the past year, and which do you want to work in over the next year? : **C#;Java**
- Which of the following languages have you done extensive development work in over the past year, and which do you want to work in over the next year? : **C++;Python**

In [74]:
final_stud_stack.loc[final_stud_stack.shape[0]] = ['Yes', 'United States', 'Bachelor’s degree (BA, BS, B.Eng., etc.)',
                                                   '3-5 years', 'C#;Java', 'C++;Python']
predict_dfs_stud = preprocessed(final_stud_stack, ["LanguageWorkedWith", "LanguageDesireNextYear"],
                                          'LanguageDesireNextYear', False)
knn_stud = compute_knn_graph(predict_dfs_stud[0])

best_predict_stud = np.argsort(knn_stud.toarray()[-1])[::-1][1:6]
size_stud = preprocessed_dfs_stud[1].shape[0]
predict_dfs_stud[1].iloc[[size_stud-1] + list(best_predict_stud), :]

Unnamed: 0_level_0,Hobby,Country,FormalEducation,YearsCoding,LanguageWorkedWith,LanguageDesireNextYear
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
31110,Yes,Bolivia,Some college/university study without earning ...,3-5 years,Delphi/Object Pascal,Delphi/Object Pascal
5284,Yes,United States,"Bachelor’s degree (BA, BS, B.Eng., etc.)",3-5 years,Assembly,Assembly
13363,Yes,United States,"Bachelor’s degree (BA, BS, B.Eng., etc.)",3-5 years,Go,Go
12534,Yes,Indonesia,"Bachelor’s degree (BA, BS, B.Eng., etc.)",3-5 years,Java,JavaScript
14341,Yes,United States,"Bachelor’s degree (BA, BS, B.Eng., etc.)",3-5 years,JavaScript;HTML;CSS,TypeScript
13655,Yes,United States,"Bachelor’s degree (BA, BS, B.Eng., etc.)",3-5 years,C#;Python,C++;F#


All of these people are the ones recommended through our recommender system and they satisfy a lot of the requirements specified by the user.

# Professional

These are the requirements of the recruiter based on the questionnaire:
- Do you code as a Hobby? : **Yes**
- In which country do you currently reside? : ** United States**
- Which of the following best describes the highest level of formal education that you’ve completed ? : **Master’s degree (MA, MS, M.Eng., MBA, etc.)**
- Which of the following best describes your main field of study (aka 'major') in college or university/for your undergraduate studies? : Computer science, computer engineering, or software engineering
- Including any education, for how many years have you been coding ? : **6-8 years**
- For how many years have you coded professionally (as a part of your work) ? : **3-5 years**
- Which of the following describe you ? : **Full-stack developer**
- How satisfied are you with your current job? If you work more than one job, please answer regarding the one you spend the most hours on. : **8.0**
- Overall, how satisfied are you with your career thus far ? : **9.0** 
- Which of the following languages have you done extensive development work in over the past year, and which do you want to work in over the next year? : **Java;PHP;Python**
- Which of the following languages have you done extensive development work in over the past year, and which do you want to work in over the next year? : **Go;Python;Lua**

In [75]:
important_features_prof = ['Hobby', 'Country', 'FormalEducation', 'UndergradMajor','YearsCoding', 'YearsCodingProf',
                           'DevType', 'CareerSatisfaction', 'JobSatisfaction','LanguageWorkedWith', 
    # Student

These are the requirements of the recruiter based on the questionnaire:
- Do you code as a Hobby? : **Yes**
- In which country do you currently reside? : ** United States**
- Which of the following best describes the highest level of formal education that you’ve completed ? : **Bachelor’s degree (BA, BS, B.Eng., etc.)**
- Including any education, for how many years have you been coding ? : **3-5 years**
- Which of the following languages have you done extensive development work in over the past year, and which do you want to work in over the next year? : **C#;Java**
- Which of the following languages have you done extensive development work in over the past year, and which do you want to work in over the next year? : **C++;Python**                       'LanguageDesireNextYear']

final_prof_stack.loc[final_prof_stack.shape[0]] = ['Yes', 'India', "Master’s degree (MA, MS, M.Eng., MBA, etc.)", 
                                                   'Computer science, computer engineering, or software engineering',
                                                   '6-8 years', '3-5 years', 'Full-stack developer', 
                                                   8.0, 9.0, 'Java;PHP;Python','Go;Python;Lua']

predict_dfs_prof = preprocessed(final_prof_stack, ["LanguageWorkedWith", "LanguageDesireNextYear", "DevType"],
                                          'LanguageDesireNextYear', True)

knn_prof = compute_knn_graph(predict_dfs_prof[0])
best_predict_prof = np.argsort(knn_prof.toarray()[-1])[::-1][1:6]

size_prof = preprocessed_dfs_prof[1].shape[0]
predict_dfs_prof[1].iloc[[size_prof-1] + list(best_predict_prof), :]

Unnamed: 0_level_0,Hobby,Country,FormalEducation,UndergradMajor,YearsCoding,YearsCodingProf,DevType,CareerSatisfaction,JobSatisfaction,LanguageWorkedWith,LanguageDesireNextYear
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
101512,Yes,United States,Some college/university study without earning ...,"Information systems, information technology, o...",30 or more years,30 or more years,Full-stack developer,4.0,2.0,C#;JavaScript;SQL;HTML;CSS,C#;JavaScript;SQL;TypeScript;HTML;CSS
43167,Yes,United States,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",18-20 years,15-17 years,Back-end developer;Desktop or enterprise appli...,4.0,2.0,Java;JavaScript;SQL;HTML;CSS,Java;JavaScript;SQL;HTML;CSS
28507,Yes,Germany,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",18-20 years,15-17 years,Back-end developer;Front-end developer;Full-st...,6.0,6.0,C#;Java;JavaScript;SQL;HTML;CSS,C#;Java;JavaScript;SQL;TypeScript;HTML;CSS
86867,Yes,Sweden,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",9-11 years,6-8 years,Back-end developer;Front-end developer;Full-st...,4.0,4.0,Java;JavaScript;SQL;HTML;CSS,Java;JavaScript;SQL;HTML;CSS
62611,Yes,Denmark,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",27-29 years,15-17 years,Back-end developer;Front-end developer;Full-st...,6.0,6.0,Java;JavaScript;SQL;TypeScript;HTML;CSS,Java;JavaScript;SQL;TypeScript;HTML;CSS
29760,Yes,India,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",0-2 years,0-2 years,Back-end developer;Data scientist or machine l...,2.0,1.0,Java;JavaScript;SQL;HTML;CSS,Java;JavaScript;SQL;HTML;CSS


All of these people are the ones recommended through our recommender system and they satisfy a lot of the requirements specified by the user.