In [1]:
# Import dependencies
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func

from flask import Flask, jsonify, render_template
from flask_sqlalchemy import SQLAlchemy

from pprint import pprint

app = Flask(__name__)

In [2]:
#################################################
# Database Setup
#################################################

app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///../db/developers.sqlite"
db = SQLAlchemy(app)

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(db.engine, reflect=True)

# Save references to each table
Survey = Base.classes.survey

  'SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and '


In [11]:
# find devtypes, languageworkedwith, databaseworkedwith, frameworkWorkededWith
s = [Survey.Respondent,
     Survey.DevType,
     Survey.LanguageWorkedWith,
     Survey.ConvertedSalary
    ]

radarResults = db.session.query(*s).filter(Survey.ConvertedSalary != '').all()

In [13]:
# put results into pandas
df = pd.DataFrame(radarResults)
df['ConvertedSalary'] = pd.to_numeric(df['ConvertedSalary'])
df.head()

Unnamed: 0,Respondent,DevType,LanguageWorkedWith,ConvertedSalary
0,2999,Data or business analyst;Educator or academic ...,C;C++;Java;SQL;Swift,8400
1,2943,Back-end developer;Database administrator;Desk...,C++;Java;JavaScript;HTML;CSS,4980
2,3203,Back-end developer,C++;Java;Python;SQL;Bash/Shell,36716
3,3010,Mobile developer,C;C#;Java;Objective-C;Rust;Swift;TypeScript,51408
4,3067,Mobile developer,C;JavaScript;Objective-C;Swift,10175


In [16]:
# explode the devtype column so there's only one devtype for each row
expdev_df = pd.DataFrame(df.DevType.str.split(';').tolist(), index=[df.Respondent, df.LanguageWorkedWith, df.ConvertedSalary]).stack()

expdev_df = expdev_df.reset_index()

expdev_df = expdev_df.rename(columns={0:'DevType'})
expdev_df = expdev_df.drop(columns='level_3')

expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer
4,2999,C;C++;Java;SQL;Swift,8400,System administrator


In [17]:
expdev_df['LanguageList'] = expdev_df.LanguageWorkedWith.str.split(';')
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType,LanguageList
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst,"[C, C++, Java, SQL, Swift]"
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher,"[C, C++, Java, SQL, Swift]"
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager,"[C, C++, Java, SQL, Swift]"
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer,"[C, C++, Java, SQL, Swift]"
4,2999,C;C++;Java;SQL;Swift,8400,System administrator,"[C, C++, Java, SQL, Swift]"


In [18]:
# hard code top 10 languages from stack ooverflow website
topLang = ['JavaScript', 'HTML', 'CSS', 'SQL', 'Java', 'Bash/Shell', 'Python', 'C#', 'PHP', 'C++']

In [19]:
# Test case for one language
# Check if language is in each entries list of known languagesLanguage List. If yes, return 1, otherwise, return 0
# expdev_df['Python'] = np.where(expdev_df.LanguageList.apply(lambda x: 'Python' in x), 1, 0)

In [20]:
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType,LanguageList
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst,"[C, C++, Java, SQL, Swift]"
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher,"[C, C++, Java, SQL, Swift]"
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager,"[C, C++, Java, SQL, Swift]"
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer,"[C, C++, Java, SQL, Swift]"
4,2999,C;C++;Java;SQL;Swift,8400,System administrator,"[C, C++, Java, SQL, Swift]"


In [36]:
# Loop through each language and see if each respondent knows the language
for language in topLang:
    
    # dynamically create new column headers
    print('Checking for: ' + language + '...')        
    
    # Create new column to check if the respondent knows a language. If the languege is in the list of known languages, it gets 1, else it gets 0
    expdev_df[language] = np.where(expdev_df.LanguageList.apply(lambda x: language in x), expdev_df['ConvertedSalary'], False)
    
    print(f'{language} done!')


Checking for: JavaScript...
JavaScript done!
Checking for: HTML...
HTML done!
Checking for: CSS...
CSS done!
Checking for: SQL...
SQL done!
Checking for: Java...
Java done!
Checking for: Bash/Shell...
Bash/Shell done!
Checking for: Python...
Python done!
Checking for: C#...
C# done!
Checking for: PHP...
PHP done!
Checking for: C++...
C++ done!


In [37]:
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType,LanguageList,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst,"[C, C++, Java, SQL, Swift]",0,0,0,8400,8400,0,0,0,0,8400
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher,"[C, C++, Java, SQL, Swift]",0,0,0,8400,8400,0,0,0,0,8400
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager,"[C, C++, Java, SQL, Swift]",0,0,0,8400,8400,0,0,0,0,8400
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer,"[C, C++, Java, SQL, Swift]",0,0,0,8400,8400,0,0,0,0,8400
4,2999,C;C++;Java;SQL;Swift,8400,System administrator,"[C, C++, Java, SQL, Swift]",0,0,0,8400,8400,0,0,0,0,8400


In [41]:
# Aggregation by mean gives a fraction of dev types who know each skill
SalLangDev = expdev_df.groupby('DevType').mean()
SalLangDev = SalLangDev.drop(index='', columns=['Respondent', 'ConvertedSalary'])

# Round each percent to 2 decimals
SalLangDev = SalLangDev.round(decimals=2)

SalLangDev

Unnamed: 0_level_0,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Back-end developer,73481.19,67139.63,64825.8,64307.02,41391.46,49365.19,39495.6,38546.81,26929.59,19805.01
"C-suite executive (CEO, CTO, etc.)",98091.05,94169.21,90448.27,85547.61,43578.51,71609.13,52279.62,42635.93,47916.48,29587.86
Data or business analyst,71654.11,72523.21,68709.13,84658.58,36667.01,52238.09,52950.11,47434.17,29234.06,20569.45
Data scientist or machine learning specialist,55602.94,54471.33,50266.26,62032.21,40366.88,58895.16,79183.45,25126.51,16698.54,32021.42
Database administrator,77198.53,75057.52,72808.19,80082.41,34837.74,54528.54,42782.79,43244.92,41039.65,19347.83
Designer,72320.23,72668.23,70723.38,58152.92,31828.03,40900.48,31807.69,40188.87,32660.28,21222.63
Desktop or enterprise applications developer,68476.5,67139.34,64398.71,65229.29,38894.82,41428.69,32364.44,61676.72,20546.78,31178.13
DevOps specialist,89563.16,81110.92,77951.3,78231.04,50573.58,81367.38,57876.49,43424.51,34085.4,22542.37
Educator or academic researcher,56177.21,56736.81,52379.82,45408.61,31512.54,50365.2,46978.08,23608.8,22224.22,31910.66
Embedded applications or devices developer,49768.28,49039.04,45420.92,43492.31,40867.82,56167.77,54519.87,36778.93,18572.26,60196.43


In [42]:
# Convert df to dict to serve as json. Orient by index to make it easily searchable by devtype
SalLangDev_dict = SalLangDev.to_dict(orient='index')

pprint(SalLangDev)

                                               JavaScript      HTML       CSS  \
DevType                                                                         
Back-end developer                               73481.19  67139.63  64825.80   
C-suite executive (CEO, CTO, etc.)               98091.05  94169.21  90448.27   
Data or business analyst                         71654.11  72523.21  68709.13   
Data scientist or machine learning specialist    55602.94  54471.33  50266.26   
Database administrator                           77198.53  75057.52  72808.19   
Designer                                         72320.23  72668.23  70723.38   
Desktop or enterprise applications developer     68476.50  67139.34  64398.71   
DevOps specialist                                89563.16  81110.92  77951.30   
Educator or academic researcher                  56177.21  56736.81  52379.82   
Embedded applications or devices developer       49768.28  49039.04  45420.92   
Engineering manager         