In [1]:
# Import dependencies
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func

from flask import Flask, jsonify, render_template
from flask_sqlalchemy import SQLAlchemy

from pprint import pprint

app = Flask(__name__)

In [2]:
#################################################
# Database Setup
#################################################

app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///../db/developers.sqlite"
db = SQLAlchemy(app)

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(db.engine, reflect=True)

# Save references to each table
Survey = Base.classes.survey

  'SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and '


In [3]:
# find devtypes, languageworkedwith, databaseworkedwith, frameworkWorkededWith
s = [Survey.Respondent,
     Survey.DevType,
     Survey.LanguageWorkedWith,
     Survey.ConvertedSalary
    ]

radarResults = db.session.query(*s).filter(Survey.ConvertedSalary != '').all()

In [4]:
# put results into pandas
df = pd.DataFrame(radarResults)
df['ConvertedSalary'] = pd.to_numeric(df['ConvertedSalary'])
df.head()

Unnamed: 0,Respondent,DevType,LanguageWorkedWith,ConvertedSalary
0,2999,Data or business analyst;Educator or academic ...,C;C++;Java;SQL;Swift,8400
1,2943,Back-end developer;Database administrator;Desk...,C++;Java;JavaScript;HTML;CSS,4980
2,3203,Back-end developer,C++;Java;Python;SQL;Bash/Shell,36716
3,3010,Mobile developer,C;C#;Java;Objective-C;Rust;Swift;TypeScript,51408
4,3067,Mobile developer,C;JavaScript;Objective-C;Swift,10175


In [5]:
# explode the devtype column so there's only one devtype for each row
expdev_df = pd.DataFrame(df.DevType.str.split(';').tolist(), index=[df.Respondent, df.LanguageWorkedWith, df.ConvertedSalary]).stack()

expdev_df = expdev_df.reset_index()

expdev_df = expdev_df.rename(columns={0:'DevType'})
expdev_df = expdev_df.drop(columns='level_3')

expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer
4,2999,C;C++;Java;SQL;Swift,8400,System administrator


In [6]:
expdev_df['LanguageList'] = expdev_df.LanguageWorkedWith.str.split(';')
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType,LanguageList
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst,"[C, C++, Java, SQL, Swift]"
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher,"[C, C++, Java, SQL, Swift]"
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager,"[C, C++, Java, SQL, Swift]"
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer,"[C, C++, Java, SQL, Swift]"
4,2999,C;C++;Java;SQL;Swift,8400,System administrator,"[C, C++, Java, SQL, Swift]"


In [7]:
# hard code top 10 languages from stack ooverflow website
topLang = ['JavaScript', 'HTML', 'CSS', 'SQL', 'Java', 'Bash/Shell', 'Python', 'C#', 'PHP', 'C++']

In [8]:
# Test case for one language
# Check if language is in each entries list of known languagesLanguage List. If yes, return 1, otherwise, return 0
# expdev_df['Python'] = np.where(expdev_df.LanguageList.apply(lambda x: 'Python' in x), 1, 0)

In [9]:
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType,LanguageList
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst,"[C, C++, Java, SQL, Swift]"
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher,"[C, C++, Java, SQL, Swift]"
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager,"[C, C++, Java, SQL, Swift]"
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer,"[C, C++, Java, SQL, Swift]"
4,2999,C;C++;Java;SQL;Swift,8400,System administrator,"[C, C++, Java, SQL, Swift]"


In [10]:
# Loop through each language and see if each respondent knows the language
for language in topLang:
    
    cName = 'salary' + language
    # dynamically create new column headers
    print('Checking for: ' + language + '...')        
    
    # Create new column to check if the respondent knows a language. If the languege is in the list of known languages, it gets 1, else it gets 0
    expdev_df[language] = np.where(expdev_df.LanguageList.apply(lambda x: language in x), 1, 0)
    expdev_df[cName] = np.where(expdev_df.LanguageList.apply(lambda x: language in x), expdev_df['ConvertedSalary'], 0)

    print(f'{language} done!')


Checking for: JavaScript...
JavaScript done!
Checking for: HTML...
HTML done!
Checking for: CSS...
CSS done!
Checking for: SQL...
SQL done!
Checking for: Java...
Java done!
Checking for: Bash/Shell...
Bash/Shell done!
Checking for: Python...
Python done!
Checking for: C#...
C# done!
Checking for: PHP...
PHP done!
Checking for: C++...
C++ done!


In [11]:
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,ConvertedSalary,DevType,LanguageList,JavaScript,salaryJavaScript,HTML,salaryHTML,CSS,...,Bash/Shell,salaryBash/Shell,Python,salaryPython,C#,salaryC#,PHP,salaryPHP,C++,salaryC++
0,2999,C;C++;Java;SQL;Swift,8400,Data or business analyst,"[C, C++, Java, SQL, Swift]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8400
1,2999,C;C++;Java;SQL;Swift,8400,Educator or academic researcher,"[C, C++, Java, SQL, Swift]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8400
2,2999,C;C++;Java;SQL;Swift,8400,Engineering manager,"[C, C++, Java, SQL, Swift]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8400
3,2999,C;C++;Java;SQL;Swift,8400,Mobile developer,"[C, C++, Java, SQL, Swift]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8400
4,2999,C;C++;Java;SQL;Swift,8400,System administrator,"[C, C++, Java, SQL, Swift]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8400


In [12]:
# Aggregate by sum. This will get both the number of coders for a language but also the total salary for that language
totals = expdev_df.groupby('DevType').sum()
# drop unnecessary colums
totals = totals.drop(columns=['Respondent', 'ConvertedSalary'], index='')

totals

Unnamed: 0_level_0,JavaScript,salaryJavaScript,HTML,salaryHTML,CSS,salaryCSS,SQL,salarySQL,Java,salaryJava,Bash/Shell,salaryBash/Shell,Python,salaryPython,C#,salaryC#,PHP,salaryPHP,C++,salaryC++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Back-end developer,22051,2129999243,20743,1946176392,19880,1879105409,19216,1864067573,13126,1199814225,13335,1430948794,11218,1144858902,11256,1117356393,9595,780607983,6301,574087880
"C-suite executive (CEO, CTO, etc.)",1324,158907493,1282,152554122,1242,146526190,1135,138587134,643,70597188,876,116006787,659,84692982,588,69070214,722,77624699,416,47932326
Data or business analyst,2635,292922001,2722,296474863,2546,280882905,3135,346084270,1506,149894746,1763,213549293,2001,216460039,1559,193910905,1166,119508855,828,84087919
Data scientist or machine learning specialist,2009,209233855,2024,204975597,1850,189151952,2230,233427206,1560,151900581,2020,221622479,2871,297967330,917,94551058,698,62836612,1229,120496600
Database administrator,5695,550811485,5704,535535383,5522,519486455,5914,571388003,2842,248567294,3586,389061142,2867,305255197,3040,308552479,3332,292817887,1577,138046770
Designer,4337,397038051,4470,398948565,4369,388271379,3559,319259519,2301,174735874,2201,224543626,1874,174624210,2213,220636921,2271,179304945,1425,116512220
Desktop or enterprise applications developer,6034,603072497,5979,591296192,5708,567159450,5905,574474352,3818,342546650,3554,364862446,2962,285033585,5337,543186874,2180,180955532,2963,274585781
DevOps specialist,4645,545170941,4312,493722169,4123,474489543,4034,476192349,2698,307841409,4187,495283252,3182,352294166,2086,264324990,2003,207477823,1211,137215384
Educator or academic researcher,1067,96849508,1120,97814267,1065,90302814,918,78284438,768,54327625,949,86829601,975,80990204,474,40701565,506,38314559,648,55013986
Embedded applications or devices developer,1415,132333853,1362,130394796,1245,120774231,1147,115646046,1126,108667530,1510,149350110,1526,144968329,1034,97795166,636,49383649,1689,160062320


In [13]:
counts = totals[topLang]

counts

Unnamed: 0_level_0,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Back-end developer,22051,20743,19880,19216,13126,13335,11218,11256,9595,6301
"C-suite executive (CEO, CTO, etc.)",1324,1282,1242,1135,643,876,659,588,722,416
Data or business analyst,2635,2722,2546,3135,1506,1763,2001,1559,1166,828
Data scientist or machine learning specialist,2009,2024,1850,2230,1560,2020,2871,917,698,1229
Database administrator,5695,5704,5522,5914,2842,3586,2867,3040,3332,1577
Designer,4337,4470,4369,3559,2301,2201,1874,2213,2271,1425
Desktop or enterprise applications developer,6034,5979,5708,5905,3818,3554,2962,5337,2180,2963
DevOps specialist,4645,4312,4123,4034,2698,4187,3182,2086,2003,1211
Educator or academic researcher,1067,1120,1065,918,768,949,975,474,506,648
Embedded applications or devices developer,1415,1362,1245,1147,1126,1510,1526,1034,636,1689


In [14]:
# empty list to hold column names
salaryList = []

# concat salery to each language so it matches the prev for loop
for l in topLang:
    c = 'salary' + l
    salaryList.append(c)

sals = totals[salaryList]
sals.columns = topLang

sals

Unnamed: 0_level_0,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Back-end developer,2129999243,1946176392,1879105409,1864067573,1199814225,1430948794,1144858902,1117356393,780607983,574087880
"C-suite executive (CEO, CTO, etc.)",158907493,152554122,146526190,138587134,70597188,116006787,84692982,69070214,77624699,47932326
Data or business analyst,292922001,296474863,280882905,346084270,149894746,213549293,216460039,193910905,119508855,84087919
Data scientist or machine learning specialist,209233855,204975597,189151952,233427206,151900581,221622479,297967330,94551058,62836612,120496600
Database administrator,550811485,535535383,519486455,571388003,248567294,389061142,305255197,308552479,292817887,138046770
Designer,397038051,398948565,388271379,319259519,174735874,224543626,174624210,220636921,179304945,116512220
Desktop or enterprise applications developer,603072497,591296192,567159450,574474352,342546650,364862446,285033585,543186874,180955532,274585781
DevOps specialist,545170941,493722169,474489543,476192349,307841409,495283252,352294166,264324990,207477823,137215384
Educator or academic researcher,96849508,97814267,90302814,78284438,54327625,86829601,80990204,40701565,38314559,55013986
Embedded applications or devices developer,132333853,130394796,120774231,115646046,108667530,149350110,144968329,97795166,49383649,160062320


In [15]:
salLangDev = sals/counts

salLangDev

Unnamed: 0_level_0,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Back-end developer,96594.224434,93823.284578,94522.404879,97006.014415,91407.452766,107307.746082,102055.52701,99267.625533,81355.704325,91110.598318
"C-suite executive (CEO, CTO, etc.)",120020.765106,118996.975039,117975.99839,122103.201762,109793.449456,132427.839041,128517.423369,117466.35034,107513.433518,115221.9375
Data or business analyst,111165.844782,108918.024614,110323.214847,110393.706539,99531.703851,121128.356778,108175.931534,124381.59397,102494.729846,101555.457729
Data scientist or machine learning specialist,104148.260329,101272.528162,102244.298378,104675.87713,97372.167308,109714.098515,103785.207245,103109.114504,90023.799427,98044.426363
Database administrator,96718.434592,93887.689867,94075.779609,96616.165539,87462.102041,108494.462354,106471.990582,101497.525987,87880.518307,87537.58402
Designer,91546.703021,89250.238255,88869.62211,89704.838157,75939.10213,102018.912313,93182.609392,99700.37099,78954.180978,81762.961404
Desktop or enterprise applications developer,99945.723732,98895.499582,99362.202172,97286.0884,89718.871137,102662.477772,96230.109723,101777.566798,83007.124771,92671.542693
DevOps specialist,117367.26394,114499.575371,115083.566093,118044.707238,114099.855078,118290.721758,110714.697046,126713.801534,103583.536196,113307.501239
Educator or academic researcher,90768.048735,87334.166964,84791.374648,85277.165577,70739.095052,91495.891465,83066.875897,85868.280591,75720.472332,84898.126543
Embedded applications or devices developer,93522.157597,95737.735683,97007.414458,100824.800349,96507.575488,98907.357616,94998.90498,94579.464217,77647.246855,94767.507401


In [16]:
# Aggregation by mean gives a fraction of dev types who know each skill
SalLangDev = expdev_df.groupby('DevType').mean()
SalLangDev = SalLangDev.drop(index='', columns=['Respondent', 'ConvertedSalary'])

# Round each percent to 2 decimals
SalLangDev = SalLangDev.round(decimals=2)

SalLangDev

Unnamed: 0_level_0,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++,salaryJavaScript,salaryHTML,salaryCSS,salarySQL,salaryJava,salaryBash/Shell,salaryPython,salaryC#,salaryPHP,salaryC++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Back-end developer,0.76,0.72,0.69,0.66,0.45,0.46,0.39,0.39,0.33,0.22,73481.19,67139.63,64825.8,64307.02,41391.46,49365.19,39495.6,38546.81,26929.59,19805.01
"C-suite executive (CEO, CTO, etc.)",0.82,0.79,0.77,0.7,0.4,0.54,0.41,0.36,0.45,0.26,98091.05,94169.21,90448.27,85547.61,43578.51,71609.13,52279.62,42635.93,47916.48,29587.86
Data or business analyst,0.64,0.67,0.62,0.77,0.37,0.43,0.49,0.38,0.29,0.2,71654.11,72523.21,68709.13,84658.58,36667.01,52238.09,52950.11,47434.17,29234.06,20569.45
Data scientist or machine learning specialist,0.53,0.54,0.49,0.59,0.41,0.54,0.76,0.24,0.19,0.33,55602.94,54471.33,50266.26,62032.21,40366.88,58895.16,79183.45,25126.51,16698.54,32021.42
Database administrator,0.8,0.8,0.77,0.83,0.4,0.5,0.4,0.43,0.47,0.22,77198.53,75057.52,72808.19,80082.41,34837.74,54528.54,42782.79,43244.92,41039.65,19347.83
Designer,0.79,0.81,0.8,0.65,0.42,0.4,0.34,0.4,0.41,0.26,72320.23,72668.23,70723.38,58152.92,31828.03,40900.48,31807.69,40188.87,32660.28,21222.63
Desktop or enterprise applications developer,0.69,0.68,0.65,0.67,0.43,0.4,0.34,0.61,0.25,0.34,68476.5,67139.34,64398.71,65229.29,38894.82,41428.69,32364.44,61676.72,20546.78,31178.13
DevOps specialist,0.76,0.71,0.68,0.66,0.44,0.69,0.52,0.34,0.33,0.2,89563.16,81110.92,77951.3,78231.04,50573.58,81367.38,57876.49,43424.51,34085.4,22542.37
Educator or academic researcher,0.62,0.65,0.62,0.53,0.45,0.55,0.57,0.27,0.29,0.38,56177.21,56736.81,52379.82,45408.61,31512.54,50365.2,46978.08,23608.8,22224.22,31910.66
Embedded applications or devices developer,0.53,0.51,0.47,0.43,0.42,0.57,0.57,0.39,0.24,0.64,49768.28,49039.04,45420.92,43492.31,40867.82,56167.77,54519.87,36778.93,18572.26,60196.43


In [13]:
# Convert df to dict to serve as json. Orient by index to make it easily searchable by devtype
SalLangDev_dict = SalLangDev.to_dict(orient='index')

pprint(SalLangDev)

                                               JavaScript      HTML       CSS  \
DevType                                                                         
Back-end developer                               73481.19  67139.63  64825.80   
C-suite executive (CEO, CTO, etc.)               98091.05  94169.21  90448.27   
Data or business analyst                         71654.11  72523.21  68709.13   
Data scientist or machine learning specialist    55602.94  54471.33  50266.26   
Database administrator                           77198.53  75057.52  72808.19   
Designer                                         72320.23  72668.23  70723.38   
Desktop or enterprise applications developer     68476.50  67139.34  64398.71   
DevOps specialist                                89563.16  81110.92  77951.30   
Educator or academic researcher                  56177.21  56736.81  52379.82   
Embedded applications or devices developer       49768.28  49039.04  45420.92   
Engineering manager         