In [1]:
# Import dependencies
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func

from flask import Flask, jsonify, render_template
from flask_sqlalchemy import SQLAlchemy

from pprint import pprint

# Flask application object; the database URI is stored on its config and it is
# handed to Flask-SQLAlchemy in the database-setup cell that follows.
app = Flask(__name__)

In [2]:
#################################################
# Database Setup
#################################################

# Location of the SQLite file holding the developer survey data
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///../db/developers.sqlite"
# Explicitly disable Flask-SQLAlchemy's modification-tracking signals: this
# notebook only reads from the database, and leaving the option unset makes
# Flask-SQLAlchemy emit the "SQLALCHEMY_TRACK_MODIFICATIONS adds significant
# overhead" warning when the extension is created.
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
db = SQLAlchemy(app)

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
# NOTE(review): `reflect=True` is deprecated from SQLAlchemy 1.4 onward in
# favour of `Base.prepare(autoload_with=db.engine)` - confirm the installed
# version before switching.
Base.prepare(db.engine, reflect=True)

# Save references to each table
Survey = Base.classes.survey

  'SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and '


In [3]:
# Select the respondent id, the developer types, and the three skill columns
# (LanguageWorkedWith, DatabaseWorkedWith, FrameworkWorkedWith) from the survey table
s = [
    getattr(Survey, column_name)
    for column_name in (
        'Respondent',
        'DevType',
        'LanguageWorkedWith',
        'DatabaseWorkedWith',
        'FrameworkWorkedWith',
    )
]

# Run the query once and keep every returned row
radarResults = db.session.query(*s).all()

In [4]:
# Load the query rows into a DataFrame and preview the first five rows
df = pd.DataFrame(data=radarResults)
df.head(n=5)

Unnamed: 0,Respondent,DevType,LanguageWorkedWith,DatabaseWorkedWith,FrameworkWorkedWith
0,3047,Game or graphics developer,C;C++;C#;Java;Python;Swift;HTML;CSS,MySQL;SQLite,TensorFlow
1,2999,Data or business analyst;Educator or academic ...,C;C++;Java;SQL;Swift,SQLite;Oracle,
2,2943,Back-end developer;Database administrator;Desk...,C++;Java;JavaScript;HTML;CSS,MySQL;SQLite;Oracle,Spring
3,3203,Back-end developer,C++;Java;Python;SQL;Bash/Shell,Oracle,
4,3010,Mobile developer,C;C#;Java;Objective-C;Rust;Swift;TypeScript,,Xamarin


In [5]:
# Explode the ";"-delimited DevType column so every row carries exactly one dev type.
# The split pieces become the columns of a wide frame indexed by the other four
# fields; stack() then turns each piece into its own row (dropping the padding),
# and reset_index() brings the four carried fields back as ordinary columns.
expdev_df = (
    pd.DataFrame(
        df.DevType.str.split(';').tolist(),
        index=[
            df.Respondent,
            df.LanguageWorkedWith,
            df.DatabaseWorkedWith,
            df.FrameworkWorkedWith,
        ],
    )
    .stack()
    .reset_index()
    .rename(columns={0: 'DevType'})
    # 'level_4' is the position-within-split index level created by stack()
    .drop(columns='level_4')
)

expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,DatabaseWorkedWith,FrameworkWorkedWith,DevType
0,3047,C;C++;C#;Java;Python;Swift;HTML;CSS,MySQL;SQLite,TensorFlow,Game or graphics developer
1,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Data or business analyst
2,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Educator or academic researcher
3,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Engineering manager
4,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Mobile developer


In [6]:
# Turn the ";"-delimited language string into a list of languages per row
expdev_df['LanguageList'] = expdev_df['LanguageWorkedWith'].str.split(';')
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,DatabaseWorkedWith,FrameworkWorkedWith,DevType,LanguageList
0,3047,C;C++;C#;Java;Python;Swift;HTML;CSS,MySQL;SQLite,TensorFlow,Game or graphics developer,"[C, C++, C#, Java, Python, Swift, HTML, CSS]"
1,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Data or business analyst,"[C, C++, Java, SQL, Swift]"
2,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Educator or academic researcher,"[C, C++, Java, SQL, Swift]"
3,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Engineering manager,"[C, C++, Java, SQL, Swift]"
4,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Mobile developer,"[C, C++, Java, SQL, Swift]"


In [7]:
# Top 10 languages, hard-coded from the Stack Overflow website
topLang = [
    'JavaScript',
    'HTML',
    'CSS',
    'SQL',
    'Java',
    'Bash/Shell',
    'Python',
    'C#',
    'PHP',
    'C++',
]

In [8]:
# Test case for one language
# Check if the language is in each entry's list of known languages (LanguageList). If yes, return 1; otherwise, return 0
# expdev_df['Python'] = np.where(expdev_df.LanguageList.apply(lambda x: 'Python' in x), 1, 0)

In [9]:
# Preview the frame before the per-language indicator columns are added
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,DatabaseWorkedWith,FrameworkWorkedWith,DevType,LanguageList
0,3047,C;C++;C#;Java;Python;Swift;HTML;CSS,MySQL;SQLite,TensorFlow,Game or graphics developer,"[C, C++, C#, Java, Python, Swift, HTML, CSS]"
1,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Data or business analyst,"[C, C++, Java, SQL, Swift]"
2,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Educator or academic researcher,"[C, C++, Java, SQL, Swift]"
3,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Engineering manager,"[C, C++, Java, SQL, Swift]"
4,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Mobile developer,"[C, C++, Java, SQL, Swift]"


In [10]:
# For every language in topLang, add a 0/1 indicator column named after the language
for language in topLang:

    print('Checking for: ' + language + '...')

    # 1 when the language appears in the row's list of known languages, otherwise 0
    expdev_df[language] = np.where(
        expdev_df['LanguageList'].map(lambda known: language in known),
        1,
        0,
    )

    print(f'{language} done!')


Checking for: JavaScript...
JavaScript done!
Checking for: HTML...
HTML done!
Checking for: CSS...
CSS done!
Checking for: SQL...
SQL done!
Checking for: Java...
Java done!
Checking for: Bash/Shell...
Bash/Shell done!
Checking for: Python...
Python done!
Checking for: C#...
C# done!
Checking for: PHP...
PHP done!
Checking for: C++...
C++ done!


In [11]:
# Preview the frame now that one 0/1 column per language in topLang has been added
expdev_df.head()

Unnamed: 0,Respondent,LanguageWorkedWith,DatabaseWorkedWith,FrameworkWorkedWith,DevType,LanguageList,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
0,3047,C;C++;C#;Java;Python;Swift;HTML;CSS,MySQL;SQLite,TensorFlow,Game or graphics developer,"[C, C++, C#, Java, Python, Swift, HTML, CSS]",0,1,1,0,1,0,1,1,0,1
1,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Data or business analyst,"[C, C++, Java, SQL, Swift]",0,0,0,1,1,0,0,0,0,1
2,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Educator or academic researcher,"[C, C++, Java, SQL, Swift]",0,0,0,1,1,0,0,0,0,1
3,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Engineering manager,"[C, C++, Java, SQL, Swift]",0,0,0,1,1,0,0,0,0,1
4,2999,C;C++;Java;SQL;Swift,SQLite;Oracle,,Mobile developer,"[C, C++, Java, SQL, Swift]",0,0,0,1,1,0,0,0,0,1


In [12]:
# Aggregation by mean gives the fraction of each dev type that knows each language.
# The 0/1 indicator columns are selected explicitly: expdev_df also holds string
# and list columns, and averaging those raises TypeError on pandas >= 2.0 (older
# versions silently dropped them). Selecting topLang also leaves Respondent out,
# so it no longer has to be dropped afterwards.
langByDev = expdev_df.groupby('DevType')[topLang].mean()

# Discard the group of rows whose DevType is blank; errors='ignore' keeps the
# cell runnable when the data contains no such rows.
langByDev = langByDev.drop(index='', errors='ignore')

# Convert the fractions to percentages, rounded to 2 decimals
langByDev = (langByDev * 100).round(decimals=2)

langByDev

Unnamed: 0_level_0,JavaScript,HTML,CSS,SQL,Java,Bash/Shell,Python,C#,PHP,C++
DevType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Back-end developer,64.91,62.19,59.54,56.08,40.99,36.8,32.82,32.73,30.7,20.74
"C-suite executive (CEO, CTO, etc.)",66.74,65.65,63.42,57.03,36.06,42.22,34.83,31.19,38.41,24.49
Data or business analyst,56.03,58.51,54.82,65.31,34.69,35.93,41.57,32.56,26.95,19.71
Data scientist or machine learning specialist,46.49,48.28,44.29,49.39,38.6,43.75,64.86,21.4,18.91,30.88
Database administrator,68.03,69.74,67.52,70.93,37.64,41.12,34.59,36.39,43.01,22.14
Designer,65.33,69.14,67.14,52.7,38.25,31.2,28.85,32.42,36.96,24.84
Desktop or enterprise applications developer,60.06,60.71,57.76,58.26,41.45,33.38,29.59,51.84,24.25,30.75
DevOps specialist,67.54,63.17,60.38,58.44,41.33,60.19,46.26,30.06,30.42,19.48
Educator or academic researcher,53.53,57.98,54.74,44.96,41.22,45.62,51.28,24.64,27.57,36.36
Embedded applications or devices developer,47.37,47.64,43.79,39.07,40.42,47.29,48.95,32.93,23.41,55.16


In [13]:
# Convert df to dict to serve as json. Orient by index to make it easily searchable by devtype:
# the result is a nested mapping of the form {DevType: {language: percent}}.
langByDev_dict = langByDev.to_dict(orient='index')

# Pretty-print the mapping to check its structure
pprint(langByDev_dict)

{'Back-end developer': {'Bash/Shell': 36.8,
                        'C#': 32.73,
                        'C++': 20.74,
                        'CSS': 59.54,
                        'HTML': 62.19,
                        'Java': 40.99,
                        'JavaScript': 64.91,
                        'PHP': 30.7,
                        'Python': 32.82,
                        'SQL': 56.08},
 'C-suite executive (CEO, CTO, etc.)': {'Bash/Shell': 42.22,
                                        'C#': 31.19,
                                        'C++': 24.49,
                                        'CSS': 63.42,
                                        'HTML': 65.65,
                                        'Java': 36.06,
                                        'JavaScript': 66.74,
                                        'PHP': 38.41,
                                        'Python': 34.83,
                                        'SQL': 57.03},
 'Data or business analyst': {'Bash/Shell': 