In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('data/surveyres2023.csv' , index_col='ResponseId')
df_schema = pd.read_csv('data/surveyschema2023.csv')

In [3]:
pd.options.display.float_format = '{:.2f}'.format
pd.set_option('display.max_columns' , 84)
pd.set_option('display.max_rows' , 84)
pd.set_option('display.max_colwidth', None)

In [4]:
df_schema.loc[[15,16],'question']

15                             Including any education, how many years have you been coding in total?
16    NOT including education, how many years have you coded professionally (as a part of your work)?
Name: question, dtype: object

In [5]:
df_schema = pd.read_csv('data/surveyschema2023.csv' , index_col = 'qname' )

In [6]:
df_schema.drop(["S0","S1","S2","S3","S4","S5","S6","S7","Q120","Q310"] , inplace = True)

In [7]:
df_schema = df_schema.drop(columns=['type', 'selector' , 'qid' , 'force_resp'])

In [8]:
df_schema.head(5)

Unnamed: 0_level_0,question
qname,Unnamed: 1_level_1
MetaInfo,Browser Meta Info
MainBranch,"Which of the following options best describes you today? For the purpose of this survey, a developer is ""someone who writes code"". *"
Age,What is your age? *
Employment,Which of the following best describes your current employment status? Select all that apply.
RemoteWork,Which best describes your current work situation?


In [9]:
#df['Country'] = df['Country'].apply(lambda x:'USA' if x=='United States of America' else 'Iran' if x=='Iran, Islamic Republic of...' else x )
#esier way:
df['Country'] = df['Country'].replace({'United States of America':'United States', 'Iran, Islamic Republic of...': 'Iran'})
df['Country']

ResponseId
1                  NaN
2        United States
3        United States
4        United States
5          Philippines
             ...      
89180           Brazil
89181          Romania
89182           Israel
89183      Switzerland
89184             Iran
Name: Country, Length: 89184, dtype: object

In [10]:
filt = ((df['CompTotal']>96000) & (df['Country']=='United States') & (df['LanguageHaveWorkedWith'].str.contains('Python' , na=False)))
df.loc[filt,['CompTotal','LanguageHaveWorkedWith','Country']]

Unnamed: 0_level_0,CompTotal,LanguageHaveWorkedWith,Country
ResponseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,285000.00,HTML/CSS;JavaScript;Python,United States
14,150000.00,C;C++;Python;Rust,United States
111,145000.00,HTML/CSS;JavaScript;Python;TypeScript,United States
122,200990.00,Go;HTML/CSS;JavaScript;PowerShell;Python;Rust,United States
138,150000.00,Python;R;SQL,United States
...,...,...,...
89079,150000.00,C;C++;Go;HTML/CSS;Java;JavaScript;MATLAB;Python;Rust;SQL;TypeScript,United States
89087,182333.00,Bash/Shell (all shells);HTML/CSS;JavaScript;Python;SQL;TypeScript,United States
89113,187000.00,C#;HTML/CSS;JavaScript;Python;Ruby;TypeScript,United States
89114,200000.00,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaScript;Objective-C;PHP;PowerShell;Python;SQL;TypeScript,United States


In [11]:
#How to reorder columns:
#df['column1'],df['column2'] = df['column2'],df['column1']

In [12]:
'''There are couple of columns that aren't necessary that named knowledge_1,2 and ... and their existence just make our
dataframe nasty so let's create a function that will erase them for us.'''

def erase(*args):
    for arg in args:      
        for col in df.columns:
            if arg in col:
                df.drop(col, axis=1 , inplace = True)

In [13]:
#Before erasing
df.shape

(89184, 83)

In [14]:
erase('Knowledge_','Frequency_')
df.shape
#After erasing

(89184, 72)

In [15]:
del df['Q120']

In [16]:
df.loc[df['Country'].isin(['Iran','United States']),'Country'].value_counts()

Country
United States    18647
Iran               577
Name: count, dtype: int64

In [17]:
filt = (df['Country']=='Iran')

In [18]:
df.loc[filt, 'CompTotal']

ResponseId
75            5500.00
88                NaN
143     3600000000.00
177               NaN
228          20000.00
             ...     
88569             NaN
88636    110000000.00
88930             NaN
88939             NaN
89184   3300000000.00
Name: CompTotal, Length: 577, dtype: float64

In [19]:
filt = (df['ConvertedCompYearly'] < 2000000)
df = df.loc[filt]

In [20]:
df['RemoteWork'].unique()

array(['Remote', 'Hybrid (some remote, some in-person)', 'In-person', nan],
      dtype=object)

In [21]:
df['RemoteWork'].value_counts(normalize=True)

RemoteWork
Remote                                 0.44
Hybrid (some remote, some in-person)   0.42
In-person                              0.14
Name: proportion, dtype: float64

In [22]:
country_df = df.groupby(['Country'])

In [23]:
country_df['WebframeHaveWorkedWith'].apply(lambda x: x.str.contains('FastAPI', na=False).sum()).nlargest(25)

Country
United States                                           675
Germany                                                 265
United Kingdom of Great Britain and Northern Ireland    194
India                                                   159
Canada                                                  128
France                                                  126
Poland                                                  104
Brazil                                                   91
Netherlands                                              91
Spain                                                    88
Israel                                                   67
Australia                                                66
Italy                                                    63
Sweden                                                   56
Switzerland                                              45
Russian Federation                                       41
Mexico                          

In [24]:
filt = (df['WebframeHaveWorkedWith'].str.contains('FastAPI', na=False))
df_fastapi = df.loc[filt]

In [25]:
fastapi_grp = df_fastapi.groupby(['Country'])

In [26]:
fastapi_grp['ConvertedCompYearly'].median().nlargest(25)

Country
Montenegro                                             160634.00
United States                                          160000.00
Luxembourg                                             128507.00
Saudi Arabia                                           126306.00
Israel                                                 122948.00
Cyprus                                                 122317.00
Iceland                                                120674.00
Switzerland                                            114625.00
Swaziland                                              100310.00
Myanmar                                                100000.00
Australia                                               99156.00
China                                                   98482.00
Canada                                                  97028.50
Denmark                                                 94898.00
Ireland                                                 93168.00
United Kingdom of

# Modifying 2019 Data

In [27]:
df19 = pd.read_csv('data/data2019.csv', index_col= 'Respondent')

In [28]:
df19.dropna(axis='index', how= 'any', subset= ['DevType'], inplace=True)

In [29]:
df19['DevType'] = df19['DevType'].apply(lambda x:'Senior Executive (C-Suite, VP, etc.)' if 'Senior executive/VP' in x else x)

In [30]:
df19['DevType'] = df19['DevType'].apply(lambda x:x.split(';')[0])

In [31]:
df19['DevType'].value_counts().head(80)

DevType
Developer, back-end                              25635
Developer, full-stack                            11796
Database administrator                            5906
Academic researcher                               5684
Designer                                          5085
Data or business analyst                          4880
Developer, front-end                              4869
Developer, desktop or enterprise applications     3360
Data scientist or machine learning specialist     2998
Developer, mobile                                 2645
Senior Executive (C-Suite, VP, etc.)              2107
Student                                           1561
Developer, embedded applications or devices       1322
DevOps specialist                                  614
Developer, QA or test                              603
Engineering manager                                451
Developer, game or graphics                        391
Engineer, data                                     382
Sy

In [32]:
df19['Year'] = 2019

In [33]:
df19['SalaryUSD'].fillna(0, inplace=True)
df19['SalaryUSD'] = df19['SalaryUSD'].astype(int)

In [34]:
salary_bins = [0,60000,90000,150000,250000, float('inf')]
salary_labels = ['0-60000', '60001-90000', '90001-150000', '150001-250000', '250001+']

In [35]:
df19['salary_range'] = pd.cut(df19['SalaryUSD'], bins=salary_bins, labels=salary_labels, right=False)

In [36]:
df2019 = df19

In [37]:
df19= df19.loc[:, ['Country', 'SalaryUSD', 'DevType', 'salary_range', 'Year']]

## df 2023

In [38]:
df['Year'] = 2023

In [39]:
df['salary_range'] = pd.cut(df['ConvertedCompYearly'], bins=salary_bins, labels=salary_labels, right=False)

In [40]:
sal_country_grp = df.groupby(['Country', 'salary_range'], observed=False)

In [41]:
sal_country_grp.get_group(('United States', '90001-150000'))

Unnamed: 0_level_0,MainBranch,Age,Employment,RemoteWork,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,LearnCodeCoursesCert,YearsCode,YearsCodePro,DevType,OrgSize,PurchaseInfluence,TechList,BuyNewTool,Country,Currency,CompTotal,LanguageHaveWorkedWith,LanguageWantToWorkWith,DatabaseHaveWorkedWith,DatabaseWantToWorkWith,PlatformHaveWorkedWith,PlatformWantToWorkWith,WebframeHaveWorkedWith,WebframeWantToWorkWith,MiscTechHaveWorkedWith,MiscTechWantToWorkWith,ToolsTechHaveWorkedWith,ToolsTechWantToWorkWith,NEWCollabToolsHaveWorkedWith,NEWCollabToolsWantToWorkWith,OpSysPersonal use,OpSysProfessional use,OfficeStackAsyncHaveWorkedWith,OfficeStackAsyncWantToWorkWith,OfficeStackSyncHaveWorkedWith,OfficeStackSyncWantToWorkWith,AISearchHaveWorkedWith,AISearchWantToWorkWith,AIDevHaveWorkedWith,AIDevWantToWorkWith,NEWSOSites,SOVisitFreq,SOAccount,SOPartFreq,SOComm,SOAI,AISelect,AISent,AIAcc,AIBen,AIToolInterested in Using,AIToolCurrently Using,AIToolNot interested in Using,AINextVery different,AINextNeither different nor similar,AINextSomewhat similar,AINextVery similar,AINextSomewhat different,TBranch,ICorPM,WorkExp,TimeSearching,TimeAnswering,ProfessionalTech,Industry,SurveyLength,SurveyEase,ConvertedCompYearly,Year,salary_range
ResponseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1
7,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Hobby;Contribute to open-source projects;Professional development or self-paced learning from online courses,Some college/university study without earning a degree,Friend or family member;Online Courses or Certification;Coding Bootcamp,,Udemy,4,3,"Developer, full-stack","1,000 to 4,999 employees",I have little or no influence,,Ask developers I know/work with;Read ratings or reviews on third party sites like G2 Crowd,United States,USD\tUnited States dollar,135000.00,Ada;Clojure;Elixir;Go;HTML/CSS;Java;JavaScript;Lisp;OCaml;Raku;Ruby;Scala;Swift;TypeScript;Zig,,MariaDB;Microsoft SQL Server;MySQL;PostgreSQL;SQLite,Datomic,Amazon Web Services (AWS);Digital Ocean,Vercel,AngularJS;jQuery;Node.js;Phoenix;Ruby on Rails;Solid.js;Svelte;Vue.js,,RabbitMQ;Spring Framework,,Bun;Cargo;Chocolatey;Docker;Homebrew;Kubernetes;Make;Maven (build tool);npm;pnpm;Vite;Yarn,,Emacs;IntelliJ IDEA;RubyMine;Visual Studio Code;Xcode,,MacOS;Windows,MacOS,Jira,,Slack,,ChatGPT,,GitHub Copilot,,Stack Overflow,A few times per month or weekly,Yes,Less than once per month or monthly,Neutral,Neutral,Yes,Unfavorable,Improve accuracy in coding,Somewhat distrust,,Writing code;Debugging and getting help,Learning about a codebase;Project planning;Writing code;Documenting code;Testing code;Committing and reviewing code;Deployment and monitoring;Collaborating with teammates,,,,,,Yes,Individual contributor,4.00,Less than 15 minutes a day,15-30 minutes a day,Microservices;Automated testing;Continuous integration (CI) and (more often) continuous delivery,"Information Services, IT, Software Development, or other Technology",Appropriate in length,Easy,135000.00,2023,90001-150000
37,I am a developer by profession,55-64 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Bootstrapping a business;Professional development or self-paced learning from online courses;Freelance/contract work,Some college/university study without earning a degree,"Books / Physical media;Online Courses or Certification;On the job training;Other online resources (e.g., videos, blogs, forum)",Formal documentation provided by the owner of the tech;How-to videos;Video-based Online Courses;Stack Overflow,Udemy;Pluralsight,33,15,"Developer, full-stack",100 to 499 employees,I have some influence,Investigate,Start a free trial;Visit developer communities like Stack Overflow,United States,USD\tUnited States dollar,130000.00,C#;HTML/CSS;JavaScript;SQL;TypeScript,C#;HTML/CSS;JavaScript;Python;Ruby;SQL;TypeScript,Elasticsearch;Microsoft SQL Server;SQLite,Cassandra;Elasticsearch;Microsoft SQL Server;MongoDB;Redis,Microsoft Azure,Amazon Web Services (AWS);Microsoft Azure,Angular;ASP.NET;ASP.NET CORE;Blazor;jQuery,Angular;ASP.NET CORE;Blazor,.NET (5+) ;.NET Framework (1.0 - 4.8),.NET (5+) ;Xamarin,MSBuild;Visual Studio Solution,Docker;Kubernetes;MSBuild;Visual Studio Solution,Notepad++;Visual Studio Code,Notepad++;Visual Studio Code,Windows,Windows,Azure Devops;Jira,Azure Devops;Jira,Microsoft Teams;Zoom,Microsoft Teams,ChatGPT;Google Bard AI,ChatGPT;Google Bard AI,,,Stack Overflow;Stack Exchange,Daily or almost daily,Yes,I have never participated in Q&A on Stack Overflow,"Yes, somewhat",,Yes,Favorable,Greater efficiency;Improve accuracy in coding,Somewhat trust,Learning about a codebase,Writing code;Debugging and getting help,,,,,,,Yes,Individual contributor,39.00,30-60 minutes a day,30-60 minutes a day,DevOps function,"Manufacturing, Transportation, or Supply Chain",Appropriate in length,Easy,130000.00,2023,90001-150000
101,I am a developer by profession,45-54 years old,"Employed, full-time",Remote,Contribute to open-source projects;Professional development or self-paced learning from online courses,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","On the job training;Other online resources (e.g., videos, blogs, forum)",Formal documentation provided by the owner of the tech;Blogs with tips and tricks;Written-based Online Courses;Written Tutorials;Click to write Choice 20;Stack Overflow,,29,24,Database administrator,100 to 499 employees,I have little or no influence,,Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow;Read ratings or reviews on third party sites like G2 Crowd,United States,USD\tUnited States dollar,140000.00,Groovy;HTML/CSS;Java;JavaScript;SQL,Groovy;HTML/CSS;Java;JavaScript,,,Amazon Web Services (AWS),Amazon Web Services (AWS),Angular;jQuery;WordPress,,,,Gradle;Homebrew;Maven (build tool),Chef;Gradle;Maven (build tool),CLion;Eclipse;IntelliJ IDEA;Netbeans;Notepad++,IntelliJ IDEA;Notepad++,MacOS,MacOS;Red Hat;Windows;Other (Please Specify):,Confluence;Jira,Confluence;Jira,Google Chat;Microsoft Teams;Slack;Zoom,Google Chat;Microsoft Teams;Slack,ChatGPT,,,,Stack Overflow;Stack Exchange,Multiple times per day,Yes,Multiple times per day,"Yes, definitely","It is difficult to judge the quality and correctness of AI generated answers at the moment. They often read well and appear to be correct, but who knows. I guess up/down votes would help moderate, as they do for human generated content. I would like to see how AI could help with edits and triage though.","No, and I don't plan to",,,,,,,,,,,,No,,,,,,,Appropriate in length,Easy,140000.00,2023,90001-150000
111,I am a developer by profession,25-34 years old,"Employed, full-time",In-person,Hobby;Contribute to open-source projects,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Books / Physical media;Hackathons (virtual or in-person);Online Courses or Certification;Other online resources (e.g., videos, blogs, forum);School (i.e., University, College, etc)","Formal documentation provided by the owner of the tech;Blogs with tips and tricks;How-to videos;Video-based Online Courses;Written-based Online Courses;Online challenges (e.g., daily or weekly coding challenges);Written Tutorials;Click to write Choice 20;Stack Overflow",Codecademy;Udemy;Pluralsight;Udacity,12,7,"Developer, full-stack",100 to 499 employees,I have little or no influence,,Start a free trial;Ask developers I know/work with;Research companies that have advertised on sites I visit,United States,USD\tUnited States dollar,145000.00,HTML/CSS;JavaScript;Python;TypeScript,Dart;Go;HTML/CSS;JavaScript;Nim;Python;TypeScript;Zig,Firebase Realtime Database;MongoDB,MongoDB,,,Angular;AngularJS;Node.js;Vue.js,Node.js;Svelte;Vue.js,,,npm,npm;pnpm;Vite,Notepad++;Sublime Text;Vim;Visual Studio Code,Notepad++;Sublime Text;Vim;Visual Studio Code,Ubuntu;Windows;Windows Subsystem for Linux (WSL),Debian;Fedora;Ubuntu;Windows,Confluence;Jira;Notion,Confluence;Jira,Google Chat;Google Meet;Microsoft Teams;Skype;Zoom,Google Chat;Google Meet;Microsoft Teams;Signal;Skype;Zoom,Bing AI;ChatGPT;Google Bard AI;WolframAlpha;You.com,ChatGPT,,,Stack Overflow;Stack Exchange,Daily or almost daily,Yes,A few times per week,Neutral,"Possible duplicate/solution detection before posting. Something like, is this an answer: do x?",Yes,Favorable,Increase productivity;Greater efficiency,Neither trust nor distrust,Documenting code;Debugging and getting help;Testing code;Committing and reviewing code,Writing code,Learning about a codebase;Project planning;Deployment and monitoring;Collaborating with teammates,,,,,,Yes,Individual contributor,7.00,60-120 minutes a day,15-30 minutes a day,None of these,,Appropriate in length,Easy,145000.00,2023,90001-150000
131,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Hobby;Contribute to open-source projects,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Other online resources (e.g., videos, blogs, forum)",Formal documentation provided by the owner of the tech;Blogs with tips and tricks;Written Tutorials;Click to write Choice 20;Stack Overflow,,14,12,"Developer, desktop or enterprise applications",500 to 999 employees,I have some influence,Investigate,Ask developers I know/work with,United States,USD\tUnited States dollar,118000.00,Bash/Shell (all shells);C#;PowerShell;Visual Basic (.Net),,MariaDB;MongoDB;MySQL,,Netlify;Vercel,,ASP.NET CORE;NestJS;Next.js;Node.js;React,,.NET Framework (1.0 - 4.8),,MSBuild;npm,,Android Studio;Notepad++;Visual Studio Code,,Debian;Windows,Windows,,,Cisco Webex Teams;Microsoft Teams,,,,,,Stack Overflow;Stack Exchange,Multiple times per day,Yes,Daily or almost daily,"Yes, definitely",,"No, and I don't plan to",,,,,,,,,,,,Yes,Individual contributor,22.00,15-30 minutes a day,15-30 minutes a day,DevOps function;Developer portal or other central places to find tools/services;Continuous integration (CI) and (more often) continuous delivery,,Appropriate in length,Easy,118000.00,2023,90001-150000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89037,I am a developer by profession,25-34 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Freelance/contract work,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Books / Physical media;Online Courses or Certification;On the job training;Other online resources (e.g., videos, blogs, forum);School (i.e., University, College, etc)","Formal documentation provided by the owner of the tech;How-to videos;Video-based Online Courses;Online challenges (e.g., daily or weekly coding challenges);Stack Overflow",Codecademy;Udemy;Pluralsight,6,3,"Developer, full-stack",500 to 999 employees,I have some influence,Investigate,Start a free trial;Ask developers I know/work with;Visit developer communities like Stack Overflow;Ask a generative AI tool,United States,USD\tUnited States dollar,110000.00,Elixir;JavaScript;SQL,Elixir;JavaScript;Solidity,MySQL;PostgreSQL;Redis,,,,AngularJS;Express;Node.js;React,Express;Node.js;React,Apache Kafka;RabbitMQ,,Docker;Homebrew,Unity 3D;Unreal Engine,Visual Studio Code;Xcode,Visual Studio Code,Windows,MacOS,Confluence;Jira,Jira,Discord;Google Meet;Slack;Zoom,Discord;Slack,ChatGPT,ChatGPT,,,Stack Overflow;Stack Overflow for Teams (private knowledge sharing & collaboration platform for companies),Less than once per month or monthly,Not sure/can't remember,,"No, not really",,Yes,Very favorable,Increase productivity;Greater efficiency;Speed up learning,Somewhat distrust,Learning about a codebase;Project planning;Testing code;Committing and reviewing code;Deployment and monitoring,Debugging and getting help,,,,,,Debugging and getting help,Yes,Individual contributor,3.00,15-30 minutes a day,60-120 minutes a day,DevOps function;Microservices;Continuous integration (CI) and (more often) continuous delivery,Financial Services,Too long,Easy,110000.00,2023,90001-150000
89071,I am a developer by profession,25-34 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Hobby,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Other online resources (e.g., videos, blogs, forum);School (i.e., University, College, etc)",How-to videos;Written Tutorials;Stack Overflow,,11,3,Research & Development role,"10,000 or more employees",I have some influence,Given a list,Start a free trial;Visit developer communities like Stack Overflow,United States,USD\tUnited States dollar,111000.00,Bash/Shell (all shells);HTML/CSS;JavaScript;Python;SQL;TypeScript,Bash/Shell (all shells);HTML/CSS;Python;TypeScript,Cloud Firestore;PostgreSQL,Cloud Firestore;PostgreSQL,Amazon Web Services (AWS);Digital Ocean;Firebase,Amazon Web Services (AWS);Digital Ocean;Firebase,Flask;Node.js;React;Remix,ASP.NET CORE;Deno;Node.js;React;Remix,,,APT;Docker;Homebrew;npm;NuGet;Unity 3D,Bun;Docker;Homebrew;npm;NuGet;pnpm;Unity 3D;Unreal Engine;Webpack,Jupyter Notebook/JupyterLab;Notepad++;Vim;Visual Studio;Visual Studio Code,Jupyter Notebook/JupyterLab;Notepad++;Vim;Visual Studio Code,Debian;Windows;Windows Subsystem for Linux (WSL),Debian;MacOS,Azure Devops;GitHub Discussions;Markdown File;Stack Overflow for Teams;Trello,GitHub Discussions;Markdown File;Trello,Discord;Google Meet;Microsoft Teams;Slack;Zoom,Discord;Microsoft Teams;Slack,ChatGPT;Phind,ChatGPT;Phind,,,Stack Overflow,Multiple times per day,Yes,A few times per month or weekly,Neutral,,"No, but I plan to soon",Favorable,,Somewhat trust,,,,,,,,,Yes,Individual contributor,1.00,15-30 minutes a day,30-60 minutes a day,DevOps function;Observability tools;Continuous integration (CI) and (more often) continuous delivery,"Manufacturing, Transportation, or Supply Chain",Appropriate in length,Neither easy nor difficult,111000.00,2023,90001-150000
89082,I am a developer by profession,25-34 years old,"Employed, full-time",Remote,Hobby,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Online Courses or Certification;On the job training;Other online resources (e.g., videos, blogs, forum)","Formal documentation provided by the owner of the tech;Blogs with tips and tricks;Recorded coding sessions;How-to videos;Video-based Online Courses;Written-based Online Courses;Online challenges (e.g., daily or weekly coding challenges);Written Tutorials;Click to write Choice 20;Stack Overflow;Interactive tutorial",Codecademy;Udemy;Pluralsight,6,5,"Developer, full-stack","5,000 to 9,999 employees",I have little or no influence,,Start a free trial;Visit developer communities like Stack Overflow;Research companies that have advertised on sites I visit;Read ratings or reviews on third party sites like G2 Crowd,United States,USD\tUnited States dollar,95000.00,C#;Java,C#;Java;JavaScript;Rust;TypeScript,Oracle,Microsoft SQL Server;MongoDB;MySQL,,,ASP.NET;ASP.NET CORE;Spring Boot,React,.NET Framework (1.0 - 4.8),,npm;NuGet;Visual Studio Solution,Cargo;Kubernetes;npm,Notepad++;Visual Studio;Visual Studio Code,Notepad++;Visual Studio Code,Ubuntu;Windows,MacOS;Ubuntu;Windows,Confluence,,Discord;Microsoft Teams,Discord;Microsoft Teams,,,,,Stack Overflow;Stack Exchange,A few times per week,Not sure/can't remember,,"No, not really",It could maybe help show relevant questions on a search that might not necessarily be worded the same.,"No, and I don't plan to",,,,,,,,,,,,Yes,Individual contributor,4.00,60-120 minutes a day,30-60 minutes a day,DevOps function;Microservices;Automated testing;Observability tools;Innersource initiative,Financial Services,Too long,Neither easy nor difficult,95000.00,2023,90001-150000
89092,I am a developer by profession,45-54 years old,"Employed, full-time","Hybrid (some remote, some in-person)",I don’t code outside of work,"Professional degree (JD, MD, Ph.D, Ed.D, etc.)","Online Courses or Certification;On the job training;Other online resources (e.g., videos, blogs, forum);Coding Bootcamp",Formal documentation provided by the owner of the tech;Recorded coding sessions;How-to videos;Video-based Online Courses;Stack Overflow,Pluralsight;Udacity,8,5,"Developer, front-end","10,000 or more employees",I have some influence,Investigate,Start a free trial;Ask developers I know/work with,United States,USD\tUnited States dollar,123000.00,HTML/CSS;JavaScript;TypeScript,HTML/CSS;Java;JavaScript;TypeScript,Cloud Firestore,Cloud Firestore,Firebase,Amazon Web Services (AWS);Firebase,Angular,Angular,,,Homebrew;npm,Homebrew;npm,Android Studio;Visual Studio Code;Xcode,Android Studio;IntelliJ IDEA;Visual Studio Code;Xcode,MacOS,MacOS,Confluence;Jira,Confluence;GitHub Discussions;Jira,Microsoft Teams;Zoom,Microsoft Teams;Slack;Zoom,,,,,Stack Overflow;Stack Exchange,Multiple times per day,Yes,Less than once per month or monthly,"Yes, somewhat","If AI could improve new users' ability *and likelihood* to prevent duplicates by finding existing answers to their question, THAT could be very useful.","No, and I don't plan to",,,,,,,,,,,,Yes,Individual contributor,5.00,15-30 minutes a day,15-30 minutes a day,DevOps function;Automated testing;Observability tools;Continuous integration (CI) and (more often) continuous delivery,Financial Services,Appropriate in length,Easy,123000.00,2023,90001-150000


In [42]:
# filt = (df['DevType']== 'Other (please specify):')
# df.drop(index=df[filt].index)
# df['DevType'].replace({'Research & Development role':'Academic researcher', 'Cloud infrastructure engineer':'DevOps specialist',
#                        'Security professional':'DevOps specialist', 'Hardware Engineer':'Engineering manager',
#                        'Project manager':'Product manager', 'Developer Experience': 'Developer, QA or test',
                       

In [43]:
devseries = sal_country_grp['DevType'].value_counts().loc['United States',['90001-150000', '150001-250000', '250001+']].head(80)
devseries

Country        salary_range   DevType                                      
United States  90001-150000   Developer, full-stack                            1771
                              Developer, back-end                               585
                              Developer, front-end                              279
                              Developer, desktop or enterprise applications     259
                              Developer, embedded applications or devices       172
                              Other (please specify):                           126
                              Data scientist or machine learning specialist      93
                              Developer, mobile                                  89
                              Engineer, data                                     81
                              DevOps specialist                                  74
                              Engineering manager                                66


In [44]:
sal_country_grp['WebframeHaveWorkedWith'].apply(lambda x: x.str.contains('FastAPI', na=False).sum() / len(x) * 100).loc[['United States', 'Canada']]

Country        salary_range 
United States  0-60000         2.68
               60001-90000     4.04
               90001-150000    4.92
               150001-250000   6.66
               250001+         6.95
Canada         0-60000         6.82
               60001-90000     4.96
               90001-150000    6.02
               150001-250000   7.93
               250001+         8.70
Name: WebframeHaveWorkedWith, dtype: float64

In [45]:
sal_country_grp[['LearnCodeCoursesCert', 'EdLevel']].value_counts(normalize=True).loc['United States', ['90001-150000', '150001-250000']].nlargest(60)

Country        salary_range   LearnCodeCoursesCert                   EdLevel                                                                           
United States  90001-150000   Udemy                                  Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                         0.11
               150001-250000  Udemy                                  Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                         0.08
                              Pluralsight                            Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                         0.07
               90001-150000   Pluralsight                            Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                         0.06
               150001-250000  Udemy;Pluralsight                      Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                         0.06
               90001-150000   Udemy;Pluralsight      

In [46]:
df.rename(columns={'ConvertedCompYearly':'SalaryUSD'}, inplace=True)
df.index.names = ['Respondent']

In [47]:
df_devtype = df.loc[:, ['Country', 'SalaryUSD', 'DevType', 'salary_range', 'Year']]

In [48]:
df_devtype = pd.concat([df19, df_devtype], sort=False)

In [49]:
df_devtype['SalaryUSD'] = df_devtype['SalaryUSD'].astype(int)

In [50]:
df_devtype

Unnamed: 0_level_0,Country,SalaryUSD,DevType,salary_range,Year
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,Bosnia and Herzegovina,0,"Developer, desktop or enterprise applications",0-60000,2019
3,Thailand,8820,Designer,0-60000,2019
4,United States,61000,"Developer, full-stack",60001-90000,2019
5,Ukraine,0,Academic researcher,0-60000,2019
6,Canada,366420,Data or business analyst,250001+,2019
...,...,...,...,...,...
89163,Brazil,50719,"Developer, back-end",0-60000,2023
89167,Armenia,16917,"Developer, full-stack",0-60000,2023
89168,India,15752,"Developer, mobile",0-60000,2023
89171,France,64254,Other (please specify):,60001-90000,2023


In [51]:
devgroup = df_devtype.groupby(['Year', 'Country', 'salary_range'], observed=False)
salgp = df_devtype.groupby(['Year', 'Country', 'DevType'])

In [52]:
devgroup['DevType'].value_counts().loc[[2019, 2023],'United States', '150001-250000'].head(60)

Year  Country        salary_range   DevType                                      
2019  United States  150001-250000  Developer, back-end                               915
                                    Developer, full-stack                             391
                                    Senior Executive (C-Suite, VP, etc.)              146
                                    Database administrator                            144
                                    Data scientist or machine learning specialist     138
                                    Developer, front-end                              134
                                    Data or business analyst                          110
                                    Developer, desktop or enterprise applications      88
                                    Designer                                           82
                                    Academic researcher                                70
                  

In [53]:
salgp['SalaryUSD'].median().loc[[2019,2023], 'United States', ['Data scientist or machine learning specialist',
                                                              'DevOps specialist', 'Developer, back-end', 'Developer, front-end',
                                                              'Developer, full-stack']]

Year  Country        DevType                                      
2019  United States  Data scientist or machine learning specialist   110000.00
                     DevOps specialist                               111000.00
                     Developer, back-end                             105000.00
                     Developer, front-end                             89000.00
                     Developer, full-stack                            92450.00
2023  United States  Data scientist or machine learning specialist   160000.00
                     DevOps specialist                               160000.00
                     Developer, back-end                             165000.00
                     Developer, front-end                            139510.00
                     Developer, full-stack                           140000.00
Name: SalaryUSD, dtype: float64

In [54]:
filt = (df['WebframeHaveWorkedWith'].str.contains('Django', na=False))
df.loc[filt, 'WebframeHaveWorkedWith'].count()

4141

In [55]:
filt = (df['WebframeHaveWorkedWith'].str.contains('FastAPI', na=False))
df.loc[filt, 'WebframeHaveWorkedWith'].count()

3088

In [56]:
df2019.rename(columns= {'WebFrameWorkedWith': 'WebframeHaveWorkedWith'}, inplace= True) 

In [57]:
filt = (df2019['WebframeHaveWorkedWith'].str.contains('Django', na=False))
df2019.loc[filt, 'WebframeHaveWorkedWith'].count()

7642

In [58]:
filt = (df2019['WebframeHaveWorkedWith'].str.contains('FastAPI', na=False))
df2019.loc[filt, 'WebframeHaveWorkedWith']

Series([], Name: WebframeHaveWorkedWith, dtype: object)

In [59]:
df_web19 = df2019.assign(WebframeHaveWorkedWith=df2019['WebframeHaveWorkedWith'].str.split(';')).explode('WebframeHaveWorkedWith')
df_web19 = df_web19.loc[:, ['Year', 'Country', 'WebframeHaveWorkedWith']]

In [60]:
df_web23 = df.assign(WebframeHaveWorkedWith=df['WebframeHaveWorkedWith'].str.split(';')).explode('WebframeHaveWorkedWith')
df_web23 = df_web23.loc[:, ['Year', 'Country', 'WebframeHaveWorkedWith']]

# WebFrame Work

In [61]:
df_framework = pd.concat([df_web19, df_web23], sort = False, ignore_index = True)

In [62]:
df_framework['Year'] = df_framework['Year'].astype(str)

In [63]:
filt = (df_framework['Year']=='2019')
df_framework[filt].count()

Year                      167235
Country                   167235
WebframeHaveWorkedWith    147334
dtype: int64

In [64]:
filt = (df_framework['WebframeHaveWorkedWith'].str.contains('FastAPI', na= False))
df_framework.loc[filt]

Unnamed: 0,Year,Country,WebframeHaveWorkedWith
167315,2023,Germany,FastAPI
167342,2023,Germany,FastAPI
167372,2023,Cyprus,FastAPI
167402,2023,Greece,FastAPI
167425,2023,France,FastAPI
...,...,...,...
307249,2023,Russian Federation,FastAPI
307287,2023,Russian Federation,FastAPI
307289,2023,India,FastAPI
307295,2023,Australia,FastAPI


In [65]:
df_framework.dropna(axis= 'index', how='any', subset=['WebframeHaveWorkedWith'], inplace= True)

In [66]:
filt = (df_framework['WebframeHaveWorkedWith'].str.contains('FastAPI', na= False)) & (df['Year']=='2019')
df_framework.loc[filt]

Unnamed: 0,Year,Country,WebframeHaveWorkedWith


In [67]:
frameworks_of_interest = ['Django', 'Flask', 'FastAPI']
df_framework_filtered = df_framework[df_framework['WebframeHaveWorkedWith'].isin(frameworks_of_interest)]

In [88]:
df_framework_filtered['Country'].count()

26337

In [68]:
filt = (df_framework['Year']=='2019')
df_framework_filtered.loc[filt]

Unnamed: 0,Year,Country,WebframeHaveWorkedWith
0,2019,Bosnia and Herzegovina,Django
3,2019,Ukraine,Django
5,2019,Ukraine,Flask
12,2019,India,Flask
20,2019,India,Django
...,...,...,...
167193,2019,Russian Federation,Django
167201,2019,United Kingdom,Django
167202,2019,United Kingdom,Flask
167211,2019,Argentina,Django


In [69]:
grp_framework = df_framework_filtered.groupby(['Year', 'Country', 'WebframeHaveWorkedWith']).size().reset_index(name='count')

In [70]:
django_df = grp_framework[grp_framework['WebframeHaveWorkedWith'] == 'Django']
flask_df = grp_framework[grp_framework['WebframeHaveWorkedWith'] == 'Flask']
fastapi_df = grp_framework[grp_framework['WebframeHaveWorkedWith'] == 'FastAPI']

In [78]:
django_flask = pd.merge(django_df, flask_df,  on=['Year', 'Country'], suffixes=('_django', '_flask'), how='outer')

In [80]:
comparison_df = pd.merge(django_flask, fastapi_df,  on=['Year', 'Country'], suffixes=('', '_fastapi'), how='outer')

In [81]:
comparison_df = comparison_df.rename(columns={'WebframeHaveWorkedWith': 'WebframeHaveWorkedWith_fastapi', 'count': 'count_fastapi'})

In [99]:
countries = ['United States', 'Canada', 'Germany', 'India', 'Australia']
filt = (comparison_df['Country'].isin(countries))
comparison_df.loc[filt]

Unnamed: 0,Year,Country,WebframeHaveWorkedWith_django,count_django,WebframeHaveWorkedWith_flask,count_flask,WebframeHaveWorkedWith_fastapi,count_fastapi
6,2019,Australia,Django,127.0,Flask,148.0,,
20,2019,Canada,Django,263.0,Flask,271.0,,
44,2019,Germany,Django,346.0,Flask,372.0,,
55,2019,India,Django,949.0,Flask,697.0,,
135,2019,United States,Django,1776.0,Flask,2037.0,,
148,2023,Australia,Django,89.0,Flask,89.0,FastAPI,66.0
162,2023,Canada,Django,173.0,Flask,180.0,FastAPI,128.0
181,2023,Germany,Django,244.0,Flask,296.0,FastAPI,265.0
189,2023,India,Django,270.0,Flask,247.0,FastAPI,159.0
267,2023,United States,Django,968.0,Flask,1160.0,FastAPI,675.0
