In [1]:
import pandas as pd

In [2]:
# Load the public survey responses from CSV into a DataFrame.
df = pd.read_csv('data/survey_results_public.csv')

In [3]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(64461, 61)

In [4]:
# Print a summary of the frame: the index range and, for every column,
# its non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64461 entries, 0 to 64460
Data columns (total 61 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Respondent                    64461 non-null  int64  
 1   MainBranch                    64162 non-null  object 
 2   Hobbyist                      64416 non-null  object 
 3   Age                           45446 non-null  float64
 4   Age1stCode                    57900 non-null  object 
 5   CompFreq                      40069 non-null  object 
 6   CompTotal                     34826 non-null  float64
 7   ConvertedComp                 34756 non-null  float64
 8   Country                       64072 non-null  object 
 9   CurrencyDesc                  45472 non-null  object 
 10  CurrencySymbol                45472 non-null  object 
 11  DatabaseDesireNextYear        44070 non-null  object 
 12  DatabaseWorkedWith            49537 non-null  object 
 13  D

In [5]:
# Raise pandas' display limits so that wide output is not elided.
# 61 matches the number of columns in the survey frame, so every column is shown.
pd.options.display.max_columns = 61
# Show up to 61 rows before pandas truncates the output with "..."
# (this is a cap on displayed rows, not "all rows" of the data).
pd.options.display.max_rows = 61

In [6]:
# Load the survey schema, indexed by column name, so the question text behind
# each column of the results can be looked up by label.
# NOTE(review): `schama_df` is a misspelling of `schema_df`; it is kept because
# the following cells refer to it by this name.
schama_df = pd.read_csv('data/survey_results_schema.csv',index_col='Column')

In [7]:
# Preview the first ten schema rows (column name -> question text).
schama_df.head(10)

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Respondent,Randomized respondent ID number (not in order ...
MainBranch,Which of the following options best describes ...
Hobbyist,Do you code as a hobby?
Age,What is your age (in years)? If you prefer not...
Age1stCode,At what age did you write your first line of c...
CompFreq,"Is that compensation weekly, monthly, or yearly?"
CompTotal,What is your current total compensation (salar...
ConvertedComp,Salary converted to annual USD salaries using ...
Country,Where do you live?
CurrencyDesc,Which currency do you use day-to-day? If your ...


# Selecting Rows and Columns:

In [8]:
# A DataFrame can be thought of as a table of rows and columns; in Python terms it behaves like a dict
# whose keys are the column names and whose values are the lists of row values for each column.
# For example, in this table 'Respondent' is one key and all the respondent IDs form its list of values.
 

In [9]:
# Re-check the frame's dimensions: (rows, columns).
df.shape

(64461, 61)

In [10]:
# Index object listing every column label in the frame.
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'Age', 'Age1stCode', 'CompFreq',
       'CompTotal', 'ConvertedComp', 'Country', 'CurrencyDesc',
       'CurrencySymbol', 'DatabaseDesireNextYear', 'DatabaseWorkedWith',
       'DevType', 'EdLevel', 'Employment', 'Ethnicity', 'Gender', 'JobFactors',
       'JobSat', 'JobSeek', 'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
  

In [11]:
# Select the Hobbyist column as a Series and preview its first five answers.
df['Hobbyist'].head()

0    Yes
1     No
2    Yes
3    Yes
4    Yes
Name: Hobbyist, dtype: object

In [12]:
# value_counts tallies how many times each distinct non-null answer occurs.
df['Hobbyist'].value_counts()

Yes    50388
No     14028
Name: Hobbyist, dtype: int64

In [13]:
# Single-cell lookup with .loc: the row labelled 0 (the first row) and its 'Hobbyist' column.
df.loc[0,'Hobbyist']

'Yes'

In [14]:
# Rows labelled 4 and 5 (the fifth and sixth rows), restricted to the two listed columns.
df.loc[[4,5],['Hobbyist','LanguageWorkedWith']]

Unnamed: 0,Hobbyist,LanguageWorkedWith
4,Yes,HTML/CSS;Ruby;SQL
5,No,HTML/CSS;Java;JavaScript


In [15]:
# .loc also accepts slices for rows and columns. Unlike ordinary Python slicing,
# label slices include the end point: rows 0 through 2 and every column from
# 'Hobbyist' through 'Country' are returned.
df.loc[0:2,'Hobbyist':'Country']

Unnamed: 0,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country
0,Yes,,13,Monthly,,,Germany
1,No,,19,,,,United Kingdom
2,Yes,,15,,,,Russian Federation


# Set, reset and use indexes

In [16]:
# Use the respondent ID as the row index so rows can be addressed by respondent.
# The guard makes the cell safe to re-run: once 'Respondent' has become the index
# it is no longer a column, and a second set_index call would raise KeyError.
# The same result can be obtained at load time by passing index_col to read_csv:
#   df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')
if df.index.name != 'Respondent':
    df = df.set_index('Respondent')

In [17]:
# Because the schema is indexed by column name, the question behind a survey
# column can be fetched directly by label.
schama_df.loc['Hobbyist','QuestionText']

'Do you code as a hobby?'

In [18]:
# Sort the schema rows by their index (the survey column names).
schama_df = schama_df.sort_index()

# Filtering Data using conditions on rows and columns

In [19]:
# Look up the question text for the two columns used in the filtering examples below.
schama_df.loc[['ConvertedComp','LanguageWorkedWith'],'QuestionText']

Column
ConvertedComp         Salary converted to annual USD salaries using ...
LanguageWorkedWith    Which programming, scripting, and markup langu...
Name: QuestionText, dtype: object

In [20]:
# Boolean mask: True for respondents whose ConvertedComp (salary converted to
# annual USD) is above 70,000. The following cells use it to select rows.
high_sal_filt = (df['ConvertedComp'] > 70000)


In [21]:
# Apply the high-salary mask to the rows and keep only the three columns of interest.
df.loc[high_sal_filt,['Country','LanguageWorkedWith','ConvertedComp']]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
8,United States,Python;SQL,116000.0
16,United Kingdom,Bash/Shell/PowerShell;HTML/CSS;Java;JavaScript...,108576.0
17,United States,C#;HTML/CSS;JavaScript;Python;SQL;VBA,79000.0
18,United States,Bash/Shell/PowerShell;HTML/CSS;Perl,1260000.0
19,United States,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;S...,83400.0
...,...,...,...
65586,United States,,225000.0
65589,United States,,150000.0
65602,United States,,140000.0
65604,United States,,150000.0


In [22]:
# Narrow the high-salary rows to a few countries and drop respondents who did
# not list any language.

countries = ['United States','India','Germany']
countries_filt = df['Country'].isin(countries)
# True where a language list is present. The mask now matches its name, so it is
# combined directly instead of being negated at the point of use.
lang_not_null = df['LanguageWorkedWith'].notna()

# Combine the masks element-wise with & (all three conditions must hold).
df.loc[high_sal_filt & countries_filt & lang_not_null, ['Country','LanguageWorkedWith','ConvertedComp']]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
8,United States,Python;SQL,116000.0
17,United States,C#;HTML/CSS;JavaScript;Python;SQL;VBA,79000.0
18,United States,Bash/Shell/PowerShell;HTML/CSS;Perl,1260000.0
19,United States,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;S...,83400.0
24,Germany,Bash/Shell/PowerShell;Java;Kotlin;PHP;SQL,91883.0
...,...,...,...
65476,United States,C;C#;C++;Dart;Go;HTML/CSS;Kotlin;Python;TypeSc...,200000.0
65477,United States,Bash/Shell/PowerShell;C;C++;Go;Perl;Python;Ruby,250000.0
65502,United States,C#;HTML/CSS;JavaScript;Perl;Python;SQL;TypeScript,2000000.0
65523,India,Bash/Shell/PowerShell;C;C++;Go;HTML/CSS;Java;J...,75396.0


In [23]:
# LanguageWorkedWith holds a single semicolon-separated string per respondent,
# so the vectorised string methods under .str can be applied to it.
# Here we keep the rows whose language string contains 'Python'; na=False makes
# missing values count as "no match" instead of propagating NaN into the mask.

lang_filt = df['LanguageWorkedWith'].str.contains('Python' ,na=False)
df.loc[lang_filt, ['Country','LanguageWorkedWith','ConvertedComp']].head()

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,Russian Federation,Objective-C;Python;Swift,
8,United States,Python;SQL,116000.0
10,United Kingdom,HTML/CSS;Java;JavaScript;Python;SQL,32315.0
13,Netherlands,C;JavaScript;Python,38916.0
15,France,Bash/Shell/PowerShell;C;HTML/CSS;Java;Python;SQL,


# Alter Rows and Columns

In [24]:
# Give the converted-compensation column a clearer name; the cells below use 'SalaryUSD'.
df = df.rename(columns={'ConvertedComp': 'SalaryUSD'})

In [25]:
# Recode SOAccount: 'Yes' -> True, 'No' -> False. Any other value (including
# missing answers) is passed through unchanged via the dict lookup's fallback.

df['SOAccount'] = df['SOAccount'].apply(
    lambda answer: {'Yes': True, 'No': False}.get(answer, answer)
)



In [26]:
# Undo the recoding with replace: False -> 'No', True -> 'Yes'.
# Values that are not keys of the dict are left as they are.
df['SOAccount'] = df['SOAccount'].replace({False:'No',True:'Yes'})
df['SOAccount'].head()

Respondent
1     No
2    Yes
3    Yes
4    Yes
5    Yes
Name: SOAccount, dtype: object

In [27]:
# map is another way to recode a column, shown here on Hobbyist.
# Unlike replace, values that are not keys of the dict become NaN.
# The result is only displayed; it is not assigned back to df.
df['Hobbyist'].map({'Yes':True,'No': False})

Respondent
1         True
2        False
3         True
4         True
5         True
         ...  
64858     True
64867     True
64898     True
64925     True
65112     True
Name: Hobbyist, Length: 64461, dtype: object

# Sorting Data

In [34]:
# Sort the data by country (A -> Z) and, within each country, by salary from
# highest to lowest.
df = df.sort_values(by=['Country', 'SalaryUSD'], ascending=[True, False])

# Preview the first 100 rows of the two sort keys.
df.loc[:, ['Country', 'SalaryUSD']].head(100)

Unnamed: 0_level_0,Country,SalaryUSD
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1
62788,Afghanistan,1000000.0
65381,Afghanistan,1000000.0
65456,Afghanistan,1000000.0
38672,Afghanistan,231192.0
28374,Afghanistan,130000.0
...,...,...
51520,Albania,15900.0
23498,Albania,13776.0
37526,Albania,12972.0
564,Albania,10812.0


In [37]:
# Restore the default ordering by sorting on the index (Respondent) again.
df = df.sort_index()

In [39]:
# The ten largest salary values, as a Series indexed by Respondent.
df['SalaryUSD'].nlargest(10)

Respondent
123     2000000.0
125     2000000.0
193     2000000.0
665     2000000.0
699     2000000.0
724     2000000.0
818     2000000.0
986     2000000.0
1022    2000000.0
1036    2000000.0
Name: SalaryUSD, dtype: float64

In [41]:
# nlargest also works on the whole DataFrame when the ranking column is named:
# the full rows of the five highest salaries are returned.
df.nlargest(5,'SalaryUSD')

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,SalaryUSD,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
123,I am a developer by profession,Yes,26.0,12,Weekly,120000.0,2000000.0,United States,United States dollar,USD,Cassandra;Firebase;MongoDB;PostgreSQL;Redis,Cassandra,"Developer, back-end;Developer, full-stack","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,"Languages, frameworks, and other technologies ...",Neither satisfied nor dissatisfied,"I’m not actively looking, but I am open to new...",C;C++;Go;Kotlin;Rust,Bash/Shell/PowerShell;C++;Go;Java;Python,Flutter;Hadoop;TensorFlow;Unreal Engine,.NET Core,Confluence;Jira;Github;Slack,Confluence;Jira;Github;Gitlab;Slack;Microsoft ...,Yes,Somewhat important,Very important,Trouble with my direct manager;Trouble with le...,"Read company media, such as employee blogs or ...",Once a year,Yes,No,No,Rarely: 1-2 days per year or less,Start a free trial;Ask developers I know/work ...,Indifferent,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Linux-based,"10,000 or more employees",Android;Arduino;AWS;Docker;Google Cloud Platfo...,AWS;Docker;Linux;Raspberry Pi,I have some influence,Straight / Heterosexual,Yes,"Yes, somewhat",Less than once per month or monthly,Daily or almost daily,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Flask;jQuery;React.js,Spring,Just as welcome now as I felt last year,36.0,8,3
125,"I am not primarily a developer, but I write co...",Yes,41.0,30,Monthly,200000.0,2000000.0,United States,United States dollar,USD,PostgreSQL,PostgreSQL,Data scientist or machine learning specialist;...,"Other doctoral degree (Ph.D., Ed.D., etc.)",Employed full-time,White or of European descent,Man,Flex time or a flexible schedule;Family friend...,Very satisfied,I am not interested in new job opportunities,Python,Python;SQL,Keras;Pandas;TensorFlow;Torch/PyTorch,Keras;Pandas;TensorFlow,Jira,Jira,Not sure,Neutral,Critically important,Better work/life balance,Personal network - friends or family;Directly ...,Once every few years,No,Yes,No,Occasionally: 1-2 days per quarter but less th...,,Amused,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Windows,"5,000 to 9,999 employees",Docker;Kubernetes,Docker,I have little or no influence,Straight / Heterosexual,Not sure/can't remember,"No, not really",,Multiple times per day,Easy,Appropriate in length,No,,,,Just as welcome now as I felt last year,40.0,11,11
193,I am a developer by profession,Yes,29.0,16,Weekly,120000.0,2000000.0,United States,United States dollar,USD,,,"Developer, mobile","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,White or of European descent,Man,Specific department or team I’d be working on;...,Very satisfied,"I’m not actively looking, but I am open to new...",Objective-C;Swift,Java;JavaScript;Objective-C;Swift,,,"Github;Slack;Trello;Google Suite (Docs, Meet, ...",Github;Gitlab;Slack;Microsoft Teams;Trello;Goo...,No,Neutral,Fairly important,Curious about other opportunities;Better compe...,"Read company media, such as employee blogs or ...",Every few months,No,Onboarding? What onboarding?,No,Never,,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Play games;Visit Stack Overflow;Do other work ...,MacOS,10 to 19 employees,Android;iOS,Android;iOS,I have little or no influence,Straight / Heterosexual,Yes,"No, not really",Less than once per month or monthly,A few times per week,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",,,Just as welcome now as I felt last year,40.0,13,7
665,I am a developer by profession,Yes,24.0,13,Weekly,150000.0,2000000.0,United States,United States dollar,USD,,PostgreSQL,"Developer, front-end","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Black or of African descent;Biracial,Woman,Diversity of the company or organization;Flex ...,Very satisfied,I am not interested in new job opportunities,HTML/CSS;JavaScript;TypeScript,HTML/CSS;JavaScript;Python;Ruby,Node.js,Node.js,"Jira;Github;Slack;Google Suite (Docs, Meet, etc)","Jira;Github;Slack;Google Suite (Docs, Meet, etc)",Yes,Extremely important,Somewhat important,Better compensation;Trouble with leadership at...,"Read company media, such as employee blogs or ...",Once a year,Not sure,Yes,Yes,Occasionally: 1-2 days per quarter but less th...,,Indifferent,Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,MacOS,"1,000 to 4,999 employees",AWS;Google Cloud Platform;Kubernetes;Linux;Mac...,Google Cloud Platform;Heroku;MacOS;WordPress,I have little or no influence,Bisexual;Queer,Yes,"No, not really",I have never participated in Q&A on Stack Over...,A few times per month or weekly,Neither easy nor difficult,Appropriate in length,No,"A humanities discipline (such as literature, h...",React.js,Express;React.js;Ruby on Rails,Just as welcome now as I felt last year,40.0,4,Less than 1 year
699,"I am not primarily a developer, but I write co...",Yes,39.0,16,Weekly,52000.0,2000000.0,United States,United States dollar,USD,MariaDB;Microsoft SQL Server;MySQL;PostgreSQL,Microsoft SQL Server;MySQL,System administrator,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,Diversity of the company or organization;Offic...,Slightly satisfied,I am actively looking for a job,Bash/Shell/PowerShell;C;C#;C++;HTML/CSS;Java;J...,Bash/Shell/PowerShell;C;C#;C++;HTML/CSS;Java;J...,.NET;.NET Core;Pandas;Puppet;Unreal Engine,,Microsoft Teams;Microsoft Azure;Trello;Google ...,Jira;Microsoft Teams;Microsoft Azure;Trello;Go...,No,Somewhat important,Critically important,Growth or leadership opportunities,"Read company media, such as employee blogs or ...",Once a year,,No,No,Often: 1-2 days per week or more,Start a free trial;Visit developer communities...,"Hello, old friend",I have never visited any of these sites,Call a coworker or friend;Visit Stack Overflow...,Windows,100 to 499 employees,Android;Arduino;AWS;Google Cloud Platform;Kube...,WordPress,,Straight / Heterosexual,,,,,Neither easy nor difficult,Appropriate in length,No,"Information systems, information technology, o...",Angular;ASP.NET;ASP.NET Core;Express;Flask,,,40.0,5,2


In [42]:
# The counterpart of nlargest: the full rows of the five lowest salaries.
df.nsmallest(5,'SalaryUSD')

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,SalaryUSD,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
124,I am a developer by profession,No,38.0,14,Yearly,0.0,0.0,United States,United States dollar,USD,Firebase,Firebase,"Designer;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,,Very satisfied,I am not interested in new job opportunities,C++;Swift,Swift,Unreal Engine,,,,,,Not at all important/not necessary,,,Once every few years,Yes,,No,Often: 1-2 days per week or more,Start a free trial;Read ratings or reviews on ...,Indifferent,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Watch help / tutorial videos,MacOS,"Just me - I am a freelancer, sole proprietor, ...",iOS,iOS,,Straight / Heterosexual,Yes,"Yes, definitely",A few times per week,Multiple times per day,Easy,Too short,No,"A humanities discipline (such as literature, h...",,,Somewhat less welcome now than last year,60.0,24,19
238,"I am not primarily a developer, but I write co...",Yes,,13,Yearly,0.0,0.0,United States,United States dollar,USD,,,Academic researcher;Data scientist or machine ...,"Secondary school (e.g. American high school, G...",Employed part-time,South Asian,Man,"Languages, frameworks, and other technologies ...",Very satisfied,I am actively looking for a job,Dart;Go;Python;R;SQL,Bash/Shell/PowerShell;C;C#;C++;HTML/CSS;Java;J...,Flutter;Node.js;Pandas;TensorFlow;Torch/PyTorch,Cordova;Node.js;Pandas;TensorFlow,Github;Slack;Microsoft Azure;Trello;Google Sui...,Github;Gitlab;Slack;Microsoft Teams;Microsoft ...,No,Extremely important,Somewhat important,Better compensation;Better work/life balance;W...,"Read other media like news articles, founder p...",Every few months,No,Yes,Yes,Often: 1-2 days per week or more,,Amused,Stack Overflow (public Q&A for anyone who code...,Meditate;Visit Stack Overflow;Go for a walk or...,Windows,2 to 9 employees,AWS;Docker;Google Cloud Platform;Kubernetes;Li...,Arduino;AWS;Docker;Google Cloud Platform;Linux...,I have little or no influence,Straight / Heterosexual,Yes,"Yes, somewhat",Less than once per month or monthly,Daily or almost daily,Easy,Too long,No,,jQuery;React.js,jQuery,,12.0,4,2
884,I am a developer by profession,Yes,41.0,10,Yearly,0.0,0.0,United States,United States dollar,USD,Elasticsearch;MongoDB,MongoDB,"Developer, back-end;Developer, embedded applic...","Other doctoral degree (Ph.D., Ed.D., etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,Industry that I’d be working in;Financial perf...,Very satisfied,"I’m not actively looking, but I am open to new...",HTML/CSS;JavaScript;Rust,Bash/Shell/PowerShell;HTML/CSS;JavaScript;Python,Keras;Node.js,Node.js,"Github;Slack;Trello;Google Suite (Docs, Meet, ...","Github;Slack;Trello;Google Suite (Docs, Meet, ...",,Somewhat important,Critically important,Having a bad day (or week or month) at work;Tr...,"Read company media, such as employee blogs or ...",Once every few years,No,,No,Often: 1-2 days per week or more,Start a free trial;Visit developer communities...,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Linux-based,2 to 9 employees,Docker;Linux;Raspberry Pi,AWS;Docker;Google Cloud Platform;Linux;Raspber...,,Straight / Heterosexual,Yes,"Yes, definitely",Daily or almost daily,Multiple times per day,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",React.js,React.js,Just as welcome now as I felt last year,50.0,25,10
1116,I am a developer by profession,Yes,25.0,11,Yearly,0.0,0.0,United States,United States dollar,USD,Firebase,Firebase,"Developer, back-end;Developer, front-end;Devel...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Flex time or a flexible schedule;Languages, fr...",Very satisfied,I am not interested in new job opportunities,,,,,Github,Github,No,Neutral,Fairly important,,"Read company media, such as employee blogs or ...",Once a year,Not sure,Yes,Yes,Never,Start a free trial,,Stack Overflow (public Q&A for anyone who codes),Meditate,Linux-based,"Just me - I am a freelancer, sole proprietor, ...",Linux,Android;AWS;Linux,,Straight / Heterosexual,Yes,"Yes, somewhat",A few times per month or weekly,A few times per week,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",React.js,React.js,Just as welcome now as I felt last year,55.0,14,4
1278,I am a developer by profession,Yes,21.0,12,Yearly,0.0,0.0,United Kingdom,Pound sterling,GBP,Firebase;PostgreSQL;Redis,Cassandra;Elasticsearch;Firebase,"Database administrator;Developer, full-stack;D...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",Neither satisfied nor dissatisfied,"I’m not actively looking, but I am open to new...",Bash/Shell/PowerShell;SQL;Swift;TypeScript,Bash/Shell/PowerShell;JavaScript;Objective-C;P...,Node.js,Cordova;Flutter;Node.js;React Native;Torch/PyT...,"Jira;Github;Trello;Google Suite (Docs, Meet, etc)",Jira;Github;Gitlab;Microsoft Teams;Trello;Goog...,No,Neutral,Fairly important,Wanting to share accomplishments with a wider ...,"Read company media, such as employee blogs or ...",Once a year,Yes,Onboarding? What onboarding?,No,Occasionally: 1-2 days per quarter but less th...,Start a free trial;Read ratings or reviews on ...,Amused,Stack Overflow (public Q&A for anyone who code...,Play games;Visit Stack Overflow;Go for a walk ...,MacOS,2 to 9 employees,Arduino;Docker;Google Cloud Platform;iOS;Linux...,Google Cloud Platform;iOS;Kubernetes;Linux;Ras...,,Straight / Heterosexual,Yes,"Yes, somewhat",A few times per month or weekly,Daily or almost daily,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Express;Vue.js,Express;jQuery;React.js,Just as welcome now as I felt last year,40.0,9,2


# Grouping and Aggregating 

In [44]:
# Median salary in USD (missing values are skipped).
df['SalaryUSD'].median()

54049.0

In [46]:
# Median of every numeric column in the frame.
# numeric_only=True is required because the frame also holds text (object)
# columns: since pandas 2.0 DataFrame.median no longer drops them silently and
# raises TypeError instead.
df.median(numeric_only=True)

Age               29.0
CompTotal      63000.0
SalaryUSD      54049.0
WorkWeekHrs       40.0
dtype: float64

In [48]:
# Summary statistics for the numeric columns: count, mean, std, min, quartiles and max.
# `count` is the number of non-null values in each column.
df.describe()

Unnamed: 0,Age,CompTotal,SalaryUSD,WorkWeekHrs
count,45446.0,34826.0,34756.0,41151.0
mean,30.834111,3.190464e+242,103756.1,40.782174
std,9.585392,inf,226885.3,17.816383
min,1.0,0.0,0.0,1.0
25%,24.0,20000.0,24648.0,40.0
50%,29.0,63000.0,54049.0,40.0
75%,35.0,125000.0,95000.0,44.0
max,279.0,1.1111110000000001e+247,2000000.0,475.0


In [50]:
# Number of non-null salary values (matches the `count` row of describe()).
df['SalaryUSD'].count()

34756

In [54]:
# value_counts gives the number of occurrences of each distinct value.
df['Hobbyist'].value_counts()

Yes    50388
No     14028
Name: Hobbyist, dtype: int64

In [64]:
# Number of respondents at each education level.
# (The exact survey question can be looked up with
#  schama_df.loc['EdLevel', 'QuestionText'].)
df['EdLevel'].value_counts()

Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                          26542
Master’s degree (M.A., M.S., M.Eng., MBA, etc.)                                       13112
Some college/university study without earning a degree                                 7239
Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)     4771
Associate degree (A.A., A.S., etc.)                                                    1843
Other doctoral degree (Ph.D., Ed.D., etc.)                                             1690
Primary/elementary school                                                               941
Professional degree (JD, MD, etc.)                                                      800
I never completed any formal education                                                  493
Name: EdLevel, dtype: int64

In [66]:
# Share of each education level as a fraction of the non-null answers
# (values sum to 1; multiply by 100 to read them as percentages).
df['EdLevel'].value_counts(normalize=True)

Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                          0.462155
Master’s degree (M.A., M.S., M.Eng., MBA, etc.)                                       0.228309
Some college/university study without earning a degree                                0.126047
Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)    0.083074
Associate degree (A.A., A.S., etc.)                                                   0.032091
Other doctoral degree (Ph.D., Ed.D., etc.)                                            0.029427
Primary/elementary school                                                             0.016385
Professional degree (JD, MD, etc.)                                                    0.013930
I never completed any formal education                                                0.008584
Name: EdLevel, dtype: float64

In [71]:
# Group-by in three steps: split the frame by the values of a column, apply a
# function to each piece, then combine the per-group results into one object.
#
# The filter below does the "apply" step by hand for a single country: it counts
# the education levels of respondents from India only. Grouping by Country
# performs the same computation for every country at once.

filt = df['Country'] == 'India'
# Select rows and column in one .loc call rather than chaining df.loc[filt]['EdLevel'],
# which first materialises every column of the filtered frame.
df.loc[filt, 'EdLevel'].value_counts()

Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                          4578
Master’s degree (M.A., M.S., M.Eng., MBA, etc.)                                       1432
Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)     410
Some college/university study without earning a degree                                 252
Primary/elementary school                                                               72
I never completed any formal education                                                  56
Professional degree (JD, MD, etc.)                                                      40
Other doctoral degree (Ph.D., Ed.D., etc.)                                              26
Associate degree (A.A., A.S., etc.)                                                      7
Name: EdLevel, dtype: int64

In [74]:
# Split the data into one group per country.
# Group by the scalar key 'Country' rather than the one-element list ['Country']:
# with a length-1 list, newer pandas versions expect group names as 1-tuples, so
# get_group('India') warns (pandas 2.2) and is slated to stop working.
country_grp = df.groupby('Country')
country_grp

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000256F2BB1A00>

In [79]:
# Pull out the sub-frame for a single group (India) and preview its first rows.
country_grp.get_group('India').head()

Unnamed: 0_level_0,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,SalaryUSD,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
7,I am a developer by profession,Yes,,18,Monthly,,,India,United States dollar,USD,,,"Developer, back-end;Developer, front-end;Devel...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,Very satisfied,"I’m not actively looking, but I am open to new...",C#;HTML/CSS;PHP,C#;HTML/CSS;PHP,,,,,Yes,Extremely important,Very important,Better compensation;Growth or leadership oppor...,"Read company media, such as employee blogs or ...",Every few months,Yes,Yes,No,Sometimes: 1-2 days per month but less than we...,Start a free trial;Ask developers I know/work ...,,Stack Overflow (public Q&A for anyone who codes),,Windows,20 to 99 employees,,,,,Yes,"Yes, definitely",Multiple times per day,Multiple times per day,,,,"Computer science, computer engineering, or sof...",,,A lot more welcome now than last year,,6.0,4.0
22,I am a developer by profession,Yes,,18,Monthly,,,India,Indian rupee,INR,,,"Developer, full-stack;Engineer, data","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,Man,,Slightly dissatisfied,I am actively looking for a job,Java;JavaScript;Python,Java;Python,,,,,Not sure,,Very important,Better compensation;Better work/life balance;W...,Company reviews from third party sites (e.g. G...,Every few months,No,Yes,No,Often: 1-2 days per week or more,,,Stack Overflow (public Q&A for anyone who code...,,Windows,500 to 999 employees,,,,,Yes,"Yes, definitely",Multiple times per day,Multiple times per day,Easy,Appropriate in length,,"Computer science, computer engineering, or sof...",,,Just as welcome now as I felt last year,50.0,10.0,2.0
58,I am a developer by profession,Yes,,22,,,,India,Indian rupee,INR,Microsoft SQL Server,Microsoft SQL Server,"Developer, back-end",,"Independent contractor, freelancer, or self-em...",South Asian,,Family friendliness,Very satisfied,I am not interested in new job opportunities,C#;JavaScript;SQL;TypeScript,C#;JavaScript;SQL;TypeScript,.NET;.NET Core;Node.js,.NET;.NET Core;Node.js,Stack Overflow for Teams,Stack Overflow for Teams,Yes,Extremely important,Very important,,,Once every few years,,Yes,No,Often: 1-2 days per week or more,Ask developers I know/work with,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,Windows,,,,,,Yes,"Yes, definitely",,Multiple times per day,Neither easy nor difficult,Too long,,Web development or web design,Angular;Angular.js;ASP.NET;ASP.NET Core;jQuery,Angular;Angular.js;ASP.NET;ASP.NET Core;jQuery,,,,
63,I am a student who is learning to code,Yes,21.0,17,,,,India,,,DynamoDB,Firebase;MongoDB;MySQL,"Designer;Developer, back-end;Developer, front-...","Secondary school (e.g. American high school, G...","Not employed, but looking for work",South Asian,Man,Flex time or a flexible schedule;How widely us...,,I am actively looking for a job,Go;Kotlin;TypeScript,C;C++;HTML/CSS;Java;JavaScript;Python,Flutter;Node.js;React Native;TensorFlow,,Confluence;Stack Overflow for Teams,Github;Facebook Workplace;Slack;Microsoft Team...,,,,,,Once a year,Yes,,No,,Start a free trial;Ask developers I know/work ...,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Windows,,AWS;Docker;Heroku;iOS,Android;Arduino;Google Cloud Platform;Raspberr...,,Straight / Heterosexual,Yes,Neutral,A few times per week,Daily or almost daily,Easy,Appropriate in length,No,,Angular.js;Django;jQuery;Laravel;Vue.js,,Not applicable - I did not use Stack Overflow ...,,4.0,
149,I am a developer by profession,Yes,36.0,31,Yearly,21000000.0,293196.0,India,Indian rupee,INR,Cassandra;MySQL;PostgreSQL,MySQL;PostgreSQL,"Developer, back-end;Developer, QA or test;DevO...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,East Asian,Man,"Languages, frameworks, and other technologies ...",Very satisfied,I am not interested in new job opportunities,Bash/Shell/PowerShell;C;C++;Java;Perl;Scala,Bash/Shell/PowerShell;C++;Perl,Apache Spark,,Confluence;Jira;Github;Gitlab;Google Suite (Do...,"Confluence;Jira;Github;Google Suite (Docs, Mee...",Not sure,Neutral,Critically important,Just because;Curious about other opportunities...,"Read company media, such as employee blogs or ...",Once a year,Not sure,Onboarding? What onboarding?,No,Sometimes: 1-2 days per month but less than we...,,Indifferent,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,Linux-based,500 to 999 employees,Google Cloud Platform;Linux,Linux,I have little or no influence,Straight / Heterosexual,Yes,"Yes, somewhat",A few times per month or weekly,Multiple times per day,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,70.0,5.0,3.0


In [83]:
# Number of respondents per education level within each country (first 50 rows).
(
    country_grp['EdLevel']
    .value_counts()
    .head(50)
)

Country              EdLevel                                                                           
Afghanistan          Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                           27
                     I never completed any formal education                                                 12
                     Some college/university study without earning a degree                                  7
                     Master’s degree (M.A., M.S., M.Eng., MBA, etc.)                                         4
                     Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)      4
                     Other doctoral degree (Ph.D., Ed.D., etc.)                                              3
                     Primary/elementary school                                                               2
                     Professional degree (JD, MD, etc.)                                                      1
Albania 

In [86]:
# The grouped value_counts() result is indexed by (Country, EdLevel);
# selecting the outer label 'India' leaves that country's EdLevel counts.
ed_level_counts = country_grp['EdLevel'].value_counts()
ed_level_counts.loc['India']

EdLevel
Bachelor’s degree (B.A., B.S., B.Eng., etc.)                                          4578
Master’s degree (M.A., M.S., M.Eng., MBA, etc.)                                       1432
Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)     410
Some college/university study without earning a degree                                 252
Primary/elementary school                                                               72
I never completed any formal education                                                  56
Professional degree (JD, MD, etc.)                                                      40
Other doctoral degree (Ph.D., Ed.D., etc.)                                              26
Associate degree (A.A., A.S., etc.)                                                      7
Name: EdLevel, dtype: int64

In [91]:
# Median SalaryUSD for each country (first 15 countries shown).
median_salary_by_country = country_grp['SalaryUSD'].median()
median_salary_by_country.head(15)

Country
Afghanistan            15163.5
Albania                15900.0
Algeria                 9432.0
Andorra                88640.0
Angola                  5292.0
Antigua and Barbuda        NaN
Argentina              17520.0
Armenia                21066.0
Australia              76831.0
Austria                51888.0
Azerbaijan             11988.0
Bahamas                86706.0
Bahrain                40196.0
Bangladesh              7068.0
Barbados               25074.0
Name: SalaryUSD, dtype: float64

In [93]:
# Median SalaryUSD of the respondents from India.
(
    country_grp['SalaryUSD']
    .median()
    .loc['India']
)

10056.0

In [102]:
# Highest SalaryUSD reported by a respondent from India.
max_salary_by_country = country_grp['SalaryUSD'].max()
max_salary_by_country.loc['India']

1800000.0

In [106]:
# Several aggregations at once: one output column per function name.
salary_summary_funcs = ['max', 'median']
country_grp['SalaryUSD'].aggregate(salary_summary_funcs)

Unnamed: 0_level_0,max,median
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,1000000.0,15163.5
Albania,194580.0,15900.0
Algeria,36000.0,9432.0
Andorra,141608.0,88640.0
Angola,5736.0,5292.0
...,...,...
"Venezuela, Bolivarian Republic of...",24000.0,3600.0
Viet Nam,1000000.0,10344.0
Yemen,36000.0,36000.0
Zambia,60000.0,5452.0


In [199]:
# Max and median SalaryUSD per country, narrowed to the row for India.
(
    country_grp['SalaryUSD']
    .aggregate(['max', 'median'])
    .loc['India']
)

max       1800000.0
median      10056.0
Name: India, dtype: float64

In [207]:
# Added later so that Age1stCode can be aggregated together with SalaryUSD.
# The column is read as object dtype because of two text answers; map them to
# the numeric stand-ins 4 and 86 so it can be cast to float afterwards.
#
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the df['Age1stCode'] selection: that chained
# in-place form triggers a FutureWarning in pandas 2.x and, with Copy-on-Write
# enabled, only changes a temporary object and leaves df untouched.
df['Age1stCode'] = df['Age1stCode'].replace(
    {'Younger than 5 years': 4, 'Older than 85': 86}
)

In [210]:
# With the text answers replaced, the column can be stored as floats
# (float rather than int so that missing answers stay NaN).
age_first_code = df['Age1stCode']
df['Age1stCode'] = age_first_code.astype(float)

In [211]:
# Aggregate several columns in one call: column name -> aggregation to apply.
max_per_column = dict.fromkeys(('Age1stCode', 'SalaryUSD'), 'max')
country_grp.aggregate(max_per_column)

Unnamed: 0_level_0,Age1stCode,SalaryUSD
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,23.0,1000000.0
Albania,32.0,194580.0
Algeria,49.0,36000.0
Andorra,86.0,141608.0
Angola,21.0,5736.0
...,...,...
"Venezuela, Bolivarian Republic of...",42.0,24000.0
Viet Nam,86.0,1000000.0
Yemen,37.0,36000.0
Zambia,45.0,60000.0


In [126]:
# How many respondents from one country (India) list Python among the
# languages they have worked with?
coun_filt = df['Country'] == 'India'
# .loc selects the matching rows and the single column in one step, instead of
# first materialising a filtered copy of the whole frame (df[mask][col]).
# Missing answers come out of str.contains as NaN, which sum() skips.
df.loc[coun_filt, 'LanguageWorkedWith'].str.contains('Python').sum()


2670

In [137]:
# country_grp['LanguageWorkedWith'] is a grouped Series, so apply() hands each
# country's Series of language strings to the helper below.
def _python_user_count(languages):
    """Return how many entries in one group's language strings mention 'Python'."""
    return languages.str.contains('Python').sum()

country_grp['LanguageWorkedWith'].apply(_python_user_count).loc['India']

2670

In [139]:
# Same count as above, shown for every country.
python_users_per_country = country_grp['LanguageWorkedWith'].apply(
    lambda langs: langs.str.contains('Python').sum()
)
python_users_per_country

Country
Afghanistan                              11
Albania                                  13
Algeria                                  40
Andorra                                   3
Angola                                    1
                                       ... 
Venezuela, Bolivarian Republic of...     29
Viet Nam                                102
Yemen                                     1
Zambia                                    4
Zimbabwe                                 13
Name: LanguageWorkedWith, Length: 183, dtype: int64

In [142]:
# Goal: the percentage of respondents in each country who know Python.
# First ingredient: the total number of respondents per country.
country_column = df['Country']
countries_respondents = country_column.value_counts()
countries_respondents

United States     12469
India              8403
United Kingdom     3896
Germany            3890
Canada             2191
                  ...  
Grenada               1
Liechtenstein         1
Chad                  1
North Korea           1
Nauru                 1
Name: Country, Length: 183, dtype: int64

In [144]:
# Second ingredient: per country, how many respondents mention Python.
countr_uses_pyhton = (
    country_grp['LanguageWorkedWith']
    .apply(lambda langs: langs.str.contains('Python').sum())
)


In [150]:
# Combine the two per-country Series into one DataFrame, aligned on country:
# total respondents and respondents who know Python.
#
# Each Series is given its final column name before concatenating instead of
# renaming the columns afterwards. The name that value_counts() assigns to its
# result differs between pandas versions ('Country' before 2.0, 'count' from
# 2.0 on), so a rename keyed on 'Country' can silently miss and leave the
# 'Num_of_people' column undefined.
new_df = pd.concat(
    [
        countries_respondents.rename('Num_of_people'),
        countr_uses_pyhton.rename('Num_knows_python'),
    ],
    axis='columns',
)

In [154]:
# Share of each country's respondents who know Python, as a percentage.
python_ratio = new_df['Num_knows_python'].div(new_df['Num_of_people'])
new_df['percent'] = python_ratio.mul(100)


Unnamed: 0,Num_of_people,Num_knows_python,percent
United States,12469,5964,47.830620
India,8403,2670,31.774366
United Kingdom,3896,1621,41.606776
Germany,3890,1712,44.010283
Canada,2191,1011,46.143314
...,...,...,...
Grenada,1,0,0.000000
Liechtenstein,1,0,0.000000
Chad,1,0,0.000000
North Korea,1,0,0.000000


In [171]:
# Totals and Python percentage for India (one row, all columns).
new_df.loc['India', :]

Num_of_people       8403.000000
Num_knows_python    2670.000000
percent               31.774366
Name: India, dtype: float64

# Cleaning Data and TypeCasting

In [181]:
# Let's find the average number of years developers have been coding.
# Start by looking up the survey question behind the YearsCode column.
question_by_column = schama_df['QuestionText']
question_by_column.loc['YearsCode']

'Including any education, how many years have you been coding in total?'

In [186]:
# Inspect the raw YearsCode answers before trying to cast them.
df.loc[:, 'YearsCode']

Respondent
1         36
2          7
3          4
4          7
5         15
        ... 
65634    NaN
65635      6
65636    NaN
65637    NaN
65639    NaN
Name: YearsCode, Length: 64461, dtype: object

In [188]:
# Demonstration: at this point YearsCode still holds text answers, so a direct
# cast to float fails. The error is caught and printed so that the notebook
# keeps running top to bottom (Restart & Run All) while still showing the
# message that motivates the clean-up in the following cells.
try:
    df['YearsCode'] = df['YearsCode'].astype(float)
except ValueError as err:
    print(f'ValueError: {err}')

ValueError: could not convert string to float: 'Less than 1 year'

In [191]:
# List every distinct YearsCode answer to spot the non-numeric ones.
# 'Less than 1 year' will be replaced by 0 and 'More than 50 years' by 51.
years_code_raw = df['YearsCode']
years_code_raw.unique()

array(['36', '7', '4', '15', '6', '17', '8', '10', '35', '5', '37', '19',
       '9', '22', '30', '23', '20', '2', 'Less than 1 year', '3', '13',
       '25', '16', '43', '11', '38', '33', nan, '24', '21', '12', '40',
       '27', '50', '46', '14', '18', '28', '32', '44', '26', '42', '31',
       '34', '29', '1', '39', '41', '45', 'More than 50 years', '47',
       '49', '48'], dtype=object)

In [192]:
# Map the two text answers to numeric stand-ins so the column can be cast:
# 'Less than 1 year' -> 0 and 'More than 50 years' -> 51.
#
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the df['YearsCode'] selection: that chained
# in-place form triggers a FutureWarning in pandas 2.x and, with Copy-on-Write
# enabled, only changes a temporary object and leaves df untouched.
df['YearsCode'] = df['YearsCode'].replace(
    {'Less than 1 year': 0, 'More than 50 years': 51}
)

In [194]:
# Re-check the distinct values after the replacement.
years_code_replaced = df['YearsCode']
years_code_replaced.unique()

array(['36', '7', '4', '15', '6', '17', '8', '10', '35', '5', '37', '19',
       '9', '22', '30', '23', '20', '2', 0, '3', '13', '25', '16', '43',
       '11', '38', '33', nan, '24', '21', '12', '40', '27', '50', '46',
       '14', '18', '28', '32', '44', '26', '42', '31', '34', '29', '1',
       '39', '41', '45', 51, '47', '49', '48'], dtype=object)

In [195]:
# Every remaining value is numeric (or missing), so the cast now succeeds.
years_code_numeric = df['YearsCode'].astype(float)
df['YearsCode'] = years_code_numeric

In [197]:
# Summary statistics of the numeric YearsCode column.
(
    df['YearsCode']
    .describe()
)

count    57684.000000
mean        12.709053
std          9.717353
min          0.000000
25%          6.000000
50%         10.000000
75%         17.000000
max         51.000000
Name: YearsCode, dtype: float64

In [198]:
# Average years of coding across respondents: roughly 13 years.
years_code_mean = df['YearsCode'].mean()
years_code_mean

12.709052770265584