# Part 1: Pandas

In [1]:
import pandas as pd

In [2]:
# Load the survey responses from a CSV file (path relative to the notebook) into a DataFrame.
df = pd.read_csv('data/survey_results_public.csv')

In [3]:
df.head(10); # first 10 rows; the trailing ';' hides the output of the cell

In [4]:
# shape is a (number of rows, number of columns) tuple
df.shape

(88883, 85)

#### The `info()` method reports the number of rows and columns, plus the non-null count and data type of every column.

In [50]:
# Print a summary: index range, column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88883 entries, 0 to 88882
Data columns (total 85 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Respondent              88883 non-null  int64  
 1   MainBranch              88331 non-null  object 
 2   Hobbyist                88883 non-null  object 
 3   OpenSourcer             88883 non-null  object 
 4   OpenSource              86842 non-null  object 
 5   Employment              87181 non-null  object 
 6   Country                 88751 non-null  object 
 7   Student                 87014 non-null  object 
 8   EdLevel                 86390 non-null  object 
 9   UndergradMajor          75614 non-null  object 
 10  EduOther                84260 non-null  object 
 11  OrgSize                 71791 non-null  object 
 12  DevType                 81335 non-null  object 
 13  YearsCode               87938 non-null  object 
 14  Age1stCode              87634 non-null

#### pd.set_option(), displaying all the columns and rows

In [6]:
# Show up to 85 columns, so all 85 survey columns are displayed without truncation.
pd.set_option('display.max_columns',85)
# Optional: uncomment to also raise the displayed row limit to 85.
#pd.set_option('display.max_rows',85)

#### A trailing ';' (semicolon) hides the output of a cell; pressing 'o' in command mode also toggles a cell's output

In [7]:
# The trailing ';' suppresses the display of the DataFrame
df;

#### comparing column names and questions

In [8]:
# The schema file maps each survey column name to the question text that was asked.
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [9]:
# Request the first 85 schema rows; the display may still abbreviate long output with '...'
schema_df.head(85)

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,MainBranch,Which of the following options best describes ...
2,Hobbyist,Do you code as a hobby?
3,OpenSourcer,How often do you contribute to open source?
4,OpenSource,How do you feel about the quality of open sour...
...,...,...
80,Sexuality,Which of the following do you currently identi...
81,Ethnicity,Which of the following do you identify as? Ple...
82,Dependents,"Do you have any dependents (e.g., children, el..."
83,SurveyLength,How do you feel about the length of the survey...


# Part 2: Dataframe in python
A DataFrame is like a dictionary of lists: each key acts as a column name and each value is a list holding that column's entries, one per row. For example:

In [52]:
# Column-oriented sample data: each keyword is a column name and each list
# holds that column's values, one entry per person.
people = dict(
    first=['prasiddha', 'ram', 'hari'],
    last=['pokhrel', 'regmi', 'poudel'],
    email=['prasiddhapokhrel@gmail.com', 'ram@email.com', 'hari@email.com'],
)

In [53]:
# Looking up a key in the plain dict returns the list stored for that "column"
people['first']

['prasiddha', 'ram', 'hari']

In [54]:
import pandas as pd

In [55]:
# Build a DataFrame from the dict: keys become column names, list items become rows.
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddhapokhrel@gmail.com
1,ram,regmi,ram@email.com
2,hari,poudel,hari@email.com


In [56]:
# Bracket selection of a single column returns a pandas Series, not a DataFrame.
# Wrapping the selection in type() makes that explicit and produces the output
# recorded for this cell.
type(df['email'])

pandas.core.series.Series

##### `df.email` also gives the same result, but `df['email']` is preferred: if a column has the same name as a DataFrame attribute or method (e.g. `count`), `df.count` refers to the method rather than the column, whereas `df['count']` always returns the column.

#### Accessing multiple columns

In [15]:
# Passing a list of column names (note the double brackets) returns a DataFrame with just those columns
df[['first','last']]

Unnamed: 0,first,last
0,prasiddha,pokhrel
1,ram,regmi
2,hari,poudel


### Getting all the columns

In [16]:
# Index object holding the column labels
df.columns

Index(['first', 'last', 'email'], dtype='object')

### Getting all the rows
To get rows we can use loc and iloc.
iloc lets us access rows by integer position, hence the name iloc (integer location).

#### Accessing first row by iloc

In [17]:
# Row at integer position 0, returned as a Series whose index is the column names
df.iloc[0]

first                     prasiddha
last                        pokhrel
email    prasiddhapokhrel@gmail.com
Name: 0, dtype: object

#### Accessing multiple rows
By passing list of index

In [18]:
# A list of positions returns those rows as a DataFrame
df.iloc[[0,1]]

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddhapokhrel@gmail.com
1,ram,regmi,ram@email.com


#### Accessing row and columns by using iloc

To do that, we pass the rows first and the columns second. For example:
df.iloc[[0,1],0] returns rows 0 and 1 of column 0 (the first column)

In [19]:
# Rows 0 and 1 of the column at position 0 ('first'); a single column comes back as a Series
df.iloc[[0,1],0]

0    prasiddha
1          ram
Name: first, dtype: object

In [20]:
# Rows 0 and 1 of the columns at positions 0 and 2 ('first' and 'email'), returned as a DataFrame
df.iloc[[0,1],[0,2]]

Unnamed: 0,first,email
0,prasiddha,prasiddhapokhrel@gmail.com
1,ram,ram@email.com


## Let's search by loc
With loc we select by label; the row labels here are the default range of integers.
For now it looks similar to iloc, but we will see the use cases for loc later.

In [21]:
# Row whose index label is 0 (with the default index, labels coincide with positions)
df.loc[0]

first                     prasiddha
last                        pokhrel
email    prasiddhapokhrel@gmail.com
Name: 0, dtype: object

In [58]:
# Rows whose index labels are 0 and 1
df.loc[[0,1]]

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddhapokhrel@gmail.com
1,ram,regmi,ram@email.com


In [23]:
# Unlike iloc, loc takes column names: rows labelled 0 and 1, columns 'last' and 'email'
df.loc[[0,1],['last','email']]

Unnamed: 0,last,email
0,pokhrel,prasiddhapokhrel@gmail.com
1,regmi,ram@email.com


In [24]:
# Rebind df to the survey data (replacing the small people DataFrame) and preview the first 5 rows.
df = pd.read_csv('data/survey_results_public.csv')
df.head(5)

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3.0,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3.0,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16.0,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


In [25]:
# (rows, columns) of the survey data
df.shape

(88883, 85)

In [26]:
# All column labels of the survey data
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource',
       'Employment', 'Country', 'Student', 'EdLevel', 'UndergradMajor',
       'EduOther', 'OrgSize', 'DevType', 'YearsCode', 'Age1stCode',
       'YearsCodePro', 'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney',
       'MgrWant', 'JobSeek', 'LastHireDate', 'LastInt', 'FizzBuzz',
       'JobFactors', 'ResumeUpdate', 'CurrencySymbol', 'CurrencyDesc',
       'CompTotal', 'CompFreq', 'ConvertedComp', 'WorkWeekHrs', 'WorkPlan',
       'WorkChallenge', 'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev',
       'CodeRevHrs', 'UnitTests', 'PurchaseHow', 'PurchaseWhat',
       'LanguageWorkedWith', 'LanguageDesireNextYear', 'DatabaseWorkedWith',
       'DatabaseDesireNextYear', 'PlatformWorkedWith',
       'PlatformDesireNextYear', 'WebFrameWorkedWith',
       'WebFrameDesireNextYear', 'MiscTechWorkedWith',
       'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',
       'BlockchainOrg', 'BlockchainIs', 'BetterLife'

In [27]:
# Select the single 'Hobbyist' column as a Series
df['Hobbyist']

0        Yes
1         No
2        Yes
3         No
4        Yes
        ... 
88878    Yes
88879     No
88880     No
88881     No
88882    Yes
Name: Hobbyist, Length: 88883, dtype: object

In [28]:
# Load the schema (column name -> question text) and display it.
# Note: this reads the same file as the earlier `schema_df`, under a different name.
df_schema = pd.read_csv('data/survey_results_schema.csv')
df_schema

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,MainBranch,Which of the following options best describes ...
2,Hobbyist,Do you code as a hobby?
3,OpenSourcer,How often do you contribute to open source?
4,OpenSource,How do you feel about the quality of open sour...
...,...,...
80,Sexuality,Which of the following do you currently identi...
81,Ethnicity,Which of the following do you identify as? Ple...
82,Dependents,"Do you have any dependents (e.g., children, el..."
83,SurveyLength,How do you feel about the length of the survey...


In [29]:
# Schema row at position 2: the question text behind the 'Hobbyist' column
df_schema.iloc[2]

Column                         Hobbyist
QuestionText    Do you code as a hobby?
Name: 2, dtype: object

#### Counting values

In [30]:
# Count how many times each distinct answer occurs in the 'Hobbyist' column
df['Hobbyist'].value_counts()

Hobbyist
Yes    71257
No     17626
Name: count, dtype: int64

#### Getting specific rows and column

In [31]:
# Rows labelled 0, 1 and 2 of the 'Hobbyist' column
df.loc[[0,1,2],'Hobbyist']

0    Yes
1     No
2    Yes
Name: Hobbyist, dtype: object

#### Label-based slicing with loc includes the end label, unlike Python list slicing
Here `loc[0:2]` returns rows 0, 1 and 2; the same slice on a Python list (or with `iloc`) would return only positions 0 and 1.

In [32]:
# Label slice 0:2 includes the end label, so rows 0, 1 and 2 are returned
df.loc[0:2,'Hobbyist']

0    Yes
1     No
2    Yes
Name: Hobbyist, dtype: object

#### Using slicing method to get multiple rows and columns


In [33]:
# Column labels can be sliced too; both 'Hobbyist' and 'Employment' are included in the result
df.loc[0:2,'Hobbyist':'Employment']

Unnamed: 0,Hobbyist,OpenSourcer,OpenSource,Employment
0,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work"
1,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work"
2,Yes,Never,The quality of OSS and closed source software ...,Employed full-time


# Part 3: How to Set, Reset, and Use Indexes

Indexes are used to identify the rows; by default the rows are labelled 0, 1, 2, ...
However, we can also assign our own labels to the rows and later reset them to the default.

### Let's see in practice

In [34]:
# Small sample dataset: one keyword per column, one list entry per person.
people = dict(
    first=['prasiddha', 'ram', 'hari'],
    last=['pokhrel', 'regmi', 'poudel'],
    email=['prasiddha@email.com', 'ram@email.com', 'hari@email.com'],
)

# Rebind df to a fresh DataFrame built from the dict and display it.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddha@email.com
1,ram,regmi,ram@email.com
2,hari,poudel,hari@email.com


In [35]:
# Use the 'email' column as the row index. Reassigning the returned DataFrame
# (instead of inplace=True) keeps the step explicit and allows method chaining.
df = df.set_index('email')

In [36]:
# 'email' now appears as the index instead of as a regular column
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
prasiddha@email.com,prasiddha,pokhrel
ram@email.com,ram,regmi
hari@email.com,hari,poudel


In [37]:
# The row labels: an Index of email addresses named 'email'
df.index

Index(['prasiddha@email.com', 'ram@email.com', 'hari@email.com'], dtype='object', name='email')

In [38]:
# Look up a single value by row label ('ram@email.com') and column name ('first')
df.loc['ram@email.com', 'first']

'ram'

In [63]:
# With 'email' as the index, the integer label 0 no longer exists, so loc[0]
# cannot be used (it raises a KeyError); positional access with iloc[0] still works.
# a = df.loc[0] # throws error
# print(a)

b = df.iloc[0]
print(b)

first    prasiddha
last       pokhrel
Name: prasiddha@email.com, dtype: object


### Resetting the index

In [40]:
# Move 'email' back to a regular column and restore the default integer index.
# Reassigning the result (instead of inplace=True) keeps the step explicit.
df = df.reset_index()
df

Unnamed: 0,email,first,last
0,prasiddha@email.com,prasiddha,pokhrel
1,ram@email.com,ram,regmi
2,hari@email.com,hari,poudel


## Practicing in stackoverflow data

In [41]:
# index_col sets the index while loading: survey rows are labelled by 'Respondent',
# and schema rows are labelled by the survey column name ('Column').
df = pd.read_csv('data/survey_results_public.csv', index_col ='Respondent') # setting index when loading data with the column name 'Respondent'
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col = 'Column')

In [42]:
# Allow up to 85 columns and 85 rows to be displayed before pandas truncates the output.
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

In [43]:
# First 5 rows (the default); 'Respondent' is now the index rather than a column
df.head()

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3.0,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3.0,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16.0,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


In [73]:
# (rows, columns): 'Column' became the index, so only 'QuestionText' is left as a column
schema_df.shape

(85, 1)

In [77]:
# First 5 schema rows, labelled by survey column name
schema_df.head()

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Respondent,Randomized respondent ID number (not in order ...
MainBranch,Which of the following options best describes ...
Hobbyist,Do you code as a hobby?
OpenSourcer,How often do you contribute to open source?
OpenSource,How do you feel about the quality of open sour...


In [44]:
df.loc[1] # row whose index label (Respondent ID) is 1 -- a label lookup, not a position

MainBranch                           I am a student who is learning to code
Hobbyist                                                                Yes
OpenSourcer                                                           Never
OpenSource                The quality of OSS and closed source software ...
Employment                           Not employed, and not looking for work
Country                                                      United Kingdom
Student                                                                  No
EdLevel                                           Primary/elementary school
UndergradMajor                                                          NaN
EduOther                  Taught yourself a new language, framework, or ...
OrgSize                                                                 NaN
DevType                                                                 NaN
YearsCode                                                                 4
Age1stCode  

In [79]:
# The question text for every survey column, as a Series indexed by column name
schema_df['QuestionText']

Column
Respondent                Randomized respondent ID number (not in order ...
MainBranch                Which of the following options best describes ...
Hobbyist                                            Do you code as a hobby?
OpenSourcer                     How often do you contribute to open source?
OpenSource                How do you feel about the quality of open sour...
Employment                Which of the following best describes your cur...
Country                           In which country do you currently reside?
Student                   Are you currently enrolled in a formal, degree...
EdLevel                   Which of the following best describes the high...
UndergradMajor            What was your main or most important field of ...
EduOther                  Which of the following types of non-degree edu...
OrgSize                   Approximately how many people are employed by ...
DevType                   Which of the following describe you? Please se...
Years

In [66]:
# Row label 'MgrIdiot' and column 'QuestionText' together return the full question text as a string
schema_df.loc['MgrIdiot', 'QuestionText']

'How confident are you that your manager knows what they’re doing?'

### Sorting

In [80]:
# Sort the schema rows by index label (the survey column names) in descending order.
# Reassigning the result (instead of inplace=True) keeps the step explicit.
schema_df = schema_df.sort_index(ascending=False)

In [81]:
# Display the schema, now ordered by column name in descending order
schema_df

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
YearsCodePro,How many years have you coded professionally (...
YearsCode,"Including any education, how many years have y..."
WorkWeekHrs,"On average, how many hours per week do you work?"
WorkRemote,How often do you work remotely?
WorkPlan,How structured or planned is your work?
WorkLoc,Where would you prefer to work?
WorkChallenge,"Of these options, what are your greatest chall..."
WelcomeChange,"Compared to last year, how welcome do you feel..."
WebFrameWorkedWith,Which of the following web frameworks have you...
WebFrameDesireNextYear,Which of the following web frameworks have you...


# Part 4: Filtering data

In data science, analysis usually starts with filtering the data down to the rows of interest, so it is an important skill to learn. Let's see it in practice.

### Let's see in practice

In [84]:
# Sample data for the filtering examples: two of the three people share the last name 'regmi'.
people = dict(
    first=['prasiddha', 'ram', 'hari'],
    last=['pokhrel', 'regmi', 'regmi'],
    email=['prasiddha@email.com', 'ramregmi@email.com', 'hariregmi@email.com'],
)

# Rebind df to a fresh DataFrame built from the dict and display it.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddha@email.com
1,ram,regmi,ramregmi@email.com
2,hari,regmi,hariregmi@email.com


### Using df to filter data
Indexing with `df[...]` works, but `loc` is usually preferred because it also lets us pass the column names we want to keep.

In [85]:
# Boolean mask: True for every row whose last name is 'regmi'
filt = df['last'].eq('regmi')
filt

0    False
1     True
2     True
Name: last, dtype: bool

In [89]:
# The hand-written boolean Series below holds the same values as `filt`, so
# both expressions select the same rows. Only the last expression of a cell
# is rendered, which is why a single table appears.
df[filt]
explicit_mask = pd.Series([False, True, True], index=[0, 1, 2])
df[explicit_mask]

Unnamed: 0,first,last,email
1,ram,regmi,ramregmi@email.com
2,hari,regmi,hariregmi@email.com


### Using loc to filter data
`loc` is convenient because, besides filtering rows, it lets us select specific columns in the same call.

In [None]:
# E-mail addresses of everyone whose last name is 'regmi':
# the mask picks the rows, 'email' picks the column.
filt = df['last'].eq('regmi')
df.loc[filt, 'email']

### Using conditional operators '&' and '|' to filter out data

1. Filtering people which have first name ram and last name regmi
2. Filtering people who have first name prasiddha or last name regmi

In [None]:
# Rows where BOTH conditions hold: first name 'ram' AND last name 'regmi'
is_ram = df['first'] == 'ram'
is_regmi = df['last'] == 'regmi'
filt = is_ram & is_regmi
df.loc[filt, ['first', 'last', 'email']]

In [90]:
# Rows where AT LEAST ONE condition holds: first name 'prasiddha' OR last name 'regmi'
is_prasiddha = df['first'] == 'prasiddha'
is_regmi = df['last'] == 'regmi'
filt = is_prasiddha | is_regmi
df.loc[filt]

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddha@email.com
1,ram,regmi,ramregmi@email.com
2,hari,regmi,hariregmi@email.com


### Using '~' (tilde) to negate a filter and get the opposite rows

In [91]:
# People whose last name is NOT 'regmi': '~' inverts the boolean mask
filt = df['last'].eq('regmi')
not_regmi = ~filt
df.loc[not_regmi]

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddha@email.com


## Practicing filter in stackoverflow data

In [5]:
# Load the survey answers and the schema, choosing the index while reading:
# 'Respondent' labels the answer rows, 'Column' labels the schema rows.
df = pd.read_csv(
    'data/survey_results_public.csv',
    index_col='Respondent',
)
schema_df = pd.read_csv(
    'data/survey_results_schema.csv',
    index_col='Column',
)

In [6]:
# Show up to 85 columns and 85 rows before pandas truncates the display
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 85)

In [7]:
df.head(2)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult


In [8]:
schema_df

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Respondent,Randomized respondent ID number (not in order ...
MainBranch,Which of the following options best describes ...
Hobbyist,Do you code as a hobby?
OpenSourcer,How often do you contribute to open source?
OpenSource,How do you feel about the quality of open sour...
Employment,Which of the following best describes your cur...
Country,In which country do you currently reside?
Student,"Are you currently enrolled in a formal, degree..."
EdLevel,Which of the following best describes the high...
UndergradMajor,What was your main or most important field of ...


In [9]:
schema_df.loc['ConvertedComp', 'QuestionText']

'Salary converted to annual USD salaries using the exchange rate on 2019-02-01, assuming 12 working months and 50 working weeks.'

In [10]:
# Respondents whose converted yearly salary is above 40000, shown with a
# handful of relevant columns
salary_threshold = 40000
high_salary = df['ConvertedComp'].gt(salary_threshold)
columns_of_interest = ['Country', 'LanguageWorkedWith', 'ConvertedComp']
df.loc[high_salary, columns_of_interest]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,United States,C;C++;C#;Python;SQL,61000.0
6,Canada,Java;R;SQL,366420.0
9,New Zealand,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;P...,95179.0
13,United States,Bash/Shell/PowerShell;HTML/CSS;JavaScript;PHP;...,90000.0
14,Germany,C++,57060.0
...,...,...,...
88877,United States,Bash/Shell/PowerShell;C;Clojure;HTML/CSS;Java;...,2000000.0
88878,United States,HTML/CSS;JavaScript;Scala;TypeScript,130000.0
88879,Finland,Bash/Shell/PowerShell;C++;Python,82488.0
88881,Austria,Bash/Shell/PowerShell;Go;HTML/CSS;Java;JavaScr...,68745.0


### Filtering data with country names

In [11]:
# Keep only respondents living in one of the listed countries
countries = [
    'Nepal',
    'United States',
    'India',
    'Canada',
    'Germany',
]
filt = df['Country'].isin(countries)
df['Country'].loc[filt]

Respondent
4        United States
6               Canada
8                India
10               India
12              Canada
             ...      
85182           Canada
85642    United States
86012            India
88282    United States
88377           Canada
Name: Country, Length: 39508, dtype: object

### Get people who knew 'Python' as programming language

Using str method in pandas

In [12]:
# Respondents who list Python among the languages they have worked with.
# na=False treats a missing answer as "does not contain Python", so the mask
# is purely boolean and can be passed straight to .loc.
# (A bare `filt` line used to sit here; it was never rendered because only
# the last expression of a cell is displayed, so it has been dropped.)
filt = df['LanguageWorkedWith'].str.contains('Python', na=False)
df.loc[filt, 'LanguageWorkedWith']

Respondent
1                          HTML/CSS;Java;JavaScript;Python
2                                      C++;HTML/CSS;Python
4                                      C;C++;C#;Python;SQL
5              C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA
8        Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
                               ...                        
84539    Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
85738      Bash/Shell/PowerShell;C++;Python;Ruby;Other(s):
86566      Bash/Shell/PowerShell;HTML/CSS;Python;Other(s):
87739             C;C++;HTML/CSS;JavaScript;PHP;Python;SQL
88212                           HTML/CSS;JavaScript;Python
Name: LanguageWorkedWith, Length: 36443, dtype: object

In [13]:
df.loc[88816,'LanguageWorkedWith']

nan

In [14]:
# Number of respondents from Nepal (0 if the country never appears)
respondents_per_country = df['Country'].value_counts()
respondents_per_country.get('Nepal', 0)

237

In [35]:
# Number of respondents from Nepal whose salary is higher than 12000
from_nepal = df['Country'] == 'Nepal'
earns_over_12000 = df['ConvertedComp'] > 12000
filt = from_nepal & earns_over_12000
mask_counts = filt.value_counts()
mask_counts.get(True, 0)

8

In [37]:
# Mean salary of respondents from Nepal who work with Python and earn more
# than 1200.
# na=False makes str.contains return False (not NaN) for missing answers, so
# every partial mask is strictly boolean before the masks are combined; this
# matches how the Python filter is built earlier in the notebook.
from_nepal = df['Country'].str.contains('Nepal', na=False)
knows_python = df['LanguageWorkedWith'].str.contains('Python', na=False)
earns_over_1200 = df['ConvertedComp'] > 1200
filt = from_nepal & knows_python & earns_over_1200

# Number of matching respondents, kept in a variable because a bare
# expression in the middle of a cell is evaluated but never displayed.
matching_respondents = filt.sum()

df.loc[filt, 'ConvertedComp'].mean()

9903.225806451614

In [109]:
df.loc[filt, ['LanguageWorkedWith','ConvertedComp']]

Unnamed: 0_level_0,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1
5993,HTML/CSS;JavaScript;PHP,300000.0
25261,C#;HTML/CSS;Java;JavaScript;PHP;Python;SQL,21096.0
39526,Assembly;C;C++;C#;HTML/CSS;Java;JavaScript;PHP...,100000.0
43075,JavaScript;PHP;SQL,30000.0
61673,C#;HTML/CSS;JavaScript;SQL;TypeScript,12660.0
77938,Bash/Shell/PowerShell;C++;HTML/CSS;Java;JavaSc...,12660.0
79024,HTML/CSS;Java;JavaScript;SQL;Other(s):,15818.0
85446,Java;JavaScript;Python;SQL,21096.0


### Using boolean masking to get first 5 rows

In [43]:
# Boolean mask that is True for the first five rows only. Building it per
# position keeps its length equal to len(df) even when the frame has fewer
# than five rows (the `[True] * 5 + ...` form would then be longer than the
# frame and .loc would reject it).
mask = [position < 5 for position in range(len(df))]

In [44]:
len(mask)

88883

In [45]:
df.loc[mask]

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3.0,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3.0,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16.0,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


## Part 5: Updating Rows and Columns - Modifying Data within DataFrames
We will learn how to update the data in our rows and columns, and in the next chapter we will learn how to remove rows and columns from our DataFrames.

What we will learn:

    - Changing column names at once and using rename method and list comprehension
    - String method replace, upper, lower
    - Update values of rows (series) by filtering using 'loc' and by using lambda function
    - Update values of dataframe using applymap
    - apply, map, applymap and replace

In [66]:
# Same example people as before, but with mixed-case e-mail addresses so the
# string methods below have something to change.
first_names = ['prasiddha', 'ram', 'hari']
last_names = ['pokhrel', 'regmi', 'regmi']
emails = ['Prasiddha@email.com', 'RamRegmi@email.com', 'HariRegmi@email.com']

people = dict(first=first_names, last=last_names, email=emails)

df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,Prasiddha@email.com
1,ram,regmi,RamRegmi@email.com
2,hari,regmi,HariRegmi@email.com


### Changing the column names
There are two approaches: the first one is not preferred, the second one is.

In [47]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

#### First approach to rename column names: use when all the column names need to change

In [None]:
# First approach: assign a complete list of names, one per existing column,
# in column order. Use it when every column name has to change.
new_column_names = ['first name', 'last name', 'email']
df.columns = new_column_names

In [None]:
df

### Using list comprehension and str.upper() to change all the column names to uppercase

In [50]:
# Upper-case every column name with the vectorised .str accessor.
# Equivalent list-comprehension form:
#     df.columns = [x.upper() for x in df.columns]
upper_case_names = df.columns.str.upper()
df.columns = upper_case_names
df

Unnamed: 0,FIRST,LAST,EMAIL
0,prasiddha,pokhrel,Prasiddha@email.com
1,ram,regmi,RamRegmi@email.com
2,hari,regmi,HariRegmi@email.com


### Using str.replace() to update column name

In [64]:
# Normalise the column names in one chain: spaces become underscores, then
# everything is lower-cased.
df.columns = df.columns.str.replace(" ", "_").str.lower()
df

Unnamed: 0,first,last,email,last.1
0,prasiddha,pokhrel,Prasiddha@email.com,
1,ram,regmi,RamRegmi@email.com,
2,hari,regmi,HariRegmi@email.com,regmi


#### Second approach to rename column names: use when need to change specific columns

In [65]:
# Second approach: rename only the columns listed in the mapping
# (old name -> new name); all other columns keep their names.
# The renamed frame is reassigned instead of using inplace=True, which keeps
# the call chainable and avoids mutating the object in place.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})
df

Unnamed: 0,first,last,email,last.1
0,prasiddha,pokhrel,Prasiddha@email.com,
1,ram,regmi,RamRegmi@email.com,
2,hari,regmi,HariRegmi@email.com,regmi


### Changing the value of single and multiple row

In [None]:
# Replace every value of the row labelled 2 with a single assignment;
# the list supplies one value per column, in column order.
replacement_row = ['hari', 'pokhrel', 'HariPokhrel@email.com']
df.loc[2] = replacement_row
df

In [None]:
# Update several columns of row 2 at once (here: last and email)
columns_to_update = ['last', 'email']
df.loc[2, columns_to_update] = ['regmi', 'HariRegmi@email.com']
df

In [None]:
# changing last (single values)
df.loc[2, 'last'] = 'pokhrel'
df

### Using 'at' instead of 'loc'

In [67]:
df.at[2, 'last'] = 'regmi'
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,Prasiddha@email.com
1,ram,regmi,RamRegmi@email.com
2,hari,regmi,HariRegmi@email.com


In [75]:
# Deliberate demonstration of the chained-assignment pitfall: df[filt] yields
# a temporary copy, so the assignment below only changes that copy.
filt = (df['email'] == 'HariRegmi@email.com')
df[filt]['last'] = 'poudel' # emits SettingWithCopyWarning (a warning, not an error) and leaves df unchanged; use df.loc[filt, 'last'] = ... instead
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'poudel' # gives error, instead use loc to update the value


Unnamed: 0,first,last,email
0,prasiddha,pokhrel,Prasiddha@email.com
1,ram,regmi,RamRegmi@email.com
2,hari,regmi,HariRegmi@email.com


In [79]:
df[filt]['last']

2    poudel
Name: last, dtype: object

In [78]:
df.loc[filt, 'last'] = 'poudel'
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,Prasiddha@email.com
1,ram,regmi,RamRegmi@email.com
2,hari,poudel,HariRegmi@email.com


In [82]:
df[['email']]

Unnamed: 0,email
0,Prasiddha@email.com
1,RamRegmi@email.com
2,HariRegmi@email.com


In [85]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddha@email.com
1,ram,regmi,ramregmi@email.com
2,hari,poudel,hariregmi@email.com


### apply, map, applymap, replace methods
1. apply: apply is used to call a function on our values. It works on both DataFrame and Series objects, but it behaves differently on each of them:
    - Running apply on a Series applies the function to every element of the Series
    - Running apply on a DataFrame applies the function to every Series (each column by default) of the DataFrame

What if we want to apply a function to every single element of a DataFrame? That is what applymap is for.

2. applymap: applymap only works for dataframe.
    - It is used to operate for each elements of the dataframe

3. map: map only works for series object
    - It is used for substituting each value in a series with another value

4. replace: 
    - It is used to work with specific elements of the dataframe/series

### apply

In [87]:
df['email'].apply(len)

0    19
1    18
2    19
Name: email, dtype: int64

#### Using apply with our own custom function and with a lambda function

In [88]:
def update_email(email: str) -> str:
    """Return ``email`` converted to upper case."""
    upper_cased = email.upper()
    return upper_cased

In [91]:
df['email'].apply(update_email)

0    PRASIDDHA@EMAIL.COM
1     RAMREGMI@EMAIL.COM
2    HARIREGMI@EMAIL.COM
Name: email, dtype: object

In [92]:
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,PRASIDDHA@EMAIL.COM
1,ram,regmi,RAMREGMI@EMAIL.COM
2,hari,poudel,HARIREGMI@EMAIL.COM


In [None]:
df['email'] = df['email'].apply(lambda x: x.lower())
df

In [93]:
df['email'].apply(len) # in default it applies through all the rows.

0    19
1    18
2    19
Name: email, dtype: int64

#### 'apply' function in DataFrame object

In [94]:
# With the default axis ('index') the function receives each column as a
# Series, so len() reports the number of rows in every column.
df.apply(len, axis='index')

first    3
last     3
email    3
dtype: int64

In [95]:
# With axis=1 ('columns') the function receives each row as a Series, so
# len() reports the number of columns in every row.
df.apply(len, axis=1)

0    3
1    3
2    3
dtype: int64

#### Applying the min function to each column (Series) of the DataFrame

In [96]:
df.apply(pd.Series.min)

first                   hari
last                 pokhrel
email    HARIREGMI@EMAIL.COM
dtype: object

In [97]:
df.apply(lambda x: x.min())

first                   hari
last                 pokhrel
email    HARIREGMI@EMAIL.COM
dtype: object

### applymap

In [98]:
# Element-wise: len() is called on every single cell, giving the string
# length of each value.
# NOTE(review): DataFrame.applymap is deprecated in newer pandas releases
# (2.1+) in favour of DataFrame.map — confirm the pandas version in use
# before switching.
df.applymap(len)

Unnamed: 0,first,last,email
0,9,7,19
1,3,5,18
2,4,6,19


In [99]:
# Element-wise: lower-case every cell of the DataFrame (all cells are strings here).
# NOTE(review): DataFrame.applymap is deprecated in newer pandas releases
# (2.1+) in favour of DataFrame.map — confirm the pandas version in use
# before switching.
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,prasiddha@email.com
1,ram,regmi,ramregmi@email.com
2,hari,poudel,hariregmi@email.com


### map

In [100]:
# map substitutes values through the dict; a value without an entry
# ('hari') becomes NaN in the result.
nickname_by_first_name = {'prasiddha': 'prasidh', 'ram': 'ramu'}
df['first'].map(nickname_by_first_name)

0    prasidh
1       ramu
2        NaN
Name: first, dtype: object

### replace

In [101]:
# replace swaps only the listed values; a value without an entry ('hari')
# is left untouched instead of becoming NaN.
nickname_by_first_name = {'prasiddha': 'prasidh', 'ram': 'ramu'}
df['first'] = df['first'].replace(nickname_by_first_name)
df

Unnamed: 0,first,last,email
0,prasidh,pokhrel,PRASIDDHA@EMAIL.COM
1,ramu,regmi,RAMREGMI@EMAIL.COM
2,hari,poudel,HARIREGMI@EMAIL.COM


## Practicing apply, applymap, map, replace in stackoverflow data

In [102]:
# Reload the survey answers and the schema, choosing the index while reading:
# 'Respondent' labels the answer rows, 'Column' labels the schema rows.
df = pd.read_csv(
    'data/survey_results_public.csv',
    index_col='Respondent',
)
schema_df = pd.read_csv(
    'data/survey_results_schema.csv',
    index_col='Column',
)

In [None]:
# Show up to 85 columns and 85 rows before pandas truncates the display
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 85)

In [103]:
df.head(1)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult


In [None]:
# Give the converted-compensation column a clearer name (renamed in place),
# then display the frame.
df.rename(mapper={'ConvertedComp': 'SalaryUSD'}, axis='columns', inplace=True)
df

In [104]:
# Series.map translates every answer through the dictionary; a value that is
# not one of the dictionary keys becomes NaN.
df['Hobbyist'] = df['Hobbyist'].map(
    {
        'Yes': True,
        'No': False,
    }
)
df.head(n=2)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,True,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,False,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult


In [105]:
# Series.replace swaps only the listed values (True/False back to 'Yes'/'No');
# unlike map, values that are not listed are left unchanged.
df['Hobbyist'] = df['Hobbyist'].replace(
    to_replace={
        True: 'Yes',
        False: 'No',
    }
)
df.head(n=2)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult


## Part 6: Add Remove Rows and Columns From DataFrames

We will be learning:

        - to add and remove rows and columns from dataframes
        - combine multiple columns into one

Methods learned:
    
    - drop:
        - drop columns: df.drop(columns = ['first','last'], inplace = True)
        - drop rows: df.drop(index=[1,2])
    
    - str.split(' '):
        - splits each value on the separator passed to split; when no separator is passed it splits on whitespace

    - pd.concat:
        - to add rows into the dataframe
        - df = pd.concat([df, df3], ignore_index=True)
 

`DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0.0, so `pd.concat` is used instead.

In [108]:
# Small hand-made dataset used to practise adding and removing rows/columns.
people = {
    'first': ['prasiddha', 'ram', 'hari'],
    'last': ['pokhrel', 'regmi', 'regmi'],
    'email': [
        'Prasiddha@email.com',
        'RamRegmi@email.com',
        'HariRegmi@email.com',
    ],
}

# Each dictionary key becomes a column; rows get the default 0..n-1 index.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,prasiddha,pokhrel,Prasiddha@email.com
1,ram,regmi,RamRegmi@email.com
2,hari,regmi,HariRegmi@email.com


### Combine columns

In [109]:
# Element-wise string concatenation: first name, a space, then last name.
# The result is a new Series; df itself is not modified by this expression.
df['first'] + ' ' + df['last']

0    prasiddha pokhrel
1            ram regmi
2           hari regmi
dtype: object

In [111]:
# Store "first last" (joined with a single space) as a new column.
df['full_name'] = df['first'].str.cat(df['last'], sep=' ')

In [112]:
# Display the frame to check its current columns.
df

Unnamed: 0,first,last,email,full_name
0,prasiddha,pokhrel,Prasiddha@email.com,prasiddha pokhrel
1,ram,regmi,RamRegmi@email.com,ram regmi
2,hari,regmi,HariRegmi@email.com,hari regmi


### Removing columns

In [113]:
# Remove the two source columns in place, then display what is left.
df.drop(['first', 'last'], axis='columns', inplace=True)
df

Unnamed: 0,email,full_name
0,Prasiddha@email.com,prasiddha pokhrel
1,RamRegmi@email.com,ram regmi
2,HariRegmi@email.com,hari regmi


### Adding multiple columns from a single column by separating data

In [115]:
# Split every full name on the space; expand=True returns the pieces as the
# columns of a new DataFrame instead of a Series of lists.
df['full_name'].str.split(pat=' ', expand=True)

Unnamed: 0,0,1
0,prasiddha,pokhrel
1,ram,regmi
2,hari,regmi


In [116]:
# Assign the two pieces of the split to two new columns in one step.
df[['first', 'last']] = df['full_name'].str.split(pat=' ', expand=True)
df

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi


### Adding and removing rows

1. We add single row into dataframe
2. Combine two dataframes together into a single dataframe by appending the rows of one to another  

#### Adding single row by using pd.concat([df,df2]) method

In [None]:
# One-row frame (built from a single record) that only has a 'first' column.
df2 = pd.DataFrame([{'first': 'Arya'}])

In [None]:
# Stack df2 below the existing rows; ignore_index renumbers the combined rows.
# df2 only has a 'first' column, so its other cells are filled with NaN.
# Note: every re-run of this cell appends the row again.
df = pd.concat(objs=[df, df2], axis='index', ignore_index=True)
# Clean-up helpers kept for reference (left commented out):
# df.drop(0, axis = 1, inplace = True)
# df.drop(df.index[3:11], axis = 0, inplace = True)
df

In [None]:
# Report whether the installed pandas still exposes DataFrame.append.
if not hasattr(pd.DataFrame, 'append'):
    print("The 'append' method is not available in Pandas.")
else:
    print("The 'append' method is available in Pandas.")

#### Adding multiple rows by using pd.concat() method

In [117]:
# Two more people, used to demonstrate appending several rows at once.
people = {
    'first': ['shyam', 'ishwor'],
    'last': ['kafle', 'gurung'],
    'email': [
        'ShyamKafle@email.com',
        'IshworGurung@email.com',
    ],
}

df3 = pd.DataFrame(data=people)
df3

Unnamed: 0,first,last,email
0,shyam,kafle,ShyamKafle@email.com
1,ishwor,gurung,IshworGurung@email.com


In [121]:
# Append all rows of df3 to df. Columns are matched by name, and cells for
# columns that df3 does not have (full_name) become NaN.
# Note: every re-run of this cell appends the rows again.
df = pd.concat(objs=[df, df3], ignore_index=True)
df

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi
3,IshworGurung@email.com,,ishwor,gurung
4,ShyamKafle@email.com,,shyam,kafle
5,IshworGurung@email.com,,ishwor,gurung


### Dropping rows

In [122]:
# Remove the row labelled 3 in place; the remaining index labels are kept
# as they are (they are not renumbered).
df.drop(index=[3], inplace=True)
df

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi
4,ShyamKafle@email.com,,shyam,kafle
5,IshworGurung@email.com,,ishwor,gurung


In [123]:
# Change the last name stored in the row labelled 5.
# last_name keeps the previous value; it is not used again in this cell.
last_name = df.loc[5,'last']
df.loc[5,'last'] = 'kafle'
df

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi
4,ShyamKafle@email.com,,shyam,kafle
5,IshworGurung@email.com,,ishwor,kafle


#### Filtering out row by last name and dropping them
First we build a filter for the last name to be deleted; in this case we filter 'last' for 'kafle'.

Then we take the index of the matching rows and pass it to drop(index=...).

In [124]:
# Boolean mask of the rows whose last name is 'kafle', followed by the index
# labels of exactly those rows.
filt = df['last'].eq('kafle')
df.loc[filt].index

Index([4, 5], dtype='int64')

In [125]:
# Drop the rows selected by the mask. Without inplace=True this returns a new
# frame and leaves df itself untouched.
df.drop(index=df.loc[filt].index)

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi


In [126]:
# Display df again: the drop above was not in place, so the 'kafle' rows are
# still present.
df

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi
4,ShyamKafle@email.com,,shyam,kafle
5,IshworGurung@email.com,,ishwor,kafle


## Part 7: Sorting Data

1. df.sort_values(by='last', ascending=True)
2. df.sort_values(by=['first','last'], ascending=[True, False])
3. df['last'].sort_values()
4. df['salary'].nlargest(10) # returns the 10 largest salaries
5. df['salary'].nsmallest(10) # returns the 10 smallest salaries

In [None]:
# Fresh sample data for the sorting examples; three people share the last
# name 'regmi' so that the secondary sort key has a visible effect.
people = {
    'first': ['prasiddha', 'hari', 'ram', 'amrit'],
    'last': ['pokhrel', 'regmi', 'regmi', 'regmi'],
    'email': [
        'Prasiddha@email.com',
        'HariRegmi@email.com',
        'RamRegmi@email.com',
        'a@email.com',
    ],
}

df = pd.DataFrame(data=people)
df

In [None]:
# Sort the rows by last name, Z to A. Returns a sorted copy; df is unchanged.
df.sort_values('last', ascending=False)

Sorting by last name in descending order; rows that share the same last name are then sorted by first name (also descending, because `ascending=False` applies to both columns).

In [None]:
# Sort by last name, then by first name within equal last names; the single
# ascending=False applies to both keys.
df.sort_values(['last', 'first'], ascending=False)

Sorting by last name in descending order and by first name in ascending order, by passing a list of booleans (one per column). The first-name order only matters among rows that share a last name: for example, ram, hari and amrit all have 'regmi' as their last name, so these three rows are sorted by first name in ascending order.

In [None]:
# One ascending flag per sort key: last name descending, first name ascending.
df.sort_values(['last', 'first'], ascending=[False, True])

sort_index() sorts the rows by their index labels

In [127]:
# Order the rows by index label (ascending by default); returns a sorted copy.
df.sort_index()

Unnamed: 0,email,full_name,first,last
0,Prasiddha@email.com,prasiddha pokhrel,prasiddha,pokhrel
1,RamRegmi@email.com,ram regmi,ram,regmi
2,HariRegmi@email.com,hari regmi,hari,regmi
4,ShyamKafle@email.com,,shyam,kafle
5,IshworGurung@email.com,,ishwor,kafle


sort_values() in series data (data of a single column)

In [128]:
# Sorting a single column (a Series) needs no `by` argument; each value keeps
# its original index label.
df['last'].sort_values()

4      kafle
5      kafle
0    pokhrel
1      regmi
2      regmi
Name: last, dtype: object

### Practicing sorting in Stackoverflow data

In [129]:
# Load the survey answers, using the 'Respondent' column as the index while
# reading, and the schema, indexed by its 'Column' column.
df = pd.read_csv(
    'data/survey_results_public.csv',
    index_col='Respondent',
)
schema_df = pd.read_csv(
    'data/survey_results_schema.csv',
    index_col='Column',
)

In [130]:
# Show up to 85 columns and 85 rows before pandas truncates the display.
pd.options.display.max_columns = 85
pd.options.display.max_rows = 85

In [131]:
# Peek at the first row to confirm the data loaded with the expected columns.
df.head(1)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult


In [132]:
# Sort the whole survey by country name (A to Z), modifying df in place.
df.sort_values(by='Country', inplace=True)

In [133]:
# Show only the country and compensation columns of the sorted frame.
df.loc[:, ['Country', 'ConvertedComp']]

Unnamed: 0_level_0,Country,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1
39258,Afghanistan,19152.0
63129,Afghanistan,1000000.0
85715,Afghanistan,
50767,Afghanistan,
2782,Afghanistan,
...,...,...
88062,,
88076,,
88601,,
88802,,


### Sorting countries in ascending order and salary in descending order

In [134]:
# Country A to Z, and within each country the highest compensation first;
# the frame is sorted in place.
df.sort_values(
    ['Country', 'ConvertedComp'],
    ascending=[True, False],
    inplace=True,
)

In [135]:
# First 50 rows of the two sorted columns.
df.loc[:, ['Country', 'ConvertedComp']].head(n=50)

Unnamed: 0_level_0,Country,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1
63129,Afghanistan,1000000.0
50499,Afghanistan,153216.0
39258,Afghanistan,19152.0
58450,Afghanistan,17556.0
7085,Afghanistan,14364.0
22450,Afghanistan,7980.0
48436,Afghanistan,4464.0
10746,Afghanistan,3996.0
8149,Afghanistan,1596.0
29736,Afghanistan,1116.0


### Getting largest or smallest value from the dataframe
Getting the 10 highest salaries

In [137]:
# The 10 largest compensation values, returned as a Series that keeps the
# Respondent index of each value.
df['ConvertedComp'].nlargest(10)

Respondent
25983    2000000.0
87896    2000000.0
22013    2000000.0
28243    2000000.0
72732    2000000.0
78151    2000000.0
80200    2000000.0
52132    2000000.0
75561    2000000.0
32250    2000000.0
Name: ConvertedComp, dtype: float64

In [139]:
# Average converted compensation (missing values are skipped by default).
df['ConvertedComp'].mean()

127110.73842323056

In [None]:
# Full rows (all columns) of the 10 respondents with the highest compensation.
df.nlargest(n=10, columns='ConvertedComp')

In [None]:
# Full rows (all columns) of the 10 respondents with the lowest compensation.
df.nsmallest(n=10, columns='ConvertedComp')

## Part 8: Grouping and Aggregation - Analyzing and Exploring Data.

#### Aggregation
    - Combining multiple pieces of data into a single result. Using mean, median or mode are aggregate functions because they take multiple values and give mean, median or mode of those values.


1. df['ConvertedComp'].median()
2. df['ConvertedComp'].count() # counts the values except NaN value
3. df['Hobbyist'].value_counts() # counts the occurrences of each value; e.g. if the column holds ten 1s, eleven 2s and twelve 3s, it returns 1: 10, 2: 11, 3: 12

#### Grouping popular social media by countries
The groupby function works in three steps

1. splits the objects
2. applies a function
3. combines the result

#### Advanced data analysis
    using group by, apply and loc to 
        - find the number of people who are using python according to the country
        - calculate the percentage of respondent using python
        - concat method
        - agg method


In [140]:
# Reload the survey so this part starts from unsorted, unmodified data.
# 'Respondent' becomes the index of the answers, 'Column' the index of the schema.
df = pd.read_csv(
    'data/survey_results_public.csv',
    index_col='Respondent',
)
schema_df = pd.read_csv(
    'data/survey_results_schema.csv',
    index_col='Column',
)

In [141]:
# Show up to 85 columns and 85 rows before pandas truncates the display.
pd.options.display.max_columns = 85
pd.options.display.max_rows = 85

In [142]:
# Peek at the first three rows of the freshly loaded data.
df.head(3)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3.0,22,1.0,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult


#### Typical salary of developer of the survey

In [None]:
# First 15 values of the converted-compensation column.
df['ConvertedComp'].head(15)

In [None]:
# Median converted compensation (missing values are skipped by default).
df['ConvertedComp'].median()

In [143]:
# Keep only the numeric columns, then compute the median of each of them.
numeric_df = df.select_dtypes(include='number')
numeric_df.median()

CompTotal        62000.0
ConvertedComp    57287.0
WorkWeekHrs         40.0
CodeRevHrs           4.0
Age                 29.0
dtype: float64

In [144]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns.
df.describe()

Unnamed: 0,CompTotal,ConvertedComp,WorkWeekHrs,CodeRevHrs,Age
count,55945.0,55823.0,64503.0,49790.0,79210.0
mean,551901400000.0,127110.7,42.127197,5.084308,30.336699
std,73319260000000.0,284152.3,37.28761,5.513931,9.17839
min,0.0,0.0,1.0,0.0,1.0
25%,20000.0,25777.5,40.0,2.0,24.0
50%,62000.0,57287.0,40.0,4.0,29.0
75%,120000.0,100000.0,44.75,6.0,35.0
max,1e+16,2000000.0,4850.0,99.0,99.0


In [148]:
# Count how many respondents reported each distinct ConvertedComp value;
# value_counts lists the most frequent value first.
count_comp = df['ConvertedComp'].value_counts()
# max(count_comp) would return the largest of these counts
count_comp

ConvertedComp
2000000.0    709
1000000.0    558
120000.0     502
100000.0     480
150000.0     434
            ... 
411096.0       1
261228.0       1
82322.0        1
66424.0        1
588012.0       1
Name: count, Length: 9162, dtype: int64

In [149]:
# Number of respondents per answer in the Hobbyist column.
df['Hobbyist'].value_counts()

Hobbyist
Yes    71257
No     17626
Name: count, dtype: int64

In [150]:
# Count how often each social media platform was chosen with value_counts();
# Reddit is the most common answer.
df['SocialMedia'].value_counts() # reddit was the most popular

SocialMedia
Reddit                      14374
YouTube                     13830
WhatsApp                    13347
Facebook                    13178
Twitter                     11398
Instagram                    6261
I don't use social media     5554
LinkedIn                     4501
WeChat 微信                     667
Snapchat                      628
VK ВКонта́кте                 603
Weibo 新浪微博                     56
Youku Tudou 优酷                 21
Hello                          19
Name: count, dtype: int64

In [151]:
# normalize=True returns each platform's share of the answers (fractions that
# sum to 1) instead of raw counts.
df['SocialMedia'].value_counts(normalize=True)

SocialMedia
Reddit                      0.170233
YouTube                     0.163791
WhatsApp                    0.158071
Facebook                    0.156069
Twitter                     0.134988
Instagram                   0.074150
I don't use social media    0.065777
LinkedIn                    0.053306
WeChat 微信                   0.007899
Snapchat                    0.007437
VK ВКонта́кте               0.007141
Weibo 新浪微博                  0.000663
Youku Tudou 优酷              0.000249
Hello                       0.000225
Name: proportion, dtype: float64

## Grouping popular social media by country
A groupby operation works in three steps:
1. splits the object into groups
2. applies a function to each group
3. combines the results

In [155]:
# Number of respondents per country, largest first
df['Country'].value_counts()

Country
United States        20949
India                 9061
Germany               5866
United Kingdom        5737
Canada                3395
                     ...  
Tonga                    1
Timor-Leste              1
North Korea              1
Brunei Darussalam        1
Chad                     1
Name: count, Length: 179, dtype: int64

In [160]:
# Look up one country's respondent count; .get returns None instead of raising if the label is absent
df['Country'].value_counts().get('Nepal')

237

In [161]:
# Group the rows by country. The column is passed as a plain string rather than a
# one-element list so the group keys stay scalar: get_group('Nepal') then works
# without the FutureWarning that pandas 2.2 emits for length-1 list-like grouping.
country_grp = df.groupby('Country') # returns a pandas DataFrameGroupBy object

In [162]:
# A groupby object has no tabular display; the cell shows only its repr
country_grp

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x15b34ad70>

In [164]:
# All rows whose Country is 'Nepal', returned as a regular DataFrame
country_grp.get_group('Nepal')

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
508,I am a student who is learning to code,Yes,Never,"OSS is, on average, of LOWER quality than prop...","Not employed, but looking for work",Nepal,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Academic researcher;Designer;Developer, full-s...",1,13,,,,,,,"I’m not actively looking, but I am open to new...",I've never had a job,,,,,,,,,,,,,,,,,,,,,C;C++;HTML/CSS;JavaScript,HTML/CSS;JavaScript;SQL;TypeScript,Firebase;MongoDB,Firebase;MongoDB;MySQL;PostgreSQL,Heroku;Windows,Android;Arduino;Docker;Heroku;Linux;Raspberry ...,Express;React.js,Express;React.js;Vue.js,Node.js,Node.js;React Native;TensorFlow,Visual Studio Code,Windows,"Development;Outside of work, for personal proj...",,Useful for immutable record keeping outside of...,Yes,Yes,What?,Facebook,,,2018,Daily or almost daily,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,0-10 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not really",A lot less welcome now than last year,Industry news about technologies you're intere...,,Man,No,Bisexual,South Asian,Yes,Too long,Neither easy nor difficult
592,I am a developer by profession,Yes,Never,"OSS is, on average, of LOWER quality than prop...","Independent contractor, freelancer, or self-em...",Nepal,No,Some college/university study without earning ...,"Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,2-9 employees,"Database administrator;Developer, back-end",4,20,3,Slightly dissatisfied,Very dissatisfied,,,,"I’m not actively looking, but I am open to new...",1-2 years ago,Complete a take-home project;Interview with pe...,No,"Languages, frameworks, and other technologies ...",I heard about a job opportunity (from a recrui...,NPR,Nepalese rupee,,Monthly,,48.0,There is a schedule and/or spec (made by me or...,Inadequate access to necessary tools;Lack of s...,Less than once per month / Never,Office,Average,"Yes, because I was told to do so",2.0,,,,C;C++;HTML/CSS;PHP;Python;SQL,HTML/CSS;Python;SQL,Elasticsearch;MongoDB;MySQL;PostgreSQL,Elasticsearch;Firebase;MongoDB;PostgreSQL;Redis,AWS;Linux;WordPress,Android;AWS;Linux,Django,Django;Laravel;React.js,,Flutter;Hadoop;Node.js;TensorFlow,Sublime Text;Visual Studio Code,Linux-based,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,YouTube,In real life (in person),Username,2016,Multiple times per day,Find answers to specific questions;Learn how t...,3-5 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are",Not sure,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,29.0,Man,No,Straight / Heterosexual,South Asian,No,Appropriate in length,Neither easy nor difficult
984,I am a developer by profession,Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Nepal,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Participated in a full-time developer training...,20 to 99 employees,Academic researcher;Data or business analyst;D...,6,14,2,Very satisfied,Very satisfied,Somewhat confident,No,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,"Write code by hand (e.g., on a whiteboard)",No,Industry that I'd be working in;Specific depar...,"My job status changed (promotion, new job, etc.)",USD,United States dollar,10000.0,Yearly,10000.0,40.0,There is a schedule and/or spec (made by me or...,Distracting work environment;Inadequate access...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process","The CTO, CIO, or other management purchase new...",I have some influence,C++;Go;Other(s):,Assembly;Go;Other(s):,MySQL;Other(s):,,AWS;Linux;Slack;Other(s):,AWS;IBM Cloud or Watson;Linux;Slack;Other(s):,,,Other(s):,Other(s):,IntelliJ;Visual Studio Code,Linux-based,I do not use containers,Non-currency applications of blockchain,Useful for immutable record keeping outside of...,Yes,Yes,What?,LinkedIn,,,2014,Multiple times per day,Find answers to specific questions;Learn how t...,More than 10 times per week,Stack Overflow was much faster,0-10 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, definitely",A lot more welcome now than last year,Tech articles written by other developers;Indu...,25.0,Man,No,Straight / Heterosexual,East Asian,Yes,Too long,Neither easy nor difficult
1355,I am a developer by profession,Yes,Never,"OSS is, on average, of LOWER quality than prop...",Employed full-time,Nepal,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...","Taught yourself a new language, framework, or ...",100 to 499 employees,"Developer, back-end;Developer, front-end;Devel...",5,13,Less than 1 year,Slightly satisfied,Slightly satisfied,Somewhat confident,Yes,Yes,"I’m not actively looking, but I am open to new...",Less than a year ago,,No,Financial performance or funding status of the...,"Something else changed (education, award, medi...",NPR,Nepalese rupee,30000.0,Monthly,3168.0,40.0,There's no schedule or spec; I work on what se...,Not enough people for the workload;Toxic work ...,Less than once per month / Never,Home,Average,"Yes, because I see value in code review",3.0,"No, and I'm glad we don't","The CTO, CIO, or other management purchase new...",I have little or no influence,HTML/CSS;Java;JavaScript;SQL;TypeScript,Go;JavaScript;SQL;Swift;TypeScript,MongoDB;MySQL;PostgreSQL;SQLite,MongoDB;PostgreSQL,Android;Linux;Windows,Linux,Express;React.js,Express;React.js,Node.js,Node.js,Android Studio;IntelliJ;Visual Studio Code,Linux-based,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",No,Yes,Yes,YouTube,In real life (in person),Username,,Daily or almost daily,Find answers to specific questions,More than 10 times per week,They were about the same,,Yes,Less than once per month or monthly,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,South Asian,No,Too long,Easy
1783,I am a developer by profession,Yes,Less than once a month but more than once per ...,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Nepal,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...","Taught yourself a new language, framework, or ...",2-9 employees,"Academic researcher;Developer, back-end",2,12,1,Slightly satisfied,Slightly satisfied,Not at all confident,No,Yes,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",Yes,"Languages, frameworks, and other technologies ...",I was preparing for a job search,NPR,Nepalese rupee,16000.0,Monthly,1692.0,48.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Distrac...,Less than once per month / Never,Office,Average,No,,"No, but I think we should",Not sure,I have some influence,Python,Python,PostgreSQL;SQLite,PostgreSQL,Docker;Linux;MacOS,Docker;Linux;MacOS,Other(s):,Other(s):,,,Atom;Vim,MacOS,Development,Not at all,,Yes,Yes,Yes,Facebook,Online,Username,2014,Multiple times per day,Find answers to specific questions;Learn how t...,6-10 times per week,Stack Overflow was much faster,0-10 minutes,Yes,A few times per month or weekly,Yes,"No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86618,I am a developer by profession,Yes,Less than once a month but more than once per ...,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Nepal,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, back-end;Developer, front-end",3,22,1,Very satisfied,Slightly dissatisfied,Not at all confident,No,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,"Write code by hand (e.g., on a whiteboard);Int...",Yes,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,1000.0,Monthly,12000.0,50.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Home,A little above average,"Yes, because I see value in code review",5.0,"No, but I think we should",Developers and management have nearly equal in...,I have little or no influence,HTML/CSS;Java;JavaScript;Python;Ruby,Go;HTML/CSS;JavaScript;Python;TypeScript,Firebase;MongoDB,Firebase;MongoDB,Android;AWS;Docker;Heroku;MacOS;Raspberry Pi,AWS;Docker;Heroku;MacOS,Express;React.js,Express;React.js,Node.js;React Native,Node.js;React Native,Visual Studio Code;Xcode,MacOS,"Development;Production;Outside of work, for pe...",Not at all,Useful across many domains and could change ma...,No,Yes,Yes,YouTube,In real life (in person),Username,2012,Daily or almost daily,Find answers to specific questions;Contribute ...,3-5 times per week,Stack Overflow was slightly faster,31-60 minutes,Yes,A few times per month or weekly,Yes,"No, I've heard of them, but I am not part of a...","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,27.0,Man,No,Straight / Heterosexual,South Asian,No,Appropriate in length,Easy
86742,I am a developer by profession,Yes,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Nepal,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Information systems, information technology, o...",Taken an online course in programming or softw...,20 to 99 employees,"Developer, full-stack",3,16,2,Neither satisfied nor dissatisfied,Neither satisfied nor dissatisfied,Somewhat confident,Yes,Yes,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in senior / management r...,No,Industry that I'd be working in;Financial perf...,"Something else changed (education, award, medi...",NPR,Nepalese rupee,,,,,There is a schedule and/or spec (made by me or...,Distracting work environment;Time spent commut...,Less than once per month / Never,Office,A little above average,"Yes, because I see value in code review",12.0,"Yes, it's part of our process",Not sure,I have some influence,PHP,PHP;Python,MySQL;Oracle;Redis;SQLite,MariaDB;MongoDB;MySQL;PostgreSQL;Redis;SQLite,AWS;Linux;Slack;WordPress,AWS;Docker;Google Cloud Platform;Linux;Raspber...,Django;Laravel,Django;Flask;Laravel;Vue.js,,TensorFlow;Xamarin,PHPStorm;PyCharm,Linux-based,Development;Testing;Production,,,Yes,Yes,What?,Facebook,In real life (in person),Login,2013,Daily or almost daily,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,31-60 minutes,Not sure / can't remember,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,A lot more welcome now than last year,Tech articles written by other developers;Tech...,,Man,No,Straight / Heterosexual,South Asian,,Too long,Neither easy nor difficult
87231,I am a developer by profession,Yes,Less than once per year,"OSS is, on average, of LOWER quality than prop...",Employed full-time,Nepal,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken a part-time in-person course in programm...,2-9 employees,"Developer, back-end;Developer, full-stack;Student",8,14,2,Very satisfied,Slightly satisfied,I don't have a manager,Yes,Yes,"I’m not actively looking, but I am open to new...",Less than a year ago,Write any code;Solve a brain-teaser style puzz...,No,Office environment or company culture;Remote w...,I heard about a job opportunity (from a recrui...,USD,United States dollar,,,,,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Lack of...,About half the time,Office,A little above average,"Yes, because I see value in code review",14.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have some influence,Assembly;Bash/Shell/PowerShell;C;C++;Dart;HTML...,Elixir;Erlang;TypeScript,Firebase;MariaDB;MySQL;SQLite,Cassandra;Couchbase;DynamoDB;Elasticsearch;Mon...,Android;Arduino;Linux;Raspberry Pi;Slack;Windo...,AWS;Docker;Heroku;Kubernetes,jQuery;Vue.js,Django;Express;Laravel;React.js;Vue.js,Flutter,Ansible;Apache Spark;Chef;Hadoop;Node.js;Puppet,Sublime Text;Visual Studio Code,Linux-based,Development;Testing,Not at all,Useful for immutable record keeping outside of...,Yes,Yes,Yes,WhatsApp,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Learn how t...,More than 10 times per week,Stack Overflow was much faster,60+ minutes,Yes,Multiple times per day,Yes,"No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,25.0,Man,No,Straight / Heterosexual,South Asian,Yes,Appropriate in length,Easy
87568,I am a developer by profession,Yes,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Nepal,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Participated in a full-time developer training...,2-9 employees,"Academic researcher;Designer;Developer, back-e...",4,18,3,Neither satisfied nor dissatisfied,Slightly satisfied,Somewhat confident,No,Yes,"I’m not actively looking, but I am open to new...",1-2 years ago,Write any code;Complete a take-home project,No,Office environment or company culture;Remote w...,"My job status changed (promotion, new job, etc.)",NPR,Nepalese rupee,25000.0,Monthly,2640.0,44.0,There is a schedule and/or spec (made by me or...,Inadequate access to necessary tools;Lack of s...,"Less than half the time, but at least one day ...",Home,Average,"Yes, because I see value in code review",10.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have little or no influence,C;C++;C#;HTML/CSS;JavaScript;PHP;SQL;TypeScript,Java;JavaScript;PHP;TypeScript,Firebase;MariaDB;MongoDB;Microsoft SQL Server;...,Firebase;MongoDB;MySQL,Windows;WordPress,Android;AWS;Docker;iOS;Slack,ASP.NET;Express;jQuery;Laravel;React.js;Vue.js,Django;Express;Laravel;Vue.js,Apache Spark;.NET;Node.js,Flutter;Node.js;Puppet;Xamarin,IntelliJ;Notepad++;PHPStorm;Sublime Text;Visua...,Windows,Development;Testing;Production,Not at all,,Yes,Also Yes,Yes,YouTube,Online,UserID,2015,Multiple times per day,Find answers to specific questions;Learn how t...,More than 10 times per week,Stack Overflow was slightly faster,60+ minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,23.0,Man,No,Straight / Heterosexual,South Asian,No,Too long,Easy


In [165]:
country_grp.get_group('Nepal')['ConvertedComp'].median() # median ConvertedComp among the respondents from Nepal

4224.0

#### We can also use loc to filter the rows of a country by its name

In [166]:
# Boolean mask that is True for the respondents from Nepal
filt = df['Country'] == 'Nepal'
# Pick the rows and the column in a single .loc call instead of chaining [] after
# .loc, which avoids building an intermediate DataFrame of all 85 columns.
df.loc[filt, 'SocialMedia'].value_counts()

SocialMedia
YouTube                     95
Facebook                    71
LinkedIn                    22
Twitter                     16
Reddit                       9
Instagram                    8
WhatsApp                     5
I don't use social media     4
WeChat 微信                    1
Name: count, dtype: int64

In [168]:
# Show the survey question behind the SocialMedia column.
# Label lookup works here because schema_df is indexed by column name at this point.
schema_df.loc['SocialMedia']

QuestionText    What social media site do you use the most?
Name: SocialMedia, dtype: object

#### We have all the rows grouped by country; now we can apply a function to each group

In [171]:
# value_counts on the grouped 'SocialMedia' column counts platforms within each country;
# .loc['United Kingdom'] then selects that country's slice of the result
country_grp['SocialMedia'].value_counts().loc['United Kingdom']

SocialMedia
Reddit                      1199
Twitter                      995
WhatsApp                     909
Facebook                     811
YouTube                      681
I don't use social media     372
Instagram                    318
LinkedIn                     175
Snapchat                      70
WeChat 微信                     11
VK ВКонта́кте                  3
Weibo 新浪微博                     1
Youku Tudou 优酷                 1
Name: count, dtype: int64

In [None]:
# Three most common SocialMedia answers among respondents from China
country_grp['SocialMedia'].value_counts().loc['China'].head(3)

In [None]:
# Three most common SocialMedia answers among respondents from the Russian Federation
country_grp['SocialMedia'].value_counts().loc['Russian Federation'].head(3)

In [172]:
# Same ranking expressed as a proportion of that country's answers rather than a count
country_grp['SocialMedia'].value_counts(normalize=True).loc['Russian Federation'].head(3)

SocialMedia
VK ВКонта́кте               0.303571
YouTube                     0.207418
I don't use social media    0.106456
Name: proportion, dtype: float64

#### Getting the median salary for each country in the groupby object

In [173]:
# Median ConvertedComp for every country, computed in one call on the grouped column
country_grp['ConvertedComp'].median()

Country
Afghanistan                               6222.0
Albania                                  10818.0
Algeria                                   7878.0
Andorra                                 160931.0
Angola                                    7764.0
                                          ...   
Venezuela, Bolivarian Republic of...      6384.0
Viet Nam                                 11892.0
Yemen                                    11940.0
Zambia                                    5040.0
Zimbabwe                                 19200.0
Name: ConvertedComp, Length: 179, dtype: float64

In [None]:
# The per-country result is indexed by country name, so .loc picks out a single country
country_grp['ConvertedComp'].median().loc['Nepal']

### Agg method to calculate multiple aggregate functions like mean, median

In [174]:
# agg takes a list of aggregation names and returns one column per aggregation
country_grp['ConvertedComp'].agg(['median','mean'])

Unnamed: 0_level_0,median,mean
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,6222.0,101953.333333
Albania,10818.0,21833.700000
Algeria,7878.0,34924.047619
Andorra,160931.0,160931.000000
Angola,7764.0,7764.000000
...,...,...
"Venezuela, Bolivarian Republic of...",6384.0,14581.627907
Viet Nam,11892.0,17233.436782
Yemen,11940.0,16909.166667
Zambia,5040.0,10075.375000


### Getting the number of people in Nepal who use Python

In [175]:
# Boolean mask that is True for the respondents from Nepal
filt = df['Country'] == 'Nepal'
# Select rows and column in one .loc call (no chained indexing). na=False makes
# respondents with no LanguageWorkedWith answer count as "does not use Python",
# so the result is a clean boolean Series and sum() is the number of True values.
df.loc[filt, 'LanguageWorkedWith'].str.contains('Python', na=False).sum()

101

In [176]:
# First row of the Nepal group
country_grp.get_group('Nepal').head(1)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
508,I am a student who is learning to code,Yes,Never,"OSS is, on average, of LOWER quality than prop...","Not employed, but looking for work",Nepal,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Academic researcher;Designer;Developer, full-s...",1,13,,,,,,,"I’m not actively looking, but I am open to new...",I've never had a job,,,,,,,,,,,,,,,,,,,,,C;C++;HTML/CSS;JavaScript,HTML/CSS;JavaScript;SQL;TypeScript,Firebase;MongoDB,Firebase;MongoDB;MySQL;PostgreSQL,Heroku;Windows,Android;Arduino;Docker;Heroku;Linux;Raspberry ...,Express;React.js,Express;React.js;Vue.js,Node.js,Node.js;React Native;TensorFlow,Visual Studio Code,Windows,"Development;Outside of work, for personal proj...",,Useful for immutable record keeping outside of...,Yes,Yes,What?,Facebook,,,2018,Daily or almost daily,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,0-10 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not really",A lot less welcome now than last year,Industry news about technologies you're intere...,,Man,No,Bisexual,South Asian,Yes,Too long,Neither easy nor difficult


In [184]:
# .str is a Series accessor and does not exist on a SeriesGroupBy, so this lookup
# raises AttributeError. The error is caught and printed so that the demonstration
# does not stop a "Run All" of the notebook.
try:
    country_grp['LanguageWorkedWith'].str.contains('Python').sum()
except AttributeError as err:
    print(f'AttributeError: {err}')

AttributeError: 'SeriesGroupBy' object has no attribute 'str'

In [190]:
# Lambda recap: an anonymous two-argument function, bound to a name so it can be called.
# It returns True when `item` is a member of the container `x`.
check = lambda x, item: item in x
check(x=[1, 2], item=1)

True

We can't use the `.str` accessor directly on a SeriesGroupBy object, but we can apply it to the individual Series of each group.
Grouping: When you use the groupby method on a DataFrame, it divides the data into separate groups based on the values in a specified column (or multiple columns). Each group contains rows that share a common value or set of values in the grouping column(s).

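SeriesGroupBy Object: Calling groupby on a DataFrame returns a DataFrameGroupBy object, and selecting a single column from it (for example `country_grp['LanguageWorkedWith']`) gives a SeriesGroupBy object. This object represents the grouped data and supports aggregations such as `median()` or `value_counts()`, but it does not offer element-wise accessors such as `.str`.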

Applying to Individual Series: To work with the data within each group, you can use the .apply method in combination with a lambda function or another custom function. The function is called once per group and receives that group's Series, so Series-only tools such as `.str` can be used inside it.

In [192]:
# Selecting one column from the grouped frame gives a SeriesGroupBy object
country_grp['LanguageWorkedWith']

<pandas.core.groupby.generic.SeriesGroupBy object at 0x138847580>

In [193]:
# apply calls the lambda once per country, passing that country's LanguageWorkedWith Series
country_grp['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python'))

# The result is a Series with a (Country, Respondent) MultiIndex: one True/False flag per
# respondent, and NaN where LanguageWorkedWith is missing.

Country      Respondent
Afghanistan  722             NaN
             6417          False
             7085          False
             7353          False
             8149          False
                           ...  
Zimbabwe     76594         False
             77533         False
             82752         False
             83850         False
             84155         False
Name: LanguageWorkedWith, Length: 88751, dtype: object

In [None]:
# Summing the True/False flags inside each group gives the number of Python users per country
country_grp['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python').sum())

In [None]:
# The per-country counts are indexed by country name, so .loc returns the count for Nepal
country_grp['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python').sum()).loc['Nepal']

In [None]:
# Number of respondents per country whose LanguageWorkedWith mentions 'Python'
country_grp['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python').sum())

### Exercise: find the percentage of respondents who know Python in each country

In [194]:
# Total respondents per country (rows with a missing Country are not counted)
country_respondent = df['Country'].value_counts()
country_respondent

Country
United States        20949
India                 9061
Germany               5866
United Kingdom        5737
Canada                3395
                     ...  
Tonga                    1
Timor-Leste              1
North Korea              1
Brunei Darussalam        1
Chad                     1
Name: count, Length: 179, dtype: int64

In [195]:
# Group the rows by country (rebinds country_grp) and show the resulting object's repr
country_grp = df.groupby('Country')
country_grp

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x138959900>

In [196]:
# For every country, count the respondents whose language list mentions Python
respondent_know_python = country_grp['LanguageWorkedWith'].apply(
    lambda langs: langs.str.contains('Python').sum()
)
respondent_know_python

Country
Afghanistan                              8
Albania                                 23
Algeria                                 40
Andorra                                  0
Angola                                   2
                                        ..
Venezuela, Bolivarian Republic of...    28
Viet Nam                                78
Yemen                                    3
Zambia                                   4
Zimbabwe                                14
Name: LanguageWorkedWith, Length: 179, dtype: int64

In [197]:
# Put the two per-country Series side by side; rows are aligned on the country name
python_df = pd.concat(
    [country_respondent, respondent_know_python],
    axis=1,
    sort=False,
)
python_df

Unnamed: 0_level_0,count,LanguageWorkedWith
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
...,...,...
Tonga,1,0
Timor-Leste,1,1
North Korea,1,0
Brunei Darussalam,1,0


In [198]:
# Give the two columns descriptive names. rename returns a new frame, so the name is
# rebound instead of mutating the existing object with inplace=True.
python_df = python_df.rename(columns={'count':'NumRespondents', 'LanguageWorkedWith':'NumKnowsPython'})

In [199]:
# Display the current state of python_df
python_df

Unnamed: 0_level_0,NumRespondents,NumKnowsPython
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
...,...,...
Tonga,1,0
Timor-Leste,1,1
North Korea,1,0
Brunei Darussalam,1,0


In [200]:
# Share of each country's respondents who list Python, expressed as a percentage
python_df['PctKnowsPython'] = python_df['NumKnowsPython'].div(python_df['NumRespondents']).mul(100)

In [201]:
# Display python_df, now including the PctKnowsPython column
python_df

Unnamed: 0_level_0,NumRespondents,NumKnowsPython,PctKnowsPython
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
United States,20949,10083,48.131176
India,9061,3105,34.267741
Germany,5866,2451,41.783157
United Kingdom,5737,2384,41.554820
Canada,3395,1558,45.891016
...,...,...,...
Tonga,1,0,0.000000
Timor-Leste,1,1,100.000000
North Korea,1,0,0.000000
Brunei Darussalam,1,0,0.000000


In [202]:
# Order the countries from the highest to the lowest share of Python users.
# sort_values returns a new frame, so the name is rebound instead of using inplace=True.
python_df = python_df.sort_values(by='PctKnowsPython', ascending=False)

In [203]:
# Display python_df ordered by PctKnowsPython
python_df

Unnamed: 0_level_0,NumRespondents,NumKnowsPython,PctKnowsPython
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sao Tome and Principe,1,1,100.000000
Timor-Leste,1,1,100.000000
Dominica,1,1,100.000000
Niger,1,1,100.000000
Turkmenistan,7,6,85.714286
...,...,...,...
Cape Verde,3,0,0.000000
Lao People's Democratic Republic,3,0,0.000000
Malawi,2,0,0.000000
Liberia,2,0,0.000000


In [204]:
# python_df is indexed by country name, so .loc returns the row for a single country
python_df.loc['Japan']

NumRespondents    391.000000
NumKnowsPython    182.000000
PctKnowsPython     46.547315
Name: Japan, dtype: float64