# Aggregation and Grouping

In [1]:
import pandas as pd 
import numpy as np 

In [8]:
# Small hand-written sample: one list per column, later turned into a DataFrame.

data = dict(
    first_name=['Alice', 'Bob', 'Charlie', 'David'],
    last_name=['Pretty', 'Chura', 'BC', 'BSDK'],
    age=[25, 32, 18, 47],
    city=['New York', 'Paris', 'London', 'San Francisco'],
    salary=[10000, 120000, 125000, 105000],
)

In [9]:
# Build a DataFrame from the sample dictionary: each key becomes a column.
# NOTE(review): this rebinds `data` from a dict to a DataFrame; a separate name would keep
# both objects available, but confirm how later cells use `data` before renaming.
data = pd.DataFrame(data)

In [34]:
# Load the survey results CSV and use the "Respondent" column as the row index.
# A forward slash is used as the path separator: it works on Windows, macOS and Linux,
# and it avoids the invalid "\s" escape sequence that the backslash form contained.
df = pd.read_csv("CSVs/survey_results_public.csv", index_col="Respondent")

In [35]:
# Let wide/long frames render up to 85 columns and 85 rows before pandas truncates them.
pd.options.display.max_columns = 85
pd.options.display.max_rows = 85

## Aggregation
Aggregation means combining multiple pieces of data into a single result,
e.g. mean(), median() and mode() are aggregate functions because each combines multiple values and gives us a single result

In [36]:
# Peek at the first 15 values of the ConvertedComp column.
df["ConvertedComp"].iloc[:15]

Respondent
1          NaN
2          NaN
3       8820.0
4      61000.0
5          NaN
6     366420.0
7          NaN
8          NaN
9      95179.0
10     13293.0
11         NaN
12         NaN
13     90000.0
14     57060.0
15         NaN
Name: ConvertedComp, dtype: float64

### Collecting median values
The median gives us the middle value and ignores the **NaN values**
- We use the median rather than the mean() to summarise salaries because the mean is strongly affected by outliers, while the median is far less affected by them

In [37]:
# Median of the ConvertedComp column; missing (NaN) entries are skipped by default.
df["ConvertedComp"].median()

57287.0

In [38]:
# COOL STUFF
# Calling median() on the whole DataFrame returns one median per numeric column.
# numeric_only=True makes that column selection explicit: the frame also contains text
# columns, which older pandas drops implicitly (emitting a FutureWarning) and which
# pandas >= 2.0 refuses to aggregate.
df.median(numeric_only=True)

  df.median()


CompTotal        62000.0
ConvertedComp    57287.0
WorkWeekHrs         40.0
CodeRevHrs           4.0
Age                 29.0
dtype: float64

In [39]:
# Show the installed pandas version, so the outputs in this notebook can be tied to it.
pd.__version__

'1.4.4'

### Getting broader view of data using describe()

In [40]:
# Summary statistics for the numeric columns: count of non-null values, mean, standard
# deviation, minimum, quartiles (the 50% row is the median) and maximum.
df.describe()

Unnamed: 0,CompTotal,ConvertedComp,WorkWeekHrs,CodeRevHrs,Age
count,55945.0,55823.0,64503.0,49790.0,79210.0
mean,551901400000.0,127110.7,42.127197,5.084308,30.336699
std,73319260000000.0,284152.3,37.28761,5.513931,9.17839
min,0.0,0.0,1.0,0.0,1.0
25%,20000.0,25777.5,40.0,2.0,24.0
50%,62000.0,57287.0,40.0,4.0,29.0
75%,120000.0,100000.0,44.75,6.0,35.0
max,1e+16,2000000.0,4850.0,99.0,99.0


In [41]:
# describe() also works on a single Series and returns the same statistics for that column.
df["ConvertedComp"].describe()

count    5.582300e+04
mean     1.271107e+05
std      2.841523e+05
min      0.000000e+00
25%      2.577750e+04
50%      5.728700e+04
75%      1.000000e+05
max      2.000000e+06
Name: ConvertedComp, dtype: float64

In [42]:
# The "count" row in the describe() result is the number of non-NaN values in the column.
df["ConvertedComp"].count()

# The dataset has 80000+ rows in total, but only 55823 respondents reported a salary.

55823

In [43]:
# How many respondents program as a hobby? Count each answer in the "Hobbyist" column.
df["Hobbyist"].value_counts()

# value_counts() is really helpful when we want the number of occurrences of each distinct value.

Yes    71257
No     17626
Name: Hobbyist, dtype: int64

In [44]:
# value_counts() is mostly used on columns that hold a discrete set of values.
# Another use case in this dataset: the "SocialMedia" column lets us check how many
# respondents use each social media platform.
df["SocialMedia"].value_counts()

# This gives us the number of respondents using each social media platform.

Reddit                      14374
YouTube                     13830
WhatsApp                    13347
Facebook                    13178
Twitter                     11398
Instagram                    6261
I don't use social media     5554
LinkedIn                     4501
WeChat 微信                     667
Snapchat                      628
VK ВКонта́кте                 603
Weibo 新浪微博                     56
Youku Tudou 优酷                 21
Hello                          19
Name: SocialMedia, dtype: int64

In [45]:
# To break the above results down into proportions, pass normalize=True.
# The result is a fraction between 0 and 1 (multiply by 100 for a percentage), computed
# over the non-null answers only, because value_counts() drops NaN by default.
df["SocialMedia"].value_counts(normalize=True)


# For example, about 17% of the respondents who answered use Reddit.

Reddit                      0.170233
YouTube                     0.163791
WhatsApp                    0.158071
Facebook                    0.156069
Twitter                     0.134988
Instagram                   0.074150
I don't use social media    0.065777
LinkedIn                    0.053306
WeChat 微信                   0.007899
Snapchat                    0.007437
VK ВКонта́кте               0.007141
Weibo 新浪微博                  0.000663
Youku Tudou 优酷              0.000249
Hello                       0.000225
Name: SocialMedia, dtype: float64

## Grouping
The popularity of each social media platform varies a lot from country to country  
So, to see each country together with the percentage of respondents using each social media platform in that country, we use the grouping technique

According to the official pandas documentation, grouping means splitting the object, applying a function, and combining the results 

**Official statement**:
A groupby operation involves some combination of splitting the object, applying a function, and combining the results. This can be used to group large amounts of data and compute operations on these groups.

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html

In [46]:
# Group the survey rows by country.
# The key is passed as a scalar column label rather than a one-element list: with a list
# key, newer pandas versions treat every group name as a 1-tuple, so lookups such as
# get_group("Pakistan") emit a warning or fail. A scalar key keeps group names as plain strings.
country_grp = df.groupby("Country")
country_grp

# This gives us a groupby object that contains one group per country name,
# and we can now apply multiple functions to it.

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000025C78855550>

In [47]:
# Get the group for Pakistan.
country_grp.get_group("Pakistan")

# This returns every record whose Country is Pakistan. The groupby object is more than a
# row lookup, though: it holds a group for every country, so functions can be applied to
# one group or to all groups at the same time.
# For a single country we can get the same rows with a boolean filter, as shown below.

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
84,I am a developer by profession,No,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Pakistan,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",,2-9 employees,"Designer;Developer, mobile;Educator;Student",3,20,2,Slightly satisfied,Slightly satisfied,Somewhat confident,Yes,Yes,"I’m not actively looking, but I am open to new...",1-2 years ago,Solve a brain-teaser style puzzle;Interview wi...,Yes,Office environment or company culture;Opportun...,I was preparing for a job search,PKR,Pakistani rupee,40000.0,Monthly,3468.0,40.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Distrac...,Less than once per month / Never,Office,Average,"Yes, because I see value in code review",20.0,"No, but I think we should",Not sure,I have some influence,C;C++;C#;Java;Kotlin;PHP;SQL,C#;Java;Kotlin;Objective-C,Firebase;MySQL;SQLite,Firebase;MySQL;Oracle;SQLite,Android;Windows,Android;Windows,ASP.NET,,Unity 3D,Unity 3D,Android Studio;Visual Studio,Windows,Development;Testing;Production;Outside of work...,Not at all,,No,Also Yes,No,WhatsApp,In real life (in person),Screen Name,2016,Multiple times per day,Learn how to do things I didn’t necessarily lo...,More than 10 times per week,Stack Overflow was much faster,11-30 minutes,Yes,Daily or almost daily,Yes,Yes,"Yes, definitely",A lot more welcome now than last year,Courses on technologies you're interested in,26.0,Man,No,Straight / Heterosexual,Middle Eastern,No,Too long,Neither easy nor difficult
119,I am a developer by profession,No,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,Pakistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken a part-time in-person course in programm...,20 to 99 employees,"Developer, back-end;Developer, mobile",10,24,6,Slightly dissatisfied,Slightly dissatisfied,Somewhat confident,No,Not sure,"I’m not actively looking, but I am open to new...",3-4 years ago,,No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,PKR,Pakistani rupee,,Monthly,,35.0,There's no schedule or spec; I work on what se...,Being tasked with non-development work;Not eno...,Less than once per month / Never,Office,A little above average,"Yes, because I was told to do so",10.0,"Yes, it's part of our process","The CTO, CIO, or other management purchase new...",I have little or no influence,C;C++;C#;HTML/CSS;Java;JavaScript;SQL,C;C++;C#;HTML/CSS;Java;JavaScript;Kotlin;Pytho...,Firebase;Microsoft SQL Server;MySQL;SQLite,Firebase;MongoDB;Microsoft SQL Server;MySQL;SQ...,Android;Slack;WordPress,Android;iOS;Slack;WordPress,ASP.NET,Angular/Angular.js;React.js,.NET,Node.js;React Native,Android Studio;Notepad++,Windows,Development,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,Also Yes,No,Facebook,In real life (in person),Username,2010,Multiple times per day,Find answers to specific questions;Learn how t...,More than 10 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Daily or almost daily,Yes,"No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,28.0,Man,No,,,Yes,Appropriate in length,Easy
298,I am a developer by profession,Yes,Less than once a month but more than once per ...,"OSS is, on average, of LOWER quality than prop...",,Pakistan,"Yes, part-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,,,4,17,Less than 1 year,Slightly dissatisfied,Slightly satisfied,,,,I am actively looking for a job,NA - I am an independent contractor or self em...,Write any code,Yes,Office environment or company culture;Remote w...,I was preparing for a job search,PKR,Pakistani rupee,,Monthly,,35.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Meeting...,All or almost all the time (I'm full-time remote),Office,Average,"Yes, because I see value in code review",10.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have a great deal of influence,HTML/CSS;JavaScript;PHP;SQL;Other(s):,JavaScript;SQL;TypeScript;Other(s):,MySQL;PostgreSQL,DynamoDB;Firebase;MongoDB;MySQL;PostgreSQL;Redis,AWS;Linux;Slack;Windows;WordPress,AWS;Docker;Heroku;Linux;Slack;Windows,ASP.NET;jQuery;React.js;Vue.js,React.js;Vue.js,Node.js,Node.js;React Native,PHPStorm;Sublime Text;Visual Studio,Windows,Development,,,Yes,Yes,No,LinkedIn,Neither,Username,2017,A few times per week,Find answers to specific questions;Learn how t...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Somewhat more welcome now than last year,Tech articles written by other developers;Indu...,23.0,Man,No,,,No,Too long,Neither easy nor difficult
299,I am a developer by profession,Yes,Less than once per year,The quality of OSS and closed source software ...,Employed part-time,Pakistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,10 to 19 employees,Academic researcher;Data scientist or machine ...,19,Younger than 5 years,6,Slightly dissatisfied,Slightly dissatisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,,Yes,"Industry that I'd be working in;Languages, fra...","Something else changed (education, award, medi...",PKR,Pakistani rupee,0.0,,,,There's no schedule or spec; I work on what se...,Distracting work environment;Non-work commitme...,"More than half, but not all, the time","Other place, such as a coworking space or cafe",Far below average,No,,,Not sure,I have little or no influence,Assembly;C;C++;Java;Python;SQL,Assembly;C;C++;Python,Microsoft SQL Server,,Android;Arduino;Linux;Raspberry Pi;Windows,Arduino;Linux;Raspberry Pi;Windows,,,TensorFlow;Other(s):,Other(s):,Notepad++;PyCharm,Windows,I do not use containers,Not at all,A passing fad,Yes,Yes,Yes,Facebook,In real life (in person),Username,2011,Daily or almost daily,Find answers to specific questions,3-5 times per week,,,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","No, not at all",Somewhat more welcome now than last year,,25.0,Man,No,Straight / Heterosexual,South Asian,Yes,Appropriate in length,Easy
311,I am a developer by profession,No,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Pakistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,20 to 99 employees,Academic researcher;Database administrator;Dev...,5,18,Less than 1 year,Neither satisfied nor dissatisfied,Slightly dissatisfied,Somewhat confident,No,Not sure,I am actively looking for a job,Less than a year ago,Write any code;Solve a brain-teaser style puzzle,No,Financial performance or funding status of the...,I was preparing for a job search,PKR,Pakistani rupee,360000.0,Yearly,2600.0,48.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Lack of...,About half the time,Office,A little above average,"Yes, because I see value in code review",5.0,"Yes, it's not part of our process but the deve...",Developers typically have the most influence o...,I have some influence,Assembly;C;C++;C#;HTML/CSS;Java;Python;Scala;SQL,C++;Java;Python;SQL,MongoDB;Microsoft SQL Server;MySQL;SQLite,Cassandra;Firebase;MongoDB;Microsoft SQL Serve...,Arduino;IBM Cloud or Watson;Linux;Raspberry Pi...,Android;Arduino;AWS;Docker;Google Cloud Platfo...,ASP.NET;Django;Express;Spring,Angular/Angular.js;ASP.NET;Django;Express;jQue...,.NET;Node.js,Apache Spark;Hadoop;Node.js,NetBeans;Notepad++;Visual Studio Code,Windows,Development;Testing,Non-currency applications of blockchain,Useful across many domains and could change ma...,Yes,Yes,No,LinkedIn,Online,Login,2014,Daily or almost daily,Find answers to specific questions;Get a sense...,More than 10 times per week,Stack Overflow was slightly faster,0-10 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Industry news about technologies you're intere...,24.0,Man,No,Straight / 
Heterosexual,South Asian,Yes,Appropriate in length,Neither easy nor difficult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88862,I am a student who is learning to code,Yes,Less than once per year,"OSS is, on average, of LOWER quality than prop...","Independent contractor, freelancer, or self-em...",Pakistan,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,,"Developer, back-end;Developer, embedded applic...",3,18,,,,,,,"I’m not actively looking, but I am open to new...",Less than a year ago,,,Industry that I'd be working in;Financial perf...,Re-entry into the workforce,,,,,,,,,,,,,,,,,Java,Java;JavaScript;Swift;Other(s):,Firebase,Firebase;MongoDB;MySQL;Oracle;SQLite,Android;Arduino,Android;Arduino;iOS;MacOS,,React.js,,Node.js;React Native,Android Studio;Xcode,MacOS,Development,,Useful across many domains and could change ma...,Yes,Yes,What?,WhatsApp,Online,UserID,2016,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was slightly faster,31-60 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Somewhat more welcome now than last year,Tech articles written by other developers,21.0,Man,No,Bisexual,,Yes,Too long,Difficult
5439,,Yes,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Pakistan,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Information systems, information technology, o...",Taken an online course in programming or softw...,,"Developer, full-stack",2,24,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,C++;HTML/CSS;Java;JavaScript;Objective-C;PHP,,MySQL,,Android;IBM Cloud or Watson;iOS;MacOS;Windows;...,,Express,,Unity 3D,Android Studio,Windows,Production,,A passing fad,Yes,Yes,Yes,Instagram,Online,UserID,2018,I have never visited Stack Overflow (before to...,Meet other people with similar skills or inter...,Less than once per week,Stack Overflow was much faster,31-60 minutes,,,"No, I didn't know that Stack Overflow had a jo...","No, I've heard of them, but I am not part of a...","No, not at all",A lot more welcome now than last year,Industry news about technologies you're intere...,24.0,Woman,Yes,,,No,Too short,Easy
39117,,Yes,Never,"OSS is, on average, of HIGHER quality than pro...","Not employed, but looking for work",Pakistan,"Yes, part-time","Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,Taken an online course in programming or softw...,,Academic researcher;Data or business analyst;D...,4,18,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,C;C++;C#;HTML/CSS;Java;JavaScript;SQL,C#;HTML/CSS;Java;JavaScript;PHP;SQL,,,Android,Android,ASP.NET,,.NET;Xamarin,.NET;Xamarin,Android Studio;Notepad++;Sublime Text;Visual S...,Windows,Development;Testing;Production;Outside of work...,,,Yes,Yes,No,WhatsApp,In real life (in person),Username,2015,Daily or almost daily,Find answers to specific questions;Learn how t...,Less than once per week,Stack Overflow was much faster,31-60 minutes,Yes,Daily or almost daily,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Bisexual;Straight / Heterosexual,East Asian,Yes,Appropriate in length,Easy
60066,,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Pakistan,"Yes, full-time",,I never declared a major,Taken an online course in programming or softw...,,,4,16,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Assembly;C++;C#;HTML/CSS;Java;PHP;Python;SQL,C++;C#;HTML/CSS;Java;JavaScript;PHP;Python;SQL,Firebase;MySQL;SQLite,Firebase;MongoDB;MySQL;SQLite,Android;Windows,Android;Docker;Google Cloud Platform;IBM Cloud...,,Angular/Angular.js;Django;React.js;Spring,Unity 3D,Flutter;Node.js;Unity 3D,Android Studio;NetBeans;PyCharm;Visual Studio ...,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,What?,YouTube,In real life (in person),Username,2017,A few times per week,Find answers to specific questions;Meet other ...,3-5 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Cour...,20.0,Man,No,Straight / Heterosexual,South Asian,Yes,Appropriate in length,Easy


In [49]:
# Boolean mask that is True for the rows whose Country is "Pakistan".
filt = df["Country"].eq("Pakistan")
df.loc[filt]

# A filter only returns the records of one country; to look at another country we have to
# rewrite the filter. groupby, in contrast, splits the data into a separate group for
# every country in one step.

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
84,I am a developer by profession,No,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Pakistan,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",,2-9 employees,"Designer;Developer, mobile;Educator;Student",3,20,2,Slightly satisfied,Slightly satisfied,Somewhat confident,Yes,Yes,"I’m not actively looking, but I am open to new...",1-2 years ago,Solve a brain-teaser style puzzle;Interview wi...,Yes,Office environment or company culture;Opportun...,I was preparing for a job search,PKR,Pakistani rupee,40000.0,Monthly,3468.0,40.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Distrac...,Less than once per month / Never,Office,Average,"Yes, because I see value in code review",20.0,"No, but I think we should",Not sure,I have some influence,C;C++;C#;Java;Kotlin;PHP;SQL,C#;Java;Kotlin;Objective-C,Firebase;MySQL;SQLite,Firebase;MySQL;Oracle;SQLite,Android;Windows,Android;Windows,ASP.NET,,Unity 3D,Unity 3D,Android Studio;Visual Studio,Windows,Development;Testing;Production;Outside of work...,Not at all,,No,Also Yes,No,WhatsApp,In real life (in person),Screen Name,2016,Multiple times per day,Learn how to do things I didn’t necessarily lo...,More than 10 times per week,Stack Overflow was much faster,11-30 minutes,Yes,Daily or almost daily,Yes,Yes,"Yes, definitely",A lot more welcome now than last year,Courses on technologies you're interested in,26.0,Man,No,Straight / Heterosexual,Middle Eastern,No,Too long,Neither easy nor difficult
119,I am a developer by profession,No,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,Pakistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken a part-time in-person course in programm...,20 to 99 employees,"Developer, back-end;Developer, mobile",10,24,6,Slightly dissatisfied,Slightly dissatisfied,Somewhat confident,No,Not sure,"I’m not actively looking, but I am open to new...",3-4 years ago,,No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,PKR,Pakistani rupee,,Monthly,,35.0,There's no schedule or spec; I work on what se...,Being tasked with non-development work;Not eno...,Less than once per month / Never,Office,A little above average,"Yes, because I was told to do so",10.0,"Yes, it's part of our process","The CTO, CIO, or other management purchase new...",I have little or no influence,C;C++;C#;HTML/CSS;Java;JavaScript;SQL,C;C++;C#;HTML/CSS;Java;JavaScript;Kotlin;Pytho...,Firebase;Microsoft SQL Server;MySQL;SQLite,Firebase;MongoDB;Microsoft SQL Server;MySQL;SQ...,Android;Slack;WordPress,Android;iOS;Slack;WordPress,ASP.NET,Angular/Angular.js;React.js,.NET,Node.js;React Native,Android Studio;Notepad++,Windows,Development,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,Also Yes,No,Facebook,In real life (in person),Username,2010,Multiple times per day,Find answers to specific questions;Learn how t...,More than 10 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Daily or almost daily,Yes,"No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,28.0,Man,No,,,Yes,Appropriate in length,Easy
298,I am a developer by profession,Yes,Less than once a month but more than once per ...,"OSS is, on average, of LOWER quality than prop...",,Pakistan,"Yes, part-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,,,4,17,Less than 1 year,Slightly dissatisfied,Slightly satisfied,,,,I am actively looking for a job,NA - I am an independent contractor or self em...,Write any code,Yes,Office environment or company culture;Remote w...,I was preparing for a job search,PKR,Pakistani rupee,,Monthly,,35.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Meeting...,All or almost all the time (I'm full-time remote),Office,Average,"Yes, because I see value in code review",10.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have a great deal of influence,HTML/CSS;JavaScript;PHP;SQL;Other(s):,JavaScript;SQL;TypeScript;Other(s):,MySQL;PostgreSQL,DynamoDB;Firebase;MongoDB;MySQL;PostgreSQL;Redis,AWS;Linux;Slack;Windows;WordPress,AWS;Docker;Heroku;Linux;Slack;Windows,ASP.NET;jQuery;React.js;Vue.js,React.js;Vue.js,Node.js,Node.js;React Native,PHPStorm;Sublime Text;Visual Studio,Windows,Development,,,Yes,Yes,No,LinkedIn,Neither,Username,2017,A few times per week,Find answers to specific questions;Learn how t...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Somewhat more welcome now than last year,Tech articles written by other developers;Indu...,23.0,Man,No,,,No,Too long,Neither easy nor difficult
299,I am a developer by profession,Yes,Less than once per year,The quality of OSS and closed source software ...,Employed part-time,Pakistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,10 to 19 employees,Academic researcher;Data scientist or machine ...,19,Younger than 5 years,6,Slightly dissatisfied,Slightly dissatisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,,Yes,"Industry that I'd be working in;Languages, fra...","Something else changed (education, award, medi...",PKR,Pakistani rupee,0.0,,,,There's no schedule or spec; I work on what se...,Distracting work environment;Non-work commitme...,"More than half, but not all, the time","Other place, such as a coworking space or cafe",Far below average,No,,,Not sure,I have little or no influence,Assembly;C;C++;Java;Python;SQL,Assembly;C;C++;Python,Microsoft SQL Server,,Android;Arduino;Linux;Raspberry Pi;Windows,Arduino;Linux;Raspberry Pi;Windows,,,TensorFlow;Other(s):,Other(s):,Notepad++;PyCharm,Windows,I do not use containers,Not at all,A passing fad,Yes,Yes,Yes,Facebook,In real life (in person),Username,2011,Daily or almost daily,Find answers to specific questions,3-5 times per week,,,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","No, not at all",Somewhat more welcome now than last year,,25.0,Man,No,Straight / Heterosexual,South Asian,Yes,Appropriate in length,Easy
311,I am a developer by profession,No,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Pakistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,20 to 99 employees,Academic researcher;Database administrator;Dev...,5,18,Less than 1 year,Neither satisfied nor dissatisfied,Slightly dissatisfied,Somewhat confident,No,Not sure,I am actively looking for a job,Less than a year ago,Write any code;Solve a brain-teaser style puzzle,No,Financial performance or funding status of the...,I was preparing for a job search,PKR,Pakistani rupee,360000.0,Yearly,2600.0,48.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Lack of...,About half the time,Office,A little above average,"Yes, because I see value in code review",5.0,"Yes, it's not part of our process but the deve...",Developers typically have the most influence o...,I have some influence,Assembly;C;C++;C#;HTML/CSS;Java;Python;Scala;SQL,C++;Java;Python;SQL,MongoDB;Microsoft SQL Server;MySQL;SQLite,Cassandra;Firebase;MongoDB;Microsoft SQL Serve...,Arduino;IBM Cloud or Watson;Linux;Raspberry Pi...,Android;Arduino;AWS;Docker;Google Cloud Platfo...,ASP.NET;Django;Express;Spring,Angular/Angular.js;ASP.NET;Django;Express;jQue...,.NET;Node.js,Apache Spark;Hadoop;Node.js,NetBeans;Notepad++;Visual Studio Code,Windows,Development;Testing,Non-currency applications of blockchain,Useful across many domains and could change ma...,Yes,Yes,No,LinkedIn,Online,Login,2014,Daily or almost daily,Find answers to specific questions;Get a sense...,More than 10 times per week,Stack Overflow was slightly faster,0-10 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Industry news about technologies you're intere...,24.0,Man,No,Straight / 
Heterosexual,South Asian,Yes,Appropriate in length,Neither easy nor difficult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88862,I am a student who is learning to code,Yes,Less than once per year,"OSS is, on average, of LOWER quality than prop...","Independent contractor, freelancer, or self-em...",Pakistan,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,,"Developer, back-end;Developer, embedded applic...",3,18,,,,,,,"I’m not actively looking, but I am open to new...",Less than a year ago,,,Industry that I'd be working in;Financial perf...,Re-entry into the workforce,,,,,,,,,,,,,,,,,Java,Java;JavaScript;Swift;Other(s):,Firebase,Firebase;MongoDB;MySQL;Oracle;SQLite,Android;Arduino,Android;Arduino;iOS;MacOS,,React.js,,Node.js;React Native,Android Studio;Xcode,MacOS,Development,,Useful across many domains and could change ma...,Yes,Yes,What?,WhatsApp,Online,UserID,2016,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was slightly faster,31-60 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Somewhat more welcome now than last year,Tech articles written by other developers,21.0,Man,No,Bisexual,,Yes,Too long,Difficult
5439,,Yes,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Pakistan,"Yes, full-time","Bachelor’s degree (BA, BS, B.Eng., etc.)","Information systems, information technology, o...",Taken an online course in programming or softw...,,"Developer, full-stack",2,24,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,C++;HTML/CSS;Java;JavaScript;Objective-C;PHP,,MySQL,,Android;IBM Cloud or Watson;iOS;MacOS;Windows;...,,Express,,Unity 3D,Android Studio,Windows,Production,,A passing fad,Yes,Yes,Yes,Instagram,Online,UserID,2018,I have never visited Stack Overflow (before to...,Meet other people with similar skills or inter...,Less than once per week,Stack Overflow was much faster,31-60 minutes,,,"No, I didn't know that Stack Overflow had a jo...","No, I've heard of them, but I am not part of a...","No, not at all",A lot more welcome now than last year,Industry news about technologies you're intere...,24.0,Woman,Yes,,,No,Too short,Easy
39117,,Yes,Never,"OSS is, on average, of HIGHER quality than pro...","Not employed, but looking for work",Pakistan,"Yes, part-time","Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,Taken an online course in programming or softw...,,Academic researcher;Data or business analyst;D...,4,18,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,C;C++;C#;HTML/CSS;Java;JavaScript;SQL,C#;HTML/CSS;Java;JavaScript;PHP;SQL,,,Android,Android,ASP.NET,,.NET;Xamarin,.NET;Xamarin,Android Studio;Notepad++;Sublime Text;Visual S...,Windows,Development;Testing;Production;Outside of work...,,,Yes,Yes,No,WhatsApp,In real life (in person),Username,2015,Daily or almost daily,Find answers to specific questions;Learn how t...,Less than once per week,Stack Overflow was much faster,31-60 minutes,Yes,Daily or almost daily,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Bisexual;Straight / Heterosexual,East Asian,Yes,Appropriate in length,Easy
60066,,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Pakistan,"Yes, full-time",,I never declared a major,Taken an online course in programming or softw...,,,4,16,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Assembly;C++;C#;HTML/CSS;Java;PHP;Python;SQL,C++;C#;HTML/CSS;Java;JavaScript;PHP;Python;SQL,Firebase;MySQL;SQLite,Firebase;MongoDB;MySQL;SQLite,Android;Windows,Android;Docker;Google Cloud Platform;IBM Cloud...,,Angular/Angular.js;Django;React.js;Spring,Unity 3D,Flutter;Node.js;Unity 3D,Android Studio;NetBeans;PyCharm;Visual Studio ...,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,What?,YouTube,In real life (in person),Username,2017,A few times per week,Find answers to specific questions;Meet other ...,3-5 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, definitely",Just as welcome now as I felt last year,Tech articles written by other developers;Cour...,20.0,Man,No,Straight / Heterosexual,South Asian,Yes,Appropriate in length,Easy


In [51]:
# Social media usage for a single country, using a boolean filter.
# `filt` is a boolean Series that is True for the rows whose Country is "Pakistan".
filt = df["Country"] == "Pakistan"

# Select the rows and the column in ONE .loc call. The chained form
# df.loc[filt]["SocialMedia"] first builds an intermediate frame with every column
# and only then picks one of them.
df.loc[filt, "SocialMedia"].value_counts()

# These results cover Pakistan only. Grouping gives the same breakdown for every
# country in one pass - that is the basic difference between filtering and grouping.

WhatsApp                    266
Facebook                    232
YouTube                     182
LinkedIn                     71
Twitter                      58
Instagram                    41
Reddit                       28
I don't use social media     23
Snapchat                      5
Hello                         1
VK ВКонта́кте                 1
Name: SocialMedia, dtype: int64

In [57]:
# Social media counts for every country at once, via the groupby object.
# The result is a single Series indexed by two levels (Country, SocialMedia);
# multi-level indexes have not been covered yet at this point in the notebook.
(
    country_grp["SocialMedia"]
    .value_counts()
    .head(85)
)

Country              SocialMedia             
Afghanistan          Facebook                     15
                     YouTube                       9
                     I don't use social media      6
                     WhatsApp                      4
                     Instagram                     1
                     LinkedIn                      1
                     Twitter                       1
Albania              WhatsApp                     18
                     Facebook                     16
                     Instagram                    13
                     YouTube                      10
                     Twitter                       8
                     LinkedIn                      7
                     Reddit                        6
                     I don't use social media      4
                     Snapchat                      1
                     WeChat 微信                     1
Algeria              YouTube                      42


In [58]:
# Pull one country's slice out of the grouped counts: the outer index level is
# the country, so .loc["Pakistan"] returns just that country's breakdown.
(
    country_grp["SocialMedia"]
    .value_counts()
    .loc["Pakistan"]
    .head(50)
)


SocialMedia
WhatsApp                    266
Facebook                    232
YouTube                     182
LinkedIn                     71
Twitter                      58
Instagram                    41
Reddit                       28
I don't use social media     23
Snapchat                      5
Hello                         1
VK ВКонта́кте                 1
Name: SocialMedia, dtype: int64

## Applying Aggregate Functions to Groups

In [61]:
# Mean converted compensation (salary) per country:
# one value per group, indexed by the country name.
(
    country_grp["ConvertedComp"]
    .mean()
)

Country
Afghanistan                             101953.333333
Albania                                  21833.700000
Algeria                                  34924.047619
Andorra                                 160931.000000
Angola                                    7764.000000
                                            ...      
Venezuela, Bolivarian Republic of...     14581.627907
Viet Nam                                 17233.436782
Yemen                                    16909.166667
Zambia                                   10075.375000
Zimbabwe                                 34046.666667
Name: ConvertedComp, Length: 179, dtype: float64

In [63]:
# Mean salary for one specific country.
# .loc works on the result because the grouping column (Country) becomes its index.
(
    country_grp["ConvertedComp"]
    .mean()
    .loc["Pakistan"]
)

12115.1990521327

In [66]:
# Apply several aggregate functions at once: agg() takes a list and returns one
# column per function.
# Use the string aliases "mean"/"median" rather than np.mean/np.median: passing
# NumPy callables to groupby agg() is deprecated in newer pandas, and the strings
# produce the same "mean" and "median" columns.
country_grp["ConvertedComp"].agg(["mean", "median"])


Unnamed: 0_level_0,mean,median
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,101953.333333,6222.0
Albania,21833.700000,10818.0
Algeria,34924.047619,7878.0
Andorra,160931.000000,160931.0
Angola,7764.000000,7764.0
...,...,...
"Venezuela, Bolivarian Republic of...",14581.627907,6384.0
Viet Nam,17233.436782,11892.0
Yemen,16909.166667,11940.0
Zambia,10075.375000,5040.0


In [67]:
# Mean and median salary for a single country. String aliases are used instead of
# np.mean/np.median because passing NumPy callables to groupby agg() is deprecated
# in newer pandas; the resulting labels ("mean", "median") are unchanged.
country_grp["ConvertedComp"].agg(["mean", "median"]).loc["Pakistan"]


mean      12115.199052
median     7368.000000
Name: Pakistan, dtype: float64

In [73]:
# How many people in Pakistan have worked with Python? First, with a filter.
filt = df["Country"] == "Pakistan"

# Row mask and column label go into a single .loc call (no chained indexing).
# str.contains gives True/False per respondent and NaN where the answer is missing.
df.loc[filt, "LanguageWorkedWith"].str.contains("Python")

Respondent
84       False
119      False
298      False
299       True
311       True
         ...  
88862    False
5439       NaN
39117    False
60066     True
88182    False
Name: LanguageWorkedWith, Length: 923, dtype: object

In [74]:
# Count the True/False answers for Pakistan (value_counts leaves out the missing ones).
# Rows and column are selected in one .loc call instead of chained indexing.
df.loc[filt, "LanguageWorkedWith"].str.contains("Python").value_counts()

False    649
True     251
Name: LanguageWorkedWith, dtype: int64

In [75]:
# Alternatively, sum() gives the number of True results directly.
# sum() is not limited to numbers: on booleans each True counts as 1, so the
# total is the number of True values.
# Rows and column are selected in one .loc call instead of chained indexing.
df.loc[filt, "LanguageWorkedWith"].str.contains("Python").sum()

251

In [77]:
# Let's try the same thing on the groupby object.
# This fails on purpose: country_grp["LanguageWorkedWith"] is a SeriesGroupBy, not a
# Series, and the .str accessor is only available on a Series - the two are
# different types.
# The error is caught and printed so that "Restart Kernel -> Run All" does not stop
# at this cell while the message is still shown.
try:
    country_grp["LanguageWorkedWith"].str.contains("Python").sum()
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: 'SeriesGroupBy' object has no attribute 'str'

In [79]:
# Inspect what selecting a column from the groupby object actually returns
country_grp["LanguageWorkedWith"]

# The repr shows a SeriesGroupBy object rather than a plain Series

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000025C0545B760>

In [81]:
# apply() hands each group's Series to the function, so the .str accessor can be
# used inside it: per country, count the respondents whose languages mention Python.
country_grp["LanguageWorkedWith"].apply(
    lambda langs: langs.str.contains("Python").sum()
)

Country
Afghanistan                              8
Albania                                 23
Algeria                                 40
Andorra                                  0
Angola                                   2
                                        ..
Venezuela, Bolivarian Republic of...    28
Viet Nam                                78
Yemen                                    3
Zambia                                   4
Zimbabwe                                14
Name: LanguageWorkedWith, Length: 179, dtype: int64

In [93]:
# Python users per country, then look up one country by its index label
con = country_grp["LanguageWorkedWith"].apply(
    lambda langs: langs.str.contains("Python").sum()
)
con.loc["United States"]

10083

In [82]:
# Per country, the fraction of answers that mention Python (True) or not (False);
# normalize=True turns the counts into proportions.
country_grp["LanguageWorkedWith"].apply(
    lambda langs: langs.str.contains("Python").value_counts(normalize=True)
)


Country           
Afghanistan  False    0.794872
             True     0.205128
Albania      False    0.722892
             True     0.277108
Algeria      False    0.682540
                        ...   
Yemen        True     0.176471
Zambia       False    0.666667
             True     0.333333
Zimbabwe     False    0.641026
             True     0.358974
Name: LanguageWorkedWith, Length: 335, dtype: float64

#### EXERCISE:
Get the total number of people who use Python in each country, using filtering, grouping, and the `concat` function.


In [90]:
# Number of respondents from each country (rows of df per "Country" value)
country_respondents = df["Country"].value_counts()

In [91]:
# Number of respondents per country whose LanguageWorkedWith mentions Python
pythonic_people = country_grp["LanguageWorkedWith"].apply(
    lambda langs: langs.str.contains("Python").sum()
)
pythonic_people

Country
Afghanistan                              8
Albania                                 23
Algeria                                 40
Andorra                                  0
Angola                                   2
                                        ..
Venezuela, Bolivarian Republic of...    28
Viet Nam                                78
Yemen                                    3
Zambia                                   4
Zimbabwe                                14
Name: LanguageWorkedWith, Length: 179, dtype: int64

In [115]:
# Concatenation: put the two per-country Series side by side as columns
# (axis=1 is the same as axis="columns"); rows are matched on the country index.
pythonTotalPeople = pd.concat(
    [country_respondents, pythonic_people],
    axis=1,
)
pythonTotalPeople

Unnamed: 0,Country,LanguageWorkedWith
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
...,...,...
Tonga,1,0
Timor-Leste,1,1
North Korea,1,0
Brunei Darussalam,1,0


In [116]:
# Rename the columns to something descriptive.
# Reassign instead of using inplace=True so the cell reads as a plain
# input -> output step.
# "count" is mapped as well: pandas >= 2.0 names the value_counts() result "count"
# instead of "Country", and rename() ignores keys that are not present.
pythonTotalPeople = pythonTotalPeople.rename(
    columns={
        "Country": "noOfPeople",
        "count": "noOfPeople",
        "LanguageWorkedWith": "noWhoKnowsPython",
    }
)

In [117]:
# Display the frame again to confirm the renamed columns
pythonTotalPeople

Unnamed: 0,noOfPeople,noWhoKnowsPython
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
...,...,...
Tonga,1,0
Timor-Leste,1,1
North Korea,1,0
Brunei Darussalam,1,0


In [121]:
# Percentage of each country's respondents who have worked with Python:
# (Python users / all respondents from that country) * 100
pythonTotalPeople["PCTKnowsPython"] = (
    pythonTotalPeople["noWhoKnowsPython"]
    .div(pythonTotalPeople["noOfPeople"])
    .mul(100)
)

pythonTotalPeople

Unnamed: 0,noOfPeople,noWhoKnowsPython,PCTKnowsPython
United States,20949,10083,48.131176
India,9061,3105,34.267741
Germany,5866,2451,41.783157
United Kingdom,5737,2384,41.554820
Canada,3395,1558,45.891016
...,...,...,...
Tonga,1,0,0.000000
Timor-Leste,1,1,100.000000
North Korea,1,0,0.000000
Brunei Darussalam,1,0,0.000000


In [124]:
# Rank countries by Python share, highest first. Countries with only a handful of
# respondents end up at both extremes (e.g. 1 of 1 = 100%), so read this together
# with noOfPeople.
pythonTotalPeople.sort_values("PCTKnowsPython", ascending=False)

Unnamed: 0,noOfPeople,noWhoKnowsPython,PCTKnowsPython
Sao Tome and Principe,1,1,100.000000
Timor-Leste,1,1,100.000000
Dominica,1,1,100.000000
Niger,1,1,100.000000
Turkmenistan,7,6,85.714286
...,...,...,...
Cape Verde,3,0,0.000000
Lao People's Democratic Republic,3,0,0.000000
Malawi,2,0,0.000000
Liberia,2,0,0.000000
