# Pandas 
### (Panda, Panda, Panda)

In [1]:
import numpy as np
import pandas as pd

### Indexing list

In [2]:
## Values shared by both Series below
list1 = ['Shashank', 'Saket', 'Shaurya', 'Divy', 'Raghav']

## Two alternative labelings for the same five values
index = list(range(1, 6))
custom_index = ['ich', 'ni', 'sun', 'say', 'go']

## The second argument of pd.Series is the index (passed by keyword here)
indexed = pd.Series(data=list1, index=index)
custom_indexed = pd.Series(data=list1, index=custom_index)

In [3]:
print(indexed)

## Custom index can be set 
custom_indexed

1    Shashank
2       Saket
3     Shaurya
4        Divy
5      Raghav
dtype: object


ich    Shashank
ni        Saket
sun     Shaurya
say        Divy
go       Raghav
dtype: object

In [4]:
## A Series gets a default integer index unless an explicit index (or a dict's keys) is supplied
    
## Series built from a NumPy array: it keeps the array's dtype and gets the default 0..n-1 index
arr1 = np.array([1, 3, 5, 2, 6], dtype=np.float32)
ser1 = pd.Series(data=arr1)
print(ser1 , '\n\n')

dict1 = {
    'f_name': 'Shashank',
    'l_name': 'Chaudhary',
    'age': 18,
    'l_achievement': 12,
}

## Series built from a dict: the keys become the index labels.
## `name` is optional here; it matters later in this notebook, where a named Series
## is added to a DataFrame as a row and the name becomes the row label.
ser2 = pd.Series(data=dict1, name='converting dictionaries')
print(ser2 , '\n\nname of the sereis: ' , ser2.name)

0    1.0
1    3.0
2    5.0
3    2.0
4    6.0
dtype: float32 


f_name            Shashank
l_name           Chaudhary
age                     18
l_achievement           12
Name: converting dictionaries, dtype: object 

name of the sereis:  converting dictionaries


## Dataframes 

### Creating dataframes

In [5]:
## Seeded generator so the frame is identical on every Restart & Run All
rng = np.random.default_rng(42)
## 2x3 array of integers drawn from [10, 50) (upper bound exclusive, as with randint)
arr = rng.integers(10, 50, size=(2, 3))
## Row labels and column labels are passed by keyword to make their roles explicit
df1 = pd.DataFrame(arr, index=['A', 'B'], columns=['C', 'D', 'E'])
df1

Unnamed: 0,C,D,E
A,19,43,15
B,31,16,32


In [6]:
## Two Series with different indexes; 'one' has no entry for label 'd'
dict3 = {
    'one': pd.Series(data=[1, 2, 3], index=list('abc')),
    'two': pd.Series(data=[1, 2, 3, 4], index=list('abcd')),
}

## The frame's index is the union of both indexes; the missing 'one' value at 'd' shows as NaN
pd.DataFrame(data=dict3)

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [7]:
dict([('a',[1,2,3]),('b',[4,5,6])])


{'a': [1, 2, 3], 'b': [4, 5, 6]}

In [8]:
## DataFrame from a dictionary: with the default orient the keys become column labels.
## pd.DataFrame(...) alone would do the same; DataFrame.from_dict is only needed
## when `orient` has to differ from the default.
pd.DataFrame.from_dict({'a': [1, 2, 3], 'b': [4, 5, 6]})

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [9]:
## orient='index' turns each dict key into a row label; `columns` names the positions within each list
df3 = pd.DataFrame.from_dict(
    {'a': [1, 2, 3], 'b': [4, 5, 6]},
    orient='index',
    columns=['one', 'two', 'three'],
)
df3

Unnamed: 0,one,two,three
a,1,2,3
b,4,5,6


## Editing and Retrieving Data

In [10]:
## Whole frame, then a column subset selected with a list of labels
print(df3, '\n')
print(df3[['one', 'three']], '\n')

## Scalar by (row label, column label), then a full row by integer position
print(df3.loc['a', 'one'], '\n')
print(df3.iloc[1], '\n')

## Label-based selection of several rows and several columns at once
print(df3.loc[['a', 'b'], ['one', 'three']])

## New column holding the element-wise sum of the three original columns
df3['Total'] = df3['one'].add(df3['two']).add(df3['three'])
df3

   one  two  three
a    1    2      3
b    4    5      6 

   one  three
a    1      3
b    4      6 

1 

one      4
two      5
three    6
Name: b, dtype: int64 

   one  three
a    1      3
b    4      6


Unnamed: 0,one,two,three,Total
a,1,2,3,6
b,4,5,6,15


In [11]:
new_row = {'one': 8, 'two': 23, 'three': 'new'}

## DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the dict is
## wrapped in a one-row DataFrame and concatenated instead.
## ignore_index=True renumbers the rows 0..n-1; the column the new row lacks
## ('Total') is filled with NaN for that row.
print(pd.concat([df3, pd.DataFrame([new_row])], ignore_index=True))

## pd.concat returns a new frame: df3 itself is unchanged unless the result is
## assigned back (df3 = pd.concat([...], ignore_index=True))
df3

   one  two three  Total
0    1    2     3    6.0
1    4    5     6   15.0
2    8   23   new    NaN


Unnamed: 0,one,two,three,Total
a,1,2,3,6
b,4,5,6,15


In [12]:
## The Series name ('S') becomes the row label of the added row
n_row = pd.Series(new_row, name='S')

## DataFrame.append was removed in pandas 2.0. The equivalent is to turn the named
## Series into a one-row frame (to_frame().T) and concatenate it; infer_objects()
## restores numeric dtypes that the mixed-type Series had stored as object.
pd.concat([df3, n_row.to_frame().T.infer_objects()])

Unnamed: 0,one,two,three,Total
a,1,2,3,6.0
b,4,5,6,15.0
S,8,23,new,


# Pandas COREY SCHAFER tutorial

### Reading data with pandas

In [13]:
df = pd.read_csv('data/survey_results_public.csv')

In [14]:
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64456,64858,,Yes,,16,,,,United States,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64457,64867,,Yes,,,,,,Morocco,,...,,,,,,,,,,
64458,64898,,Yes,,,,,,Viet Nam,,...,,,,,,,,,,
64459,64925,,Yes,,,,,,Poland,,...,,,,,Angular;Angular.js;React.js,,,,,


In [15]:
def isInt(value):
    """Return True if ``int(value)`` succeeds, False otherwise.

    Parameters
    ----------
    value : object
        Anything; typically a string or float taken from a DataFrame column.

    Returns
    -------
    bool
        True when ``int(value)`` does not raise. Because ``int()`` truncates,
        floats with a fractional part (e.g. 3.7) also count as convertible.
        False for non-numeric strings, NaN, infinities, None and other
        unsupported types.
    """
    try:
        int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None / containers;
        # OverflowError: +/- infinity
        return False
    return True

In [16]:
## Keep only the Age1stCode entries that isInt accepts, converted to int
age_list = list(map(int, filter(isInt, df['Age1stCode'])))

In [17]:
df.shape ## Dimensionality of the DataFrame

(64461, 61)

In [18]:
df.info() ## Concise Summary of a Dataframe

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64461 entries, 0 to 64460
Data columns (total 61 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Respondent                    64461 non-null  int64  
 1   MainBranch                    64162 non-null  object 
 2   Hobbyist                      64416 non-null  object 
 3   Age                           45446 non-null  float64
 4   Age1stCode                    57900 non-null  object 
 5   CompFreq                      40069 non-null  object 
 6   CompTotal                     34826 non-null  float64
 7   ConvertedComp                 34756 non-null  float64
 8   Country                       64072 non-null  object 
 9   CurrencyDesc                  45472 non-null  object 
 10  CurrencySymbol                45472 non-null  object 
 11  DatabaseDesireNextYear        44070 non-null  object 
 12  DatabaseWorkedWith            49537 non-null  object 
 13  D

In [19]:
pd.set_option('display.max_columns',61) ## Changing my settings to show all the columns 

In [20]:
df ## Showing all the columns

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",Slightly satisfied,I am not interested in new job opportunities,C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Microsoft Teams;Microsoft Azure;Trello,Confluence;Jira;Slack;Microsoft Azure;Trello,No,Somewhat important,Fairly important,,,Once a year,Not sure,,No,Often: 1-2 days per week or more,Start a free trial;Ask developers I know/work ...,Amused,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,Windows,2 to 9 employees,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,,Straight / Heterosexual,No,"No, not at all",,Multiple times per day,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,Very dissatisfied,I am not interested in new job opportunities,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Github;Slack,Confluence;Jira;Github;Gitlab;Slack,,,Fairly important,,,Once a year,Not sure,,No,,,Amused,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,MacOS,"1,000 to 4,999 employees",iOS;Kubernetes;Linux;MacOS,iOS,I have little or no influence,,Yes,"Yes, definitely",Less than once per month or monthly,Multiple times per day,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,,,,,,,,,,,Objective-C;Python;Swift,Objective-C;Python;Swift,,,,,,,,,,Once a decade,,,No,,,,Stack Overflow (public Q&A for anyone who codes),,Linux-based,,,,,,Yes,"Yes, somewhat",A few times per month or weekly,Daily or almost daily,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,White or of European descent,Man,Flex time or a flexible schedule;Office enviro...,Slightly dissatisfied,"I’m not actively looking, but I am open to new...",,,,,,,No,,Not at all important/not necessary,Curious about other opportunities;Wanting to w...,,Once a year,Not sure,Yes,Yes,Occasionally: 1-2 days per quarter but less th...,,,Stack Overflow (public Q&A for anyone who code...,,Linux-based,20 to 99 employees,,,I have a great deal of influence,Straight / Heterosexual,Yes,"Yes, definitely",A few times per month or weekly,Multiple times per day,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,MySQL;PostgreSQL,MySQL;PostgreSQL;Redis;SQLite,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,,,,Java;Ruby;Scala,HTML/CSS;Ruby;SQL,Ansible;Chef,Ansible,"Github;Google Suite (Docs, Meet, etc)",Confluence;Jira;Github;Slack;Google Suite (Doc...,,,Very important,,,Once a year,No,,Yes,,Start a free trial;Ask developers I know/work ...,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,Windows,,Docker;Google Cloud Platform;Heroku;Linux;Windows,AWS;Docker;Linux;MacOS;Windows,,Straight / Heterosexual,Yes,"Yes, somewhat",Less than once per month or monthly,A few times per month or weekly,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64456,64858,,Yes,,16,,,,United States,,,,,Senior executive/VP,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,,,,,,,,,,,,,,Very important,,,Once a decade,,,,,Start a free trial,Amused,Stack Overflow (public Q&A for anyone who codes),Call a coworker or friend,Windows,,,,,,,,,,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64457,64867,,Yes,,,,,,Morocco,,,Cassandra;Couchbase;DynamoDB;Elasticsearch;Fir...,Cassandra;Couchbase;DynamoDB;Elasticsearch;Fir...,,,Employed full-time,,,,,,Assembly;Bash/Shell/PowerShell;C;C#;C++;Dart;G...,Assembly;Bash/Shell/PowerShell;C;C#;C++;Dart;G...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
64458,64898,,Yes,,,,,,Viet Nam,,,,,,Primary/elementary school,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
64459,64925,,Yes,,,,,,Poland,,,DynamoDB;Elasticsearch;MongoDB;MySQL;PostgreSQL,Oracle,,,Employed full-time,,,,,,HTML/CSS;Java;JavaScript,HTML/CSS,Node.js,,Github;Gitlab,Confluence;Jira;Slack;Microsoft Teams,,,,,,Once a year,,,,,Start a free trial,"Hello, old friend",Stack Overflow (public Q&A for anyone who codes),Call a coworker or friend;Visit Stack Overflow,Windows,,,Linux;Windows,,,,,,,,,,,Angular;Angular.js;React.js,,,,,


In [21]:
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [22]:
schema_df

Unnamed: 0,Column,QuestionText
0,Respondent,Randomized respondent ID number (not in order ...
1,MainBranch,Which of the following options best describes ...
2,Hobbyist,Do you code as a hobby?
3,Age,What is your age (in years)? If you prefer not...
4,Age1stCode,At what age did you write your first line of c...
...,...,...
56,WebframeWorkedWith,Which web frameworks have you done extensive d...
57,WelcomeChange,"Compared to last year, how welcome do you feel..."
58,WorkWeekHrs,"On average, how many hours per week do you wor..."
59,YearsCode,"Including any education, how many years have y..."


In [23]:
pd.set_option('display.max_rows' , 61)
print(schema_df.head(6))  ## First 6 data rows
schema_df.tail(6) ## Last 6 data rows

       Column                                       QuestionText
0  Respondent  Randomized respondent ID number (not in order ...
1  MainBranch  Which of the following options best describes ...
2    Hobbyist                            Do you code as a hobby?
3         Age  What is your age (in years)? If you prefer not...
4  Age1stCode  At what age did you write your first line of c...
5    CompFreq   Is that compensation weekly, monthly, or yearly?


Unnamed: 0,Column,QuestionText
55,WebframeDesireNextYear,Which web frameworks have you done extensive d...
56,WebframeWorkedWith,Which web frameworks have you done extensive d...
57,WelcomeChange,"Compared to last year, how welcome do you feel..."
58,WorkWeekHrs,"On average, how many hours per week do you wor..."
59,YearsCode,"Including any education, how many years have y..."
60,YearsCodePro,"NOT including education, how many years have y..."


In [24]:
df.head()

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,EUR,Microsoft SQL Server,Elasticsearch;Microsoft SQL Server;Oracle,"Developer, desktop or enterprise applications;...","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,"Languages, frameworks, and other technologies ...",Slightly satisfied,I am not interested in new job opportunities,C#;HTML/CSS;JavaScript,C#;HTML/CSS;JavaScript,.NET Core;Xamarin,.NET;.NET Core,Microsoft Teams;Microsoft Azure;Trello,Confluence;Jira;Slack;Microsoft Azure;Trello,No,Somewhat important,Fairly important,,,Once a year,Not sure,,No,Often: 1-2 days per week or more,Start a free trial;Ask developers I know/work ...,Amused,Stack Overflow (public Q&A for anyone who codes),Visit Stack Overflow;Go for a walk or other ph...,Windows,2 to 9 employees,Android;iOS;Kubernetes;Microsoft Azure;Windows,Windows,,Straight / Heterosexual,No,"No, not at all",,Multiple times per day,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27.0
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,GBP,,,"Developer, full-stack;Developer, mobile","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,,,,Very dissatisfied,I am not interested in new job opportunities,Python;Swift,JavaScript;Swift,React Native;TensorFlow;Unity 3D,React Native,Github;Slack,Confluence;Jira;Github;Gitlab;Slack,,,Fairly important,,,Once a year,Not sure,,No,,,Amused,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,MacOS,"1,000 to 4,999 employees",iOS;Kubernetes;Linux;MacOS,iOS,I have little or no influence,,Yes,"Yes, definitely",Less than once per month or monthly,Multiple times per day,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4.0
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,,,,,,,,,,,,Objective-C;Python;Swift,Objective-C;Python;Swift,,,,,,,,,,Once a decade,,,No,,,,Stack Overflow (public Q&A for anyone who codes),,Linux-based,,,,,,Yes,"Yes, somewhat",A few times per month or weekly,Daily or almost daily,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,ALL,,,,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",,White or of European descent,Man,Flex time or a flexible schedule;Office enviro...,Slightly dissatisfied,"I’m not actively looking, but I am open to new...",,,,,,,No,,Not at all important/not necessary,Curious about other opportunities;Wanting to w...,,Once a year,Not sure,Yes,Yes,Occasionally: 1-2 days per quarter but less th...,,,Stack Overflow (public Q&A for anyone who code...,,Linux-based,20 to 99 employees,,,I have a great deal of influence,Straight / Heterosexual,Yes,"Yes, definitely",A few times per month or weekly,Multiple times per day,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4.0
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,,MySQL;PostgreSQL,MySQL;PostgreSQL;Redis;SQLite,,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,,,,Java;Ruby;Scala,HTML/CSS;Ruby;SQL,Ansible;Chef,Ansible,"Github;Google Suite (Docs, Meet, etc)",Confluence;Jira;Github;Slack;Google Suite (Doc...,,,Very important,,,Once a year,No,,Yes,,Start a free trial;Ask developers I know/work ...,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,Windows,,Docker;Google Cloud Platform;Heroku;Linux;Windows,AWS;Docker;Linux;MacOS;Windows,,Straight / Heterosexual,Yes,"Yes, somewhat",Less than once per month or monthly,A few times per month or weekly,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8.0


In [25]:
### shashank.k.chaudhary@gmail.com

In [26]:
## Again creating a dataframe using dictionary
## Column-oriented records: every key maps to a list with one entry per person
## (the second person's l_name is deliberately the empty string)
people = dict(
    f_name=['Shashank', 'Saket', 'Raghav', 'Divy'],
    l_name=['Chaudhary', '', 'Saboo', 'Badgurjar'],
    email=[
        'shashank.k.chaudhary@gmail.com',
        'saket@gmail.com',
        'saboo.raghav@gmail.com',
        'divy.bd@gmail.com',
    ],
    loc=['Darbhanga', 'Patna', 'Gurgaon', 'Jaipur'],
)

In [27]:
## Each key of `people` becomes a column label of the new DataFrame
dict_df = pd.DataFrame(people)
## Every column of the frame (f_name, l_name, email, loc) is itself a pandas Series
dict_df

Unnamed: 0,f_name,l_name,email,loc
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,Darbhanga
1,Saket,,saket@gmail.com,Patna
2,Raghav,Saboo,saboo.raghav@gmail.com,Gurgaon
3,Divy,Badgurjar,divy.bd@gmail.com,Jaipur


In [28]:
print(dict_df.email, '\n')

## GIVES THE EXACTLY SAME THING
dict_df['email']

0    shashank.k.chaudhary@gmail.com
1                   saket@gmail.com
2            saboo.raghav@gmail.com
3                 divy.bd@gmail.com
Name: email, dtype: object 



0    shashank.k.chaudhary@gmail.com
1                   saket@gmail.com
2            saboo.raghav@gmail.com
3                 divy.bd@gmail.com
Name: email, dtype: object

In [29]:
print(type(dict_df['email']))

## count() reports 4 for l_name even though one entry looks empty: that entry is the
## empty string '' from `people`, which is an ordinary string value rather than NaN,
## so it is counted as non-missing
dict_df.count()

<class 'pandas.core.series.Series'>


f_name    4
l_name    4
email     4
loc       4
dtype: int64

In [30]:
## Getting the column labels of a DataFrame
print(df.columns)

## Row at integer position 0, returned as a Series indexed by column name
df.iloc[0]

Index(['Respondent', 'MainBranch', 'Hobbyist', 'Age', 'Age1stCode', 'CompFreq',
       'CompTotal', 'ConvertedComp', 'Country', 'CurrencyDesc',
       'CurrencySymbol', 'DatabaseDesireNextYear', 'DatabaseWorkedWith',
       'DevType', 'EdLevel', 'Employment', 'Ethnicity', 'Gender', 'JobFactors',
       'JobSat', 'JobSeek', 'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
  

Respondent                                                                      1
MainBranch                                         I am a developer by profession
Hobbyist                                                                      Yes
Age                                                                           NaN
Age1stCode                                                                     13
CompFreq                                                                  Monthly
CompTotal                                                                     NaN
ConvertedComp                                                                 NaN
Country                                                                   Germany
CurrencyDesc                                                        European Euro
CurrencySymbol                                                                EUR
DatabaseDesireNextYear                                       Microsoft SQL Server
DatabaseWorkedWi

### Retrieving data from a DataFrame

In [31]:
## iloc is basically integer location
## DataFrame.iloc[list of rows, list of columns]
df.iloc[[1,6],[1,5]]

Unnamed: 0,MainBranch,CompFreq
1,I am a developer by profession,
6,I am a developer by profession,Monthly


In [32]:
dict_df

Unnamed: 0,f_name,l_name,email,loc
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,Darbhanga
1,Saket,,saket@gmail.com,Patna
2,Raghav,Saboo,saboo.raghav@gmail.com,Gurgaon
3,Divy,Badgurjar,divy.bd@gmail.com,Jaipur


In [33]:
## loc is the label based location
## Because index has integer as the label we can access them using the integer

## We get the resultant DataFrame in the same Order as in the list
dict_df.loc[[2,0],['f_name','email']]

Unnamed: 0,f_name,email
2,Raghav,saboo.raghav@gmail.com
0,Shashank,shashank.k.chaudhary@gmail.com


In [34]:
## Slices are written directly inside .loc[...]; no extra square brackets are
## needed, unlike when passing a list of labels

## Unlike ordinary Python slicing, a label slice in .loc includes its end point:
## rows 0 through 4 (4 included) and columns 'Age' through 'Country' ('Country' included)
df.loc[0:4, 'Age':'Country']

Unnamed: 0,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country
0,,13,Monthly,,,Germany
1,,19,,,,United Kingdom
2,,15,,,,Russian Federation
3,25.0,18,,,,Albania
4,31.0,16,,,,United States


### Setting index 

In [35]:
## Setting one of the columns as index
## Doesn't modify the dataFrame itself unless done explicitly

dict_df.set_index('email')

Unnamed: 0_level_0,f_name,l_name,loc
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
shashank.k.chaudhary@gmail.com,Shashank,Chaudhary,Darbhanga
saket@gmail.com,Saket,,Patna
saboo.raghav@gmail.com,Raghav,Saboo,Gurgaon
divy.bd@gmail.com,Divy,Badgurjar,Jaipur


In [36]:
dict_df

Unnamed: 0,f_name,l_name,email,loc
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,Darbhanga
1,Saket,,saket@gmail.com,Patna
2,Raghav,Saboo,saboo.raghav@gmail.com,Gurgaon
3,Divy,Badgurjar,divy.bd@gmail.com,Jaipur


In [37]:
## To change the DataFrame itself (instead of getting a modified copy back) pass inplace=True

dict_df.set_index('email' , inplace = True)


In [38]:
dict_df

Unnamed: 0_level_0,f_name,l_name,loc
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
shashank.k.chaudhary@gmail.com,Shashank,Chaudhary,Darbhanga
saket@gmail.com,Saket,,Patna
saboo.raghav@gmail.com,Raghav,Saboo,Gurgaon
divy.bd@gmail.com,Divy,Badgurjar,Jaipur


In [39]:
dict_df.index

Index(['shashank.k.chaudhary@gmail.com', 'saket@gmail.com',
       'saboo.raghav@gmail.com', 'divy.bd@gmail.com'],
      dtype='object', name='email')

In [40]:
dict_df.loc['shashank.k.chaudhary@gmail.com','l_name']

## Can't use dict_df.loc[0] anymore but can use dict_df.iloc[0]

'Chaudhary'

In [41]:
### Resetting the index to the default integer index; 'email' goes back to being a regular column

dict_df.reset_index(inplace=True)
dict_df

Unnamed: 0,email,f_name,l_name,loc
0,shashank.k.chaudhary@gmail.com,Shashank,Chaudhary,Darbhanga
1,saket@gmail.com,Saket,,Patna
2,saboo.raghav@gmail.com,Raghav,Saboo,Gurgaon
3,divy.bd@gmail.com,Divy,Badgurjar,Jaipur


In [42]:
### A column can also be made the index directly while reading the CSV, via index_col
df_schema = pd.read_csv('data/survey_results_schema.csv' , index_col='Column')

In [43]:
## As we can see the column is acting as the index
df_schema.loc['Respondent']

QuestionText    Randomized respondent ID number (not in order ...
Name: Respondent, dtype: object

In [44]:
## Rows sorted by index label in alphabetical (ascending) order; returns a new frame
df_schema.sort_index()

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Age,What is your age (in years)? If you prefer not...
Age1stCode,At what age did you write your first line of c...
CompFreq,"Is that compensation weekly, monthly, or yearly?"
CompTotal,What is your current total compensation (salar...
ConvertedComp,Salary converted to annual USD salaries using ...
Country,Where do you live?
CurrencyDesc,Which currency do you use day-to-day? If your ...
CurrencySymbol,Which currency do you use day-to-day? If your ...
DatabaseDesireNextYear,Which database environments have you done exte...
DatabaseWorkedWith,Which database environments have you done exte...


In [45]:
## The same sort in reverse (descending) order
df_schema.sort_index(ascending = False, inplace = True)

## Whenever a change should be stored directly in the DataFrame rather than
## returned as a new object, pass inplace=True (the keyword is lowercase)
df_schema

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
YearsCodePro,"NOT including education, how many years have y..."
YearsCode,"Including any education, how many years have y..."
WorkWeekHrs,"On average, how many hours per week do you wor..."
WelcomeChange,"Compared to last year, how welcome do you feel..."
WebframeWorkedWith,Which web frameworks have you done extensive d...
WebframeDesireNextYear,Which web frameworks have you done extensive d...
UndergradMajor,What was your primary field of study?
Trans,Are you transgender?
SurveyLength,How do you feel about the length of the survey...
SurveyEase,How easy or difficult was this survey to compl...


## Filtering the data

In [46]:
## Assigning a scalar to a column broadcasts it to every row, so this overwrites every l_name with ''.
## (Done by mistake here; the pattern is mainly useful for adding a constant tag column,
## e.g. to tell datasets apart after concatenating them.)
dict_df['l_name'] = ''

In [47]:
dict_df

Unnamed: 0,email,f_name,l_name,loc
0,shashank.k.chaudhary@gmail.com,Shashank,,Darbhanga
1,saket@gmail.com,Saket,,Patna
2,saboo.raghav@gmail.com,Raghav,,Gurgaon
3,divy.bd@gmail.com,Divy,,Jaipur


In [48]:
## Gives the boolean of the series of the column
dict_df['f_name'] == 'Saket'

0    False
1     True
2    False
3    False
Name: f_name, dtype: bool

In [49]:
## Boolean mask: True for the rows whose f_name is 'Shashank'
filt = dict_df['f_name'].eq('Shashank')

### Indexing with the mask directly and through .loc select the same rows
print(dict_df[filt])
print(dict_df.loc[filt])

## .loc additionally accepts a column label, giving only the email of the matching rows
dict_df.loc[filt, 'email']

                            email    f_name l_name        loc
0  shashank.k.chaudhary@gmail.com  Shashank         Darbhanga
                            email    f_name l_name        loc
0  shashank.k.chaudhary@gmail.com  Shashank         Darbhanga


0    shashank.k.chaudhary@gmail.com
Name: email, dtype: object

In [50]:
## Column-oriented sample data: five people with name, email and branch
new_dict = {
    'fname': ['Shashank', 'Divy', 'Raghav', 'Abhinav', 'Gowrinath'],
    'lname': ['Chaudhary', 'Badgurjar', 'Saboo', 'Kumar', ''],
    'email': [
        'shashank.k.chaudhary@gmail.com',
        'divy@gmail.com',
        'saboo@email.com',
        'abhinav@gmail.com',
        'gowri@gmail.com',
    ],
    'Branch': ['IT', 'EEE', 'EEE', 'IT', 'EEE'],
}

df_new = pd.DataFrame(data=new_dict)

In [51]:
## Element-wise masks are combined with & (and) and | (or); Python's `and` / `or`
## keywords do not work on Series

in_it_branch = df_new['Branch'].eq('IT')
named_shashank = df_new['fname'].eq('Shashank')
filt_new = in_it_branch & named_shashank
df_new.loc[filt_new, 'email']

0    shashank.k.chaudhary@gmail.com
Name: email, dtype: object

In [52]:
## Rows that are in the IT branch or whose first name is Divy
filt_new = df_new['Branch'].eq('IT') | df_new['fname'].eq('Divy')

## ~ negates the mask, so this selects the emails of all remaining rows
df_new.loc[~filt_new, 'email']

2    saboo@email.com
4    gowri@gmail.com
Name: email, dtype: object

In [53]:
## high_salary flags rows with ConvertedComp > 70000, Country == 'India' and a
## LanguageWorkedWith value containing 'Python'. na=False makes rows whose
## LanguageWorkedWith is NaN count as "no match" instead of putting NaN into the mask.
high_salary = (
    (df['ConvertedComp'] > 70000)
    & (df['Country'] == 'India')
    & (df['LanguageWorkedWith'].str.contains('Python', na=False))
)

## Select the matching rows and the three columns of interest once, then reuse the selection
high_salary_rows = df.loc[high_salary, ['Country', 'LanguageWorkedWith', 'ConvertedComp']]

## Shape before dropping the rows that contain NaN, followed by the NaN-free rows.
## NOTE(review): the earlier note of 13178 rows before / 12756 after dropna was
## presumably recorded before the Country == 'India' condition was added - confirm.
print(high_salary_rows.shape)
high_salary_rows.dropna()

(63, 3)


Unnamed: 0,Country,LanguageWorkedWith,ConvertedComp
277,India,Bash/Shell/PowerShell;C++;HTML/CSS;JavaScript;...,150792.0
3318,India,Go;Java;Python;Scala;SQL,1000000.0
3586,India,Bash/Shell/PowerShell;Go;Java;JavaScript;Kotli...,75396.0
5886,India,Bash/Shell/PowerShell;Dart;HTML/CSS;Java;Kotli...,134028.0
6084,India,C#;HTML/CSS;JavaScript;Python,435600.0
...,...,...,...
54218,India,Bash/Shell/PowerShell;C;C#;C++;Dart;HTML/CSS;J...,167544.0
54254,India,HTML/CSS;JavaScript;PHP;Python;SQL;TypeScript,301572.0
58170,India,Python;SQL,753936.0
60727,India,Bash/Shell/PowerShell;C;C++;Go;HTML/CSS;Java;J...,83770.0


In [54]:
df_new.columns

Index(['fname', 'lname', 'email', 'Branch'], dtype='object')

### Renaming column names

In [55]:
## Reassigning .columns is convenient when the same change applies to every column
## name, e.g. replacing spaces with underscores or changing case

df_new.columns = df_new.columns.str.upper()
df_new

Unnamed: 0,FNAME,LNAME,EMAIL,BRANCH
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,IT
1,Divy,Badgurjar,divy@gmail.com,EEE
2,Raghav,Saboo,saboo@email.com,EEE
3,Abhinav,Kumar,abhinav@gmail.com,IT
4,Gowrinath,,gowri@gmail.com,EEE


In [56]:
## The lower-case names read better, so revert the columns to how they were
df_new.columns = df_new.columns.str.lower()
df_new

Unnamed: 0,fname,lname,email,branch
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,IT
1,Divy,Badgurjar,divy@gmail.com,EEE
2,Raghav,Saboo,saboo@email.com,EEE
3,Abhinav,Kumar,abhinav@gmail.com,IT
4,Gowrinath,,gowri@gmail.com,EEE


In [57]:
## To give columns entirely new names, pass an {old_name: new_name} dictionary to
## DataFrame.rename; axis='columns' applies the mapping to the column labels

df_new.rename({'fname': 'f_name', 'lname': 'l_name'}, axis='columns', inplace=True)

In [58]:
df_new

Unnamed: 0,f_name,l_name,email,branch
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,IT
1,Divy,Badgurjar,divy@gmail.com,EEE
2,Raghav,Saboo,saboo@email.com,EEE
3,Abhinav,Kumar,abhinav@gmail.com,IT
4,Gowrinath,,gowri@gmail.com,EEE


In [59]:
## Add a new 'school' column. A plain list is assigned by position and must hold exactly
## one value per row; wrapping it in pd.Series would instead align on index labels and
## silently fill NaN wherever the labels do not match df_new's index.
df_new['school'] = ['SITE', 'SELECT', 'SELECT', 'SITE', 'SELECT']
df_new

Unnamed: 0,f_name,l_name,email,branch,school
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,IT,SITE
1,Divy,Badgurjar,divy@gmail.com,EEE,SELECT
2,Raghav,Saboo,saboo@email.com,EEE,SELECT
3,Abhinav,Kumar,abhinav@gmail.com,IT,SITE
4,Gowrinath,,gowri@gmail.com,EEE,SELECT


In [60]:
## pop removes the 'branch' column from df_new and returns the removed column as a Series
df_new.pop('branch')

0     IT
1    EEE
2    EEE
3     IT
4    EEE
Name: branch, dtype: object

In [61]:
df_new

Unnamed: 0,f_name,l_name,email,school
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,SITE
1,Divy,Badgurjar,divy@gmail.com,SELECT
2,Raghav,Saboo,saboo@email.com,SELECT
3,Abhinav,Kumar,abhinav@gmail.com,SITE
4,Gowrinath,,gowri@gmail.com,SELECT


In [62]:
## Select f_name and school for the rows whose school is 'SITE'.
## A single .loc call with a vectorised boolean mask replaces the chained
## df[cols][python_list_mask] indexing (one lookup, no intermediate frame).
df_new.loc[df_new['school'] == 'SITE', ['f_name', 'school']]

Unnamed: 0,f_name,school
0,Shashank,SITE
3,Abhinav,SITE


In [63]:
## .loc with a row label and a list of columns updates several values of that row at once;
## the values on the right are matched to the listed columns by position
df_new.loc[4, ['f_name', 'l_name']] = ['Gowri', 'Nath']
df_new

Unnamed: 0,f_name,l_name,email,school
0,Shashank,Chaudhary,shashank.k.chaudhary@gmail.com,SITE
1,Divy,Badgurjar,divy@gmail.com,SELECT
2,Raghav,Saboo,saboo@email.com,SELECT
3,Abhinav,Kumar,abhinav@gmail.com,SITE
4,Gowri,Nath,gowri@gmail.com,SELECT


In [64]:
## ## ### #### # NOTE!!!

## updating values using filters
## create a filter let it be "filt"

## df[filt]['l_name'] = 'Smith' (this doesn't work: df[filt] gives a copy, so the assignment never reaches the original DataFrame)

## instead we do this: df.loc[filt , 'l_name'] = 'Smith'

In [65]:
## .str.upper() upper-cases every value of the column; assigning the result back
## overwrites f_name in df_new
df_new['f_name'] = df_new['f_name'].str.upper()
df_new

Unnamed: 0,f_name,l_name,email,school
0,SHASHANK,Chaudhary,shashank.k.chaudhary@gmail.com,SITE
1,DIVY,Badgurjar,divy@gmail.com,SELECT
2,RAGHAV,Saboo,saboo@email.com,SELECT
3,ABHINAV,Kumar,abhinav@gmail.com,SITE
4,GOWRI,Nath,gowri@gmail.com,SELECT


In [66]:
### apply , map , applymap , replace
## dataFrame['column_name'].apply(func)

## On a Series, apply calls func once for every value and returns the results as a new Series

df_new['l_name'].apply(len)

0    9
1    9
2    5
3    5
4    4
Name: l_name, dtype: int64

In [67]:
## apply on a DataFrame hands whole Series to func instead of single values:
## by default each column is passed, so len gives the number of rows in every column
print(df_new.apply(len))
## with axis = 'columns' each row is passed, so len gives the number of columns in every row
df_new.apply(len , axis = 'columns')

f_name    5
l_name    5
email     5
school    5
dtype: int64


0    4
1    4
2    4
3    4
4    4
dtype: int64

In [68]:
## applymap calls the func on every single element inside the DataFrame
## NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour of DataFrame.map — confirm against the installed version
df_new.applymap(len)

Unnamed: 0,f_name,l_name,email,school
0,8,9,30,4
1,4,9,14,6
2,6,5,15,6
3,7,5,17,4
4,5,4,15,6


In [69]:
## the map method works on a Series only
## every value that has no key in the dictionary becomes NaN

df_new['l_name'].map({'Chaudhary' : 'chy' , 'Badgurjar' : 'Bd' , 'Saboo' : 'Sb'})

0    chy
1     Bd
2     Sb
3    NaN
4    NaN
Name: l_name, dtype: object

In [70]:
## replace only swaps the values listed in the dictionary and preserves all the other values
## (the result is not assigned back here, so df_new stays unchanged)
df_new['l_name'].replace({'Chaudhary': 'Chy' , 'Badgurjar' : 'Bd'})

0      Chy
1       Bd
2    Saboo
3    Kumar
4     Nath
Name: l_name, dtype: object

In [71]:
## preview only: the replaced Series is displayed but not stored back in df_new
df_new['school'].replace({'SITE' : 'IT'})

0        IT
1    SELECT
2    SELECT
3        IT
4    SELECT
Name: school, dtype: object

In [72]:
## rename the 'school' column to 'branch' (inplace = True changes df_new itself, which is why this cell shows no output)
df_new.rename(columns = {'school' : 'branch'} , inplace = True)

In [73]:
df_new

Unnamed: 0,f_name,l_name,email,branch
0,SHASHANK,Chaudhary,shashank.k.chaudhary@gmail.com,SITE
1,DIVY,Badgurjar,divy@gmail.com,SELECT
2,RAGHAV,Saboo,saboo@email.com,SELECT
3,ABHINAV,Kumar,abhinav@gmail.com,SITE
4,GOWRI,Nath,gowri@gmail.com,SELECT


In [74]:
## replace the school codes with branch names and assign the result back so the change is kept
df_new['branch'] = df_new['branch'].replace({'SITE' : 'IT' , 'SELECT' : 'EEE'})
df_new

Unnamed: 0,f_name,l_name,email,branch
0,SHASHANK,Chaudhary,shashank.k.chaudhary@gmail.com,IT
1,DIVY,Badgurjar,divy@gmail.com,EEE
2,RAGHAV,Saboo,saboo@email.com,EEE
3,ABHINAV,Kumar,abhinav@gmail.com,IT
4,GOWRI,Nath,gowri@gmail.com,EEE


In [75]:
## Build full_name by joining f_name and l_name element-wise with a single space
df_new['full_name'] = df_new['f_name'].str.cat(df_new['l_name'], sep=' ')
df_new

Unnamed: 0,f_name,l_name,email,branch,full_name
0,SHASHANK,Chaudhary,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary
1,DIVY,Badgurjar,divy@gmail.com,EEE,DIVY Badgurjar
2,RAGHAV,Saboo,saboo@email.com,EEE,RAGHAV Saboo
3,ABHINAV,Kumar,abhinav@gmail.com,IT,ABHINAV Kumar
4,GOWRI,Nath,gowri@gmail.com,EEE,GOWRI Nath


In [76]:
## full_name now carries both parts, so remove the f_name and l_name columns from df_new
df_new.drop(['f_name', 'l_name'], axis='columns', inplace=True)
df_new

Unnamed: 0,email,branch,full_name
0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary
1,divy@gmail.com,EEE,DIVY Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo
3,abhinav@gmail.com,IT,ABHINAV Kumar
4,gowri@gmail.com,EEE,GOWRI Nath


In [77]:
## split every full_name on a space; expand = True returns the pieces as separate columns of a DataFrame
df_new['full_name'].str.split(' ' , expand = True)

Unnamed: 0,0,1
0,SHASHANK,Chaudhary
1,DIVY,Badgurjar
2,RAGHAV,Saboo
3,ABHINAV,Kumar
4,GOWRI,Nath


In [78]:
## Recreate f_name / l_name from full_name. n=1 splits on the first space only, so the
## result never has more than two columns and still fits the two target columns when a
## name contains additional spaces (an unbounded split would yield extra columns and
## make the assignment fail).
df_new[['f_name', 'l_name']] = df_new['full_name'].str.split(' ', n=1, expand=True)
df_new

Unnamed: 0,email,branch,full_name,f_name,l_name
0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary,SHASHANK,Chaudhary
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
3,abhinav@gmail.com,IT,ABHINAV Kumar,ABHINAV,Kumar
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath


In [79]:
## Add a single row given as a dictionary; columns missing from it are filled with NaN.
## DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row is wrapped
## in a one-row DataFrame and combined with pd.concat instead.
## The result is only displayed here: df_new itself is not modified.
pd.concat([df_new , pd.DataFrame([{'f_name' : 'Starc' , 'l_name' : 'Doe'}])] , ignore_index = True , sort = False)

Unnamed: 0,email,branch,full_name,f_name,l_name
0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary,SHASHANK,Chaudhary
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
3,abhinav@gmail.com,IT,ABHINAV Kumar,ABHINAV,Kumar
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
5,,,,Starc,Doe


In [80]:
## Two more people, written row by row, to be appended to df_new
df_append = pd.DataFrame(
    [['Yama', 'Gucci'], ['Dakuta', 'Qamachi']],
    columns=['f_name', 'l_name'],
)
df_append

Unnamed: 0,f_name,l_name
0,Yama,Gucci
1,Dakuta,Qamachi


In [81]:
## Add all rows of df_append to df_new and keep the result.
## pd.concat replaces DataFrame.append (deprecated in pandas 1.4, removed in 2.0);
## ignore_index renumbers the rows 0..n-1 and sort = False keeps df_new's column order.
df_new = pd.concat([df_new , df_append] , ignore_index = True , sort = False)

In [82]:
df_new

Unnamed: 0,email,branch,full_name,f_name,l_name
0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary,SHASHANK,Chaudhary
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
3,abhinav@gmail.com,IT,ABHINAV Kumar,ABHINAV,Kumar
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
5,,,,Yama,Gucci
6,,,,Dakuta,Qamachi


In [83]:
## drop the row with index label 6; a new DataFrame is returned and df_new itself keeps the row
df_new.drop(index= 6)

Unnamed: 0,email,branch,full_name,f_name,l_name
0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary,SHASHANK,Chaudhary
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
3,abhinav@gmail.com,IT,ABHINAV Kumar,ABHINAV,Kumar
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
5,,,,Yama,Gucci


In [84]:
## Dropping all the rows with branch 'IT': mask the index to get the labels of those rows
## and hand the labels to drop
df_new.drop(index=df_new.index[df_new['branch'] == 'IT'])

Unnamed: 0,email,branch,full_name,f_name,l_name
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
5,,,,Yama,Gucci
6,,,,Dakuta,Qamachi


In [85]:
## Same result as the previous cell, with the condition stored in a named filter first
filt = df_new['branch'].eq('IT')
df_new.drop(index=df_new.loc[filt].index)

Unnamed: 0,email,branch,full_name,f_name,l_name
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
5,,,,Yama,Gucci
6,,,,Dakuta,Qamachi


## Sorting Data

In [86]:
df_new.sort_values(by = 'l_name' , ascending = False) ## Sorting the data by the l_name column in descending order

Unnamed: 0,email,branch,full_name,f_name,l_name
2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
6,,,,Dakuta,Qamachi
4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
3,abhinav@gmail.com,IT,ABHINAV Kumar,ABHINAV,Kumar
5,,,,Yama,Gucci
0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary,SHASHANK,Chaudhary
1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar


In [87]:
## sort_index() puts the sorted rows back in index-label order, so this prints the frame in its original row order
print(df_new.sort_values(by = ['branch' , 'f_name'] , ascending = [False , True]).sort_index())

## This is the difference between reset_index and sort_index:
## reset_index() keeps the sorted row order, numbers the rows 0..n-1 and moves the old labels into an 'index' column

df_new.sort_values(by = ['branch' , 'f_name'] , ascending = [False , True]).reset_index()
### This sorts the data first on branch and, for rows that share the same branch, on f_name
## ascending takes one bool per sort column: branch is sorted in descending order and f_name in ascending order

                            email branch           full_name    f_name  \
0  shashank.k.chaudhary@gmail.com     IT  SHASHANK Chaudhary  SHASHANK   
1                  divy@gmail.com    EEE      DIVY Badgurjar      DIVY   
2                 saboo@email.com    EEE        RAGHAV Saboo    RAGHAV   
3               abhinav@gmail.com     IT       ABHINAV Kumar   ABHINAV   
4                 gowri@gmail.com    EEE          GOWRI Nath     GOWRI   
5                             NaN    NaN                 NaN      Yama   
6                             NaN    NaN                 NaN    Dakuta   

      l_name  
0  Chaudhary  
1  Badgurjar  
2      Saboo  
3      Kumar  
4       Nath  
5      Gucci  
6    Qamachi  


Unnamed: 0,index,email,branch,full_name,f_name,l_name
0,3,abhinav@gmail.com,IT,ABHINAV Kumar,ABHINAV,Kumar
1,0,shashank.k.chaudhary@gmail.com,IT,SHASHANK Chaudhary,SHASHANK,Chaudhary
2,1,divy@gmail.com,EEE,DIVY Badgurjar,DIVY,Badgurjar
3,4,gowri@gmail.com,EEE,GOWRI Nath,GOWRI,Nath
4,2,saboo@email.com,EEE,RAGHAV Saboo,RAGHAV,Saboo
5,6,,,,Dakuta,Qamachi
6,5,,,,Yama,Gucci


In [88]:
## sorting a single column (Series); upper-case letters sort before lower-case ones, so 'DIVY' comes before 'Dakuta'
df_new['f_name'].sort_values()

3     ABHINAV
1        DIVY
6      Dakuta
4       GOWRI
2      RAGHAV
0    SHASHANK
5        Yama
Name: f_name, dtype: object

In [89]:
## First 50 entries of the Country column in alphabetical order. Only that column is
## sorted, instead of sorting the whole DataFrame and then discarding every other column.
df['Country'].sort_values().head(50)

8599     Afghanistan
24908    Afghanistan
57044    Afghanistan
41950    Afghanistan
50469    Afghanistan
37601    Afghanistan
28270    Afghanistan
7586     Afghanistan
43157    Afghanistan
61453    Afghanistan
50492    Afghanistan
36354    Afghanistan
43140    Afghanistan
63925    Afghanistan
38261    Afghanistan
35602    Afghanistan
43010    Afghanistan
36887    Afghanistan
43708    Afghanistan
35541    Afghanistan
64306    Afghanistan
43709    Afghanistan
23708    Afghanistan
41506    Afghanistan
63993    Afghanistan
43843    Afghanistan
59329    Afghanistan
895      Afghanistan
50847    Afghanistan
35052    Afghanistan
2918     Afghanistan
53923    Afghanistan
15371    Afghanistan
45879    Afghanistan
44571    Afghanistan
50812    Afghanistan
62520    Afghanistan
35511    Afghanistan
33755    Afghanistan
26477    Afghanistan
37284    Afghanistan
9806     Afghanistan
56146    Afghanistan
29781    Afghanistan
44504    Afghanistan
50641    Afghanistan
30491    Afghanistan
26676    Afgh

In [90]:
## Counting how many times each unique value occurs in a Series (most frequent first)
df['Country'].value_counts().head(50)

United States         12469
India                  8403
United Kingdom         3896
Germany                3890
Canada                 2191
France                 1898
Brazil                 1818
Netherlands            1343
Poland                 1278
Australia              1208
Spain                  1161
Italy                  1117
Russian Federation     1092
Sweden                  886
Pakistan                809
Turkey                  777
Israel                  653
Switzerland             629
Bangladesh              609
Romania                 604
Nigeria                 593
Mexico                  540
Austria                 539
Ukraine                 539
Czech Republic          500
Indonesia               499
South Africa            494
Iran                    491
Belgium                 459
Ireland                 444
Philippines             405
Denmark                 397
Argentina               395
Greece                  390
Portugal                388
Norway              

In [94]:
## the 10 largest values inside the Series 'ConvertedComp' ...
print(df['ConvertedComp'].nlargest(10))

## ... and the 10 smallest values of the same Series
df['ConvertedComp'].nsmallest(10)

121     2000000.0
123     2000000.0
191     2000000.0
663     2000000.0
697     2000000.0
722     2000000.0
816     2000000.0
982     2000000.0
1018    2000000.0
1032    2000000.0
Name: ConvertedComp, dtype: float64


122     0.0
236     0.0
882     0.0
1112    0.0
1272    0.0
1785    0.0
2064    0.0
2187    0.0
2239    0.0
2314    0.0
Name: ConvertedComp, dtype: float64

In [96]:
## DataFrame.nlargest returns the complete rows holding the 10 highest 'ConvertedComp' values; nsmallest() works the same way
df.nlargest(10 , 'ConvertedComp')

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,ConvertedComp,Country,CurrencyDesc,CurrencySymbol,DatabaseDesireNextYear,DatabaseWorkedWith,DevType,EdLevel,Employment,Ethnicity,Gender,JobFactors,JobSat,JobSeek,LanguageDesireNextYear,LanguageWorkedWith,MiscTechDesireNextYear,MiscTechWorkedWith,NEWCollabToolsDesireNextYear,NEWCollabToolsWorkedWith,NEWDevOps,NEWDevOpsImpt,NEWEdImpt,NEWJobHunt,NEWJobHuntResearch,NEWLearn,NEWOffTopic,NEWOnboardGood,NEWOtherComms,NEWOvertime,NEWPurchaseResearch,NEWPurpleLink,NEWSOSites,NEWStuck,OpSys,OrgSize,PlatformDesireNextYear,PlatformWorkedWith,PurchaseWhat,Sexuality,SOAccount,SOComm,SOPartFreq,SOVisitFreq,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
121,123,I am a developer by profession,Yes,26.0,12,Weekly,120000.0,2000000.0,United States,United States dollar,USD,Cassandra;Firebase;MongoDB;PostgreSQL;Redis,Cassandra,"Developer, back-end;Developer, full-stack","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,"Languages, frameworks, and other technologies ...",Neither satisfied nor dissatisfied,"I’m not actively looking, but I am open to new...",C;C++;Go;Kotlin;Rust,Bash/Shell/PowerShell;C++;Go;Java;Python,Flutter;Hadoop;TensorFlow;Unreal Engine,.NET Core,Confluence;Jira;Github;Slack,Confluence;Jira;Github;Gitlab;Slack;Microsoft ...,Yes,Somewhat important,Very important,Trouble with my direct manager;Trouble with le...,"Read company media, such as employee blogs or ...",Once a year,Yes,No,No,Rarely: 1-2 days per year or less,Start a free trial;Ask developers I know/work ...,Indifferent,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Linux-based,"10,000 or more employees",Android;Arduino;AWS;Docker;Google Cloud Platfo...,AWS;Docker;Linux;Raspberry Pi,I have some influence,Straight / Heterosexual,Yes,"Yes, somewhat",Less than once per month or monthly,Daily or almost daily,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",Flask;jQuery;React.js,Spring,Just as welcome now as I felt last year,36.0,8,3
123,125,"I am not primarily a developer, but I write co...",Yes,41.0,30,Monthly,200000.0,2000000.0,United States,United States dollar,USD,PostgreSQL,PostgreSQL,Data scientist or machine learning specialist;...,"Other doctoral degree (Ph.D., Ed.D., etc.)",Employed full-time,White or of European descent,Man,Flex time or a flexible schedule;Family friend...,Very satisfied,I am not interested in new job opportunities,Python,Python;SQL,Keras;Pandas;TensorFlow;Torch/PyTorch,Keras;Pandas;TensorFlow,Jira,Jira,Not sure,Neutral,Critically important,Better work/life balance,Personal network - friends or family;Directly ...,Once every few years,No,Yes,No,Occasionally: 1-2 days per quarter but less th...,,Amused,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Windows,"5,000 to 9,999 employees",Docker;Kubernetes,Docker,I have little or no influence,Straight / Heterosexual,Not sure/can't remember,"No, not really",,Multiple times per day,Easy,Appropriate in length,No,,,,Just as welcome now as I felt last year,40.0,11,11
191,193,I am a developer by profession,Yes,29.0,16,Weekly,120000.0,2000000.0,United States,United States dollar,USD,,,"Developer, mobile","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,White or of European descent,Man,Specific department or team I’d be working on;...,Very satisfied,"I’m not actively looking, but I am open to new...",Objective-C;Swift,Java;JavaScript;Objective-C;Swift,,,"Github;Slack;Trello;Google Suite (Docs, Meet, ...",Github;Gitlab;Slack;Microsoft Teams;Trello;Goo...,No,Neutral,Fairly important,Curious about other opportunities;Better compe...,"Read company media, such as employee blogs or ...",Every few months,No,Onboarding? What onboarding?,No,Never,,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Play games;Visit Stack Overflow;Do other work ...,MacOS,10 to 19 employees,Android;iOS,Android;iOS,I have little or no influence,Straight / Heterosexual,Yes,"No, not really",Less than once per month or monthly,A few times per week,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",,,Just as welcome now as I felt last year,40.0,13,7
663,665,I am a developer by profession,Yes,24.0,13,Weekly,150000.0,2000000.0,United States,United States dollar,USD,,PostgreSQL,"Developer, front-end","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,Black or of African descent;Biracial,Woman,Diversity of the company or organization;Flex ...,Very satisfied,I am not interested in new job opportunities,HTML/CSS;JavaScript;TypeScript,HTML/CSS;JavaScript;Python;Ruby,Node.js,Node.js,"Jira;Github;Slack;Google Suite (Docs, Meet, etc)","Jira;Github;Slack;Google Suite (Docs, Meet, etc)",Yes,Extremely important,Somewhat important,Better compensation;Trouble with leadership at...,"Read company media, such as employee blogs or ...",Once a year,Not sure,Yes,Yes,Occasionally: 1-2 days per quarter but less th...,,Indifferent,Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,MacOS,"1,000 to 4,999 employees",AWS;Google Cloud Platform;Kubernetes;Linux;Mac...,Google Cloud Platform;Heroku;MacOS;WordPress,I have little or no influence,Bisexual;Queer,Yes,"No, not really",I have never participated in Q&A on Stack Over...,A few times per month or weekly,Neither easy nor difficult,Appropriate in length,No,"A humanities discipline (such as literature, h...",React.js,Express;React.js;Ruby on Rails,Just as welcome now as I felt last year,40.0,4,Less than 1 year
697,699,"I am not primarily a developer, but I write co...",Yes,39.0,16,Weekly,52000.0,2000000.0,United States,United States dollar,USD,MariaDB;Microsoft SQL Server;MySQL;PostgreSQL,Microsoft SQL Server;MySQL,System administrator,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Independent contractor, freelancer, or self-em...",White or of European descent,Man,Diversity of the company or organization;Offic...,Slightly satisfied,I am actively looking for a job,Bash/Shell/PowerShell;C;C#;C++;HTML/CSS;Java;J...,Bash/Shell/PowerShell;C;C#;C++;HTML/CSS;Java;J...,.NET;.NET Core;Pandas;Puppet;Unreal Engine,,Microsoft Teams;Microsoft Azure;Trello;Google ...,Jira;Microsoft Teams;Microsoft Azure;Trello;Go...,No,Somewhat important,Critically important,Growth or leadership opportunities,"Read company media, such as employee blogs or ...",Once a year,,No,No,Often: 1-2 days per week or more,Start a free trial;Visit developer communities...,"Hello, old friend",I have never visited any of these sites,Call a coworker or friend;Visit Stack Overflow...,Windows,100 to 499 employees,Android;Arduino;AWS;Google Cloud Platform;Kube...,WordPress,,Straight / Heterosexual,,,,,Neither easy nor difficult,Appropriate in length,No,"Information systems, information technology, o...",Angular;ASP.NET;ASP.NET Core;Express;Flask,,,40.0,5,2
722,724,"I am not primarily a developer, but I write co...",Yes,,12,Weekly,103000.0,2000000.0,United States,United States dollar,USD,,MongoDB,"Developer, back-end;Engineer, data;System admi...",Some college/university study without earning ...,Employed full-time,Black or of African descent,Man,Flex time or a flexible schedule;Remote work o...,Very dissatisfied,"I’m not actively looking, but I am open to new...",,JavaScript,,Node.js,,Slack,Not sure,Neutral,Critically important,Growth or leadership opportunities,"Read other media like news articles, founder p...",Once every few years,No,No,No,Often: 1-2 days per week or more,,Indifferent,Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,MacOS,100 to 499 employees,,Docker;Google Cloud Platform,I have little or no influence,Straight / Heterosexual,Yes,"No, not at all",I have never participated in Q&A on Stack Over...,A few times per month or weekly,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",,React.js,Not applicable - I did not use Stack Overflow ...,40.0,3,3
816,818,"I am not primarily a developer, but I write co...",Yes,40.0,15,Weekly,70000.0,2000000.0,United States,United States dollar,USD,PostgreSQL;Redis;SQLite,IBM DB2;MySQL;PostgreSQL;SQLite,Data or business analyst,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,"Languages, frameworks, and other technologies ...",Very dissatisfied,I am actively looking for a job,Bash/Shell/PowerShell;HTML/CSS;Java;JavaScript...,Bash/Shell/PowerShell;HTML/CSS;Java;Python;SQL...,.NET;Ansible;Chef;Node.js;Pandas;TensorFlow,,Confluence;Jira;Github;Gitlab;Slack;Microsoft ...,Confluence;Github,No,Somewhat important,Fairly important,Having a bad day (or week or month) at work;Be...,"Read company media, such as employee blogs or ...",Once a year,No,No,No,Occasionally: 1-2 days per quarter but less th...,Ask developers I know/work with;Visit develope...,Indifferent,Stack Overflow (public Q&A for anyone who code...,Play games;Call a coworker or friend;Visit Sta...,Linux-based,100 to 499 employees,Android;AWS;Docker;Google Cloud Platform;Linux...,AWS;Heroku;Linux;Raspberry Pi,I have some influence,Straight / Heterosexual,Yes,"Yes, somewhat",A few times per week,Multiple times per day,Easy,Appropriate in length,No,"A social science (such as anthropology, psycho...",Angular;Flask;Spring,,Somewhat less welcome now than last year,40.0,25,2
982,986,I am a developer by profession,Yes,27.0,11,Weekly,81000.0,2000000.0,United States,United States dollar,USD,MongoDB;MySQL;PostgreSQL,MongoDB;MySQL;PostgreSQL,"Database administrator;Designer;Developer, bac...","Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Employed full-time,White or of European descent,Man,"Flex time or a flexible schedule;Languages, fr...",Very satisfied,"I’m not actively looking, but I am open to new...",C++;HTML/CSS;JavaScript;PHP;TypeScript,C;C++;HTML/CSS;Java;JavaScript;Objective-C;PHP...,Node.js,Cordova;Node.js;React Native,Github;Gitlab;Stack Overflow for Teams,Github;Gitlab;Stack Overflow for Teams,No,Somewhat important,Not at all important/not necessary,Better compensation;Trouble with my direct man...,"Read company media, such as employee blogs or ...",Once a year,No,Yes,Yes,Rarely: 1-2 days per year or less,Ask developers I know/work with;Visit develope...,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,Windows,"1,000 to 4,999 employees",Android;Arduino;AWS;Docker;Linux;Raspberry Pi;...,Android;Arduino;AWS;iOS;Linux;MacOS;Raspberry ...,I have some influence,Straight / Heterosexual,Yes,"Yes, definitely",A few times per month or weekly,Multiple times per day,Easy,Too long,No,"Computer science, computer engineering, or sof...",Express;Laravel,Express;jQuery;Laravel,Just as welcome now as I felt last year,40.0,16,8
1018,1022,I am a developer by profession,Yes,34.0,16,Weekly,142000.0,2000000.0,United States,United States dollar,USD,Elasticsearch;MongoDB;PostgreSQL;Redis,MongoDB;PostgreSQL,"Developer, full-stack","Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Employed full-time,White or of European descent,Man,"Languages, frameworks, and other technologies ...",Very satisfied,I am not interested in new job opportunities,HTML/CSS;JavaScript;Scala;SQL;TypeScript,HTML/CSS;Java;JavaScript;Scala;SQL;TypeScript,Apache Spark;Node.js,Node.js,"Github;Trello;Google Suite (Docs, Meet, etc)",Confluence;Jira;Github;Slack;Trello;Google Sui...,Yes,Extremely important,Very important,Having a bad day (or week or month) at work;Be...,Company reviews from third party sites (e.g. G...,Once every few years,Not sure,No,No,Rarely: 1-2 days per year or less,,Annoyed,Stack Overflow (public Q&A for anyone who code...,Call a coworker or friend;Visit Stack Overflow...,MacOS,100 to 499 employees,AWS;Docker;Kubernetes,AWS;Docker;Heroku,I have little or no influence,Straight / Heterosexual,Yes,"Yes, somewhat",A few times per month or weekly,Daily or almost daily,Easy,Too long,No,"Computer science, computer engineering, or sof...",React.js,Angular;Angular.js;React.js;Vue.js,Just as welcome now as I felt last year,40.0,18,13
1032,1036,I am a developer by profession,Yes,26.0,11,Weekly,68500.0,2000000.0,United States,United States dollar,USD,MongoDB;PostgreSQL,MariaDB;Microsoft SQL Server;MongoDB;MySQL;Pos...,"Database administrator;Developer, back-end;Dev...","Associate degree (A.A., A.S., etc.)",Employed full-time,White or of European descent;Indigenous (such ...,Man,Remote work options;Office environment or comp...,Slightly satisfied,I am not interested in new job opportunities,C#;JavaScript;Perl;TypeScript,C;C#;HTML/CSS;Java;JavaScript;Perl;PHP;SQL;Typ...,.NET Core;Node.js;Xamarin,.NET;.NET Core;Node.js;React Native;Xamarin,Microsoft Teams;Microsoft Azure,Jira;Github;Microsoft Teams;Microsoft Azure;Go...,No,Extremely important,Critically important,Better compensation;Wanting to work with new t...,"Read company media, such as employee blogs or ...",Once a year,Not sure,Yes,No,Occasionally: 1-2 days per quarter but less th...,,"Hello, old friend",Stack Overflow (public Q&A for anyone who code...,Visit Stack Overflow;Go for a walk or other ph...,Windows,20 to 99 employees,Linux,Docker;Linux;Raspberry Pi;Windows;WordPress,I have little or no influence,Straight / Heterosexual,Yes,"Yes, somewhat",A few times per month or weekly,A few times per week,Easy,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core;Vue.js,Angular;ASP.NET;ASP.NET Core;Django;Express;Fl...,A lot more welcome now than last year,40.0,5,3
