In [1]:
people = {
    "first" : ['Shreya', 'Jane', 'John'],
    "last" : ['Walia', 'Doe', 'Doe'],
    "email" : ['sw@email.com', 'janedoe@email.com', 'johndoe@email.com']
}

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

# updating columns

In [6]:
#1. Renaming 
#used when we need to update all columns 
#assigning column name
df.columns = ['first_name', 'last_name', 'email']

In [7]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [8]:
#to change something specific in col names like upper, lower casing or removing space from all col names
df.columns = [x.upper() for x in df.columns]
df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [9]:
df.columns = df.columns.str.replace('_', ' ')
df.columns

Index(['FIRST NAME', 'LAST NAME', 'EMAIL'], dtype='object')

In [10]:
df.columns = df.columns.str.replace(' ', '_')
df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [11]:
df.columns = [x.lower() for x in df.columns]
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [12]:
#to change only some columns 
#use rename and pass the dictionary of the columns to be changed
df.rename(columns = {'first_name':'first', 'last_name':'last'}, inplace = True )

#key is ola value and value is new value so we map first_name to first and next #need inplsce args

In [13]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

# updating data in rows

In [14]:
#updating single value
#grabbing row 2
df.iloc[2]
#can use loc too for being more specific

first                 John
last                   Doe
email    johndoe@email.com
Name: 2, dtype: object

In [15]:
#to update this row we can do multiple things
#1st lets pass all new values for row in form of list to update all values
df.iloc[2] = ['John', 'Smith', 'johnsmil@email.com']
df

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johnsmil@email.com


In [16]:
#to update specific columns of a row 
df.loc[2, ['last', 'email']] = ['Snow', 'jsnow@email.com']
df

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Snow,jsnow@email.com


In [17]:
#updating single value
df.loc[2, 'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,jsnow@email.com


In [18]:
#another indexer that can be used is 'at'
df.at[2, 'last'] = 'Doe'
df
#if the column doesnt exist it creates a new one in case of loc iloc too

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,jsnow@email.com


In [19]:
#common error 
#if we try to change the value without an indexer #example
filt = (df['email'] == 'jsnow@email.com')
df[filt] #will look for value

Unnamed: 0,first,last,email
2,John,Doe,jsnow@email.com


In [20]:
#we can get last column too
df[filt]['last']

2    Doe
Name: last, dtype: object

In [21]:
#lets try to assign new value
df[filt]['last'] = 'Smith'
df
#will give warning as background operations do not let it happen

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,jsnow@email.com


In [22]:
df.loc[2, ['last', 'email']] = ['Smith', 'johnsmith@email.com']
df

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johnsmith@email.com


In [23]:
#updating multiple rows of data
#changing all emails to upper case
df['email'] = df['email'].str.upper() #is not inplace #use assignment
df

Unnamed: 0,first,last,email
0,Shreya,Walia,SW@EMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNSMITH@EMAIL.COM


In [24]:
#4 methods to do row updation
#1. apply #2.map #3. applymap #4. replace

### apply

In [25]:
#apply is used to call function to all values in series or dataframe objects
df['email'] = df['email'].apply(str.lower)
df

Unnamed: 0,first,last,email
0,Shreya,Walia,sw@email.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johnsmith@email.com


In [26]:
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [27]:
#we can define functions and use them as well #do not use () as it will perform function there only 
def update_last(last):
    return last.upper()

df['last'].apply(update_last)

0    WALIA
1      DOE
2    SMITH
Name: last, dtype: object

In [28]:
#lambda functions can also be used for simple functions
df['last'].apply(lambda x: x.lower())

0    walia
1      doe
2    smith
Name: last, dtype: object

In [29]:
#we can use lambda for integer values too 
#in above case it is being used for series so it runs on all values of series
#but can be used for dataframe as well in that case it will run or row or column not each value of row and col
#applies to every series in df
#explaination

In [30]:
df.apply(len) #applying length on first, last and email columns and find how many rows each col has

first    3
last     3
email    3
dtype: int64

In [31]:
df.apply(len, axis = 'columns') #to know how many columns each row has

0    3
1    3
2    3
dtype: int64

In [32]:
#to find min value for each column
df.apply(pd.Series.min) #gives min for each column # use axis = 'columns' to get min for each row

first                 Jane
last                   Doe
email    janedoe@email.com
dtype: object

In [33]:
#lambda function can also be used
df.apply(lambda x:x.min()) #x is series in this case so min can be used with it

first                 Jane
last                   Doe
email    janedoe@email.com
dtype: object

### Applymap 

In [34]:
#to apply a func on all elements in dataframes use applymap #used with dataframes only
df.applymap(len)

Unnamed: 0,first,last,email
0,6,5,12
1,4,3,17
2,4,5,19


In [35]:
df.applymap(str.upper)

Unnamed: 0,first,last,email
0,SHREYA,WALIA,SW@EMAIL.COM
1,JANE,DOE,JANEDOE@EMAIL.COM
2,JOHN,SMITH,JOHNSMITH@EMAIL.COM


### map

In [36]:
#used with series only 
#for substituting each value in series with another value
df['first'].map({'shreya' : 'chris', 'jane': 'mary'})

0    NaN
1    NaN
2    NaN
Name: first, dtype: object

In [37]:
#dictionary is passed showing values to be sbstituted
#value not substituted gives NaN , to replace specific values successfully use replace

### replace

In [38]:
df['first'].replace({'shreya' : 'chris', 'jane': 'mary'})

0    Shreya
1      Jane
2      John
Name: first, dtype: object

In [39]:
df = df.applymap(str.capitalize)
df

Unnamed: 0,first,last,email
0,Shreya,Walia,Sw@email.com
1,Jane,Doe,Janedoe@email.com
2,John,Smith,Johnsmith@email.com


## Adding and Removing data from df

In [40]:
#adding columns first and last

df['first'] + ' ' + df['last']

0    Shreya Walia
1        Jane Doe
2      John Smith
dtype: object

In [41]:
#assigning it to new col
df['Fullname'] = df['first'] + ' ' + df['last']

In [42]:
df

Unnamed: 0,first,last,email,Fullname
0,Shreya,Walia,Sw@email.com,Shreya Walia
1,Jane,Doe,Janedoe@email.com,Jane Doe
2,John,Smith,Johnsmith@email.com,John Smith


In [43]:
#can also use apply func to create new col in case working with mathematical data
#df['newcol'] = df['num'].apply(sqrt)

In [44]:
#in this case dot(.) cant be used to access and add as python will take dot to be attribute not col

In [45]:
#removing columns

In [46]:
#using drop
df.drop(columns = ['first', 'last'], inplace = True)
df

Unnamed: 0,email,Fullname
0,Sw@email.com,Shreya Walia
1,Janedoe@email.com,Jane Doe
2,Johnsmith@email.com,John Smith


In [47]:
#to reverse process and split fullname
df['Fullname'].str.split(' ')

0    [Shreya, Walia]
1        [Jane, Doe]
2      [John, Smith]
Name: Fullname, dtype: object

In [48]:
#we get a list containg first and last names 
#to get in two lists use expand
df['Fullname'].str.split(' ', expand = True)

Unnamed: 0,0,1
0,Shreya,Walia
1,Jane,Doe
2,John,Smith


In [49]:
#assign col names
df[['first', 'last']] = df['Fullname'].str.split(' ', expand = True)
df

Unnamed: 0,email,Fullname,first,last
0,Sw@email.com,Shreya Walia,Shreya,Walia
1,Janedoe@email.com,Jane Doe,Jane,Doe
2,Johnsmith@email.com,John Smith,John,Smith


In [50]:
df.drop(columns = 'Fullname',inplace = True)
df

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,Johnsmith@email.com,John,Smith


### adding and removing rows

In [51]:
#adding single row of data
#use append
#df.append({'fisrt' : 'Tony'}) #gives error Can only append a Series if ignore_index=True or if the Series has a name
df.append({'fisrt' : 'Tony'}, ignore_index = True)

Unnamed: 0,email,first,last,fisrt
0,Sw@email.com,Shreya,Walia,
1,Janedoe@email.com,Jane,Doe,
2,Johnsmith@email.com,John,Smith,
3,,,,Tony


In [52]:
#assign value as no inplace args
#append two df
df2 = pd.DataFrame({
    "first" : ['Sh', 'Ja'],
    "last" : ['Wal', 'Do'],
    "email" : ['s@email.com', 'jd@email.com']
})

In [53]:
#use sort = False if warning
df.append(df2, ignore_index = True)

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,Johnsmith@email.com,John,Smith
3,s@email.com,Sh,Wal
4,jd@email.com,Ja,Do


In [54]:
#use assignment to make changes permanent
df = df.append(df2, ignore_index = True)

In [55]:
#removing rows
df.drop(index = 4, inplace = True)
df

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,Johnsmith@email.com,John,Smith
3,s@email.com,Sh,Wal


In [68]:
#can drop using conditionals
#can use loc or filters
df.drop(index = df[df['last'] == 'Wal'].index, inplace = True)
df.loc[2, ['last', 'email']] = ['Doe', 'johndoe@email.com']

In [69]:
df

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


## Sorting data

In [70]:
#sorting by last name
df.sort_values(by = 'last')

Unnamed: 0,email,first,last
1,Janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe
0,Sw@email.com,Shreya,Walia


In [71]:
df.sort_values(by = 'last', ascending = False)

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [72]:
#sorting on multiple columns #done when there is duplicate value in first col
df.sort_values(by = ['last', 'first']) #use list

Unnamed: 0,email,first,last
1,Janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe
0,Sw@email.com,Shreya,Walia


In [None]:
#if we want to sort one col in ascending order and other in descending order
#pass list of boolean values to ascending arg #set inplace args 

In [74]:
df.sort_values(by = ['last', 'first'], ascending = [False, True], inplace = True)

In [75]:
df

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [76]:
#sorting back to before
df.sort_index()

Unnamed: 0,email,first,last
0,Sw@email.com,Shreya,Walia
1,Janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Doe


In [77]:
#for sorting single column like one series column #series obj too have sort_value func
df['last'].sort_values()

1      Doe
2      Doe
0    Walia
Name: last, dtype: object