In [1]:
import pandas as pd

In [2]:
# Sample data for the tutorial: one key per column, each holding a list of
# 15 values (one per person). Position i across the lists describes person i.
people = {
    'first_name': ['John', 'Jane', 'Michael', 'Emily', 'David', 'Sarah', 'Daniel', 'Olivia', 'Matthew', 'Sophia', 'Christopher', 'Isabella', 'Andrew', 'Emma', 'James'],
    'last_name': ['Smith', 'Johnson', 'Williams', 'Jones', 'Brown', 'Davis', 'Miller', 'Wilson', 'Moore', 'Taylor', 'Anderson', 'Thomas', 'Jackson', 'White', 'Harris'],
    'email': ['john@example.com', 'jane@example.com', 'michael@example.com', 'emily@example.com', 'david@example.com', 'sarah@example.com', 'daniel@example.com', 'olivia@example.com', 'matthew@example.com', 'sophia@example.com', 'christopher@example.com', 'isabella@example.com', 'andrew@example.com', 'emma@example.com', 'james@example.com'],
    'country': ['USA', 'Canada', 'UK', 'Australia', 'Germany', 'France', 'Spain', 'Italy', 'Japan', 'China', 'Brazil', 'Mexico', 'India', 'South Africa', 'Russia'],
    'gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M'],
    'age': [32, 28, 35, 42, 39, 31, 37, 26, 34, 29, 33, 27, 36, 30, 38]
}

In [3]:
# Build the working frame: each key of `people` becomes a column.
df = pd.DataFrame(data=people)

### Changing columns

In [4]:
# Upper-case every column label through the Index string accessor.
df.columns = df.columns.str.upper()

In [5]:
df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL', 'COUNTRY', 'GENDER', 'AGE'], dtype='object')

In [6]:
# Swap underscores for spaces in every column label.
df.columns = [label.replace('_', ' ') for label in df.columns]

In [8]:
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL,COUNTRY,GENDER,AGE
0,John,Smith,john@example.com,USA,M,32
1,Jane,Johnson,jane@example.com,Canada,F,28
2,Michael,Williams,michael@example.com,UK,M,35
3,Emily,Jones,emily@example.com,Australia,F,42
4,David,Brown,david@example.com,Germany,M,39
5,Sarah,Davis,sarah@example.com,France,F,31
6,Daniel,Miller,daniel@example.com,Spain,M,37
7,Olivia,Wilson,olivia@example.com,Italy,F,26
8,Matthew,Moore,matthew@example.com,Japan,M,34
9,Sophia,Taylor,sophia@example.com,China,F,29


Let's revert our changes.

In [11]:
# Undo both renames: spaces back to underscores, then lower-case the labels.
df.columns = df.columns.str.replace(' ', '_').str.lower()

In [12]:
df.columns

Index(['first_name', 'last_name', 'email', 'country', 'gender', 'age'], dtype='object')

In [13]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,M,32
1,Jane,Johnson,jane@example.com,Canada,F,28
2,Michael,Williams,michael@example.com,UK,M,35
3,Emily,Jones,emily@example.com,Australia,F,42
4,David,Brown,david@example.com,Germany,M,39
5,Sarah,Davis,sarah@example.com,France,F,31
6,Daniel,Miller,daniel@example.com,Spain,M,37
7,Olivia,Wilson,olivia@example.com,Italy,F,26
8,Matthew,Moore,matthew@example.com,Japan,M,34
9,Sophia,Taylor,sophia@example.com,China,F,29


In [14]:
# Preview only: rename returns a new frame, and the result is not assigned,
# so `df` itself still has a 'gender' column afterwards.
df.rename({'gender': 'sex'}, axis='columns')

Unnamed: 0,first_name,last_name,email,country,sex,age
0,John,Smith,john@example.com,USA,M,32
1,Jane,Johnson,jane@example.com,Canada,F,28
2,Michael,Williams,michael@example.com,UK,M,35
3,Emily,Jones,emily@example.com,Australia,F,42
4,David,Brown,david@example.com,Germany,M,39
5,Sarah,Davis,sarah@example.com,France,F,31
6,Daniel,Miller,daniel@example.com,Spain,M,37
7,Olivia,Wilson,olivia@example.com,Italy,F,26
8,Matthew,Moore,matthew@example.com,Japan,M,34
9,Sophia,Taylor,sophia@example.com,China,F,29


### Changing rows

In [15]:
# Overwrite the whole row labelled 2. The list supplies one value per column,
# in column order (first_name, last_name, email, country, gender, age).
df.loc[2] = ['John', 'Maxima', 'john@maxima.me', 'Turkey', 'M', 53]

In [17]:
# Change a single value: row label 12, column 'email'.
df.loc[12, 'email'] = 'aj@micro.io'

In [18]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,M,32
1,Jane,Johnson,jane@example.com,Canada,F,28
2,John,Maxima,john@maxima.me,Turkey,M,53
3,Emily,Jones,emily@example.com,Australia,F,42
4,David,Brown,david@example.com,Germany,M,39
5,Sarah,Davis,sarah@example.com,France,F,31
6,Daniel,Miller,daniel@example.com,Spain,M,37
7,Olivia,Wilson,olivia@example.com,Italy,F,26
8,Matthew,Moore,matthew@example.com,Japan,M,34
9,Sophia,Taylor,sophia@example.com,China,F,29


In [19]:
# `.at` addresses exactly one cell by row label and column name.
df.at[2, 'last_name'] = 'Maxima Taximma'

In [20]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,M,32
1,Jane,Johnson,jane@example.com,Canada,F,28
2,John,Maxima Taximma,john@maxima.me,Turkey,M,53
3,Emily,Jones,emily@example.com,Australia,F,42
4,David,Brown,david@example.com,Germany,M,39
5,Sarah,Davis,sarah@example.com,France,F,31
6,Daniel,Miller,daniel@example.com,Spain,M,37
7,Olivia,Wilson,olivia@example.com,Italy,F,26
8,Matthew,Moore,matthew@example.com,Japan,M,34
9,Sophia,Taylor,sophia@example.com,China,F,29


### Note
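Prefer `at` over `loc` when you read or write a single cell: it's faster for scalar access. Keep using `loc` when you select several rows or columns at once.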

### Warning
When you filter a DataFrame (e.g. `df[filt]`), the result is only a copy of the original. Assigning values to that filtered copy does not change the original DataFrame.

In [24]:
# Boolean mask: True for the rows whose email is exactly 'aj@micro.io'.
filt = df['email'].eq('aj@micro.io')

In [25]:
# Intentional anti-pattern, kept to show the warning below: `df[filt]` yields
# a copy, so this assignment targets that copy rather than `df`.
df[filt]['email'] = 'andjackson@micro.io'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['email'] = 'andjackson@micro.io'


The right way is:

In [26]:
# One `.loc` call selects the rows (mask) and the column together, so the
# assignment is written into `df` itself.
df.loc[filt, 'email'] = 'andjackson@micro.io'

In [27]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,M,32
1,Jane,Johnson,jane@example.com,Canada,F,28
2,John,Maxima Taximma,john@maxima.me,Turkey,M,53
3,Emily,Jones,emily@example.com,Australia,F,42
4,David,Brown,david@example.com,Germany,M,39
5,Sarah,Davis,sarah@example.com,France,F,31
6,Daniel,Miller,daniel@example.com,Spain,M,37
7,Olivia,Wilson,olivia@example.com,Italy,F,26
8,Matthew,Moore,matthew@example.com,Japan,M,34
9,Sophia,Taylor,sophia@example.com,China,F,29


### Updating all rows at once

In [28]:
# Lower-case every value of the column in one call via the `.str` accessor.
df['gender'] = df['gender'].str.lower()

In [29]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,m,32
1,Jane,Johnson,jane@example.com,Canada,f,28
2,John,Maxima Taximma,john@maxima.me,Turkey,m,53
3,Emily,Jones,emily@example.com,Australia,f,42
4,David,Brown,david@example.com,Germany,m,39
5,Sarah,Davis,sarah@example.com,France,f,31
6,Daniel,Miller,daniel@example.com,Spain,m,37
7,Olivia,Wilson,olivia@example.com,Italy,f,26
8,Matthew,Moore,matthew@example.com,Japan,m,34
9,Sophia,Taylor,sophia@example.com,China,f,29


### Updating rows in an advanced way

These methods are used when updating all rows:

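- `apply`: Calls a function on our values. Works on both DataFrames (the function receives a whole column or row) and Series (the function receives each element).
- `map`: Element-wise. On a Series it accepts a function or a dict; values that are missing from the dict become `NaN`.
- `applymap`: Element-wise on a DataFrame. It's deprecated; use `DataFrame.map` instead.
- `replace`: Like `map` with a dict, but only the listed values are changed; all other values are left as they are.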

In [30]:
# On a Series, apply calls `len` on each element: the length of every email.
df['email'].apply(len)

0     16
1     16
2     14
3     17
4     17
5     17
6     18
7     18
8     19
9     18
10    23
11    20
12    19
13    16
14    17
Name: email, dtype: int64

In [31]:
def update_gender(gender):
    """Expand a gender code into its full label.

    Accepts the short codes ('m' / 'f') as well as the full labels
    ('male' / 'female'), ignoring case and surrounding whitespace, so the
    function can safely be applied again to an already-converted column.

    Args:
        gender: The value to convert, normally a string.

    Returns:
        'Male' or 'Female' for a recognised value. Anything else (an unknown
        string, None, NaN, ...) is returned unchanged instead of being
        silently labelled 'Female'.
    """
    labels = {'m': 'Male', 'male': 'Male', 'f': 'Female', 'female': 'Female'}
    # Non-string values (e.g. missing data) cannot be normalised; pass through.
    if not isinstance(gender, str):
        return gender
    return labels.get(gender.strip().lower(), gender)

In [33]:
# Run update_gender on every value of the column and store the result back.
df['gender'] = df['gender'].apply(update_gender)

In [34]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,Male,32
1,Jane,Johnson,jane@example.com,Canada,Female,28
2,John,Maxima Taximma,john@maxima.me,Turkey,Male,53
3,Emily,Jones,emily@example.com,Australia,Female,42
4,David,Brown,david@example.com,Germany,Male,39
5,Sarah,Davis,sarah@example.com,France,Female,31
6,Daniel,Miller,daniel@example.com,Spain,Male,37
7,Olivia,Wilson,olivia@example.com,Italy,Female,26
8,Matthew,Moore,matthew@example.com,Japan,Male,34
9,Sophia,Taylor,sophia@example.com,China,Female,29


In [35]:
# apply accepts any callable; here the unbound str.lower is called per value.
df['gender'] = df['gender'].apply(str.lower)

In [36]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,male,32
1,Jane,Johnson,jane@example.com,Canada,female,28
2,John,Maxima Taximma,john@maxima.me,Turkey,male,53
3,Emily,Jones,emily@example.com,Australia,female,42
4,David,Brown,david@example.com,Germany,male,39
5,Sarah,Davis,sarah@example.com,France,female,31
6,Daniel,Miller,daniel@example.com,Spain,male,37
7,Olivia,Wilson,olivia@example.com,Italy,female,26
8,Matthew,Moore,matthew@example.com,Japan,male,34
9,Sophia,Taylor,sophia@example.com,China,female,29


In [37]:
# On a DataFrame, apply passes each column (a Series) to `len` by default,
# so every result is the number of rows.
df.apply(len)

first_name    15
last_name     15
email         15
country       15
gender        15
age           15
dtype: int64

In [38]:
# With axis='columns' each row is passed to `len` instead, so every result
# is the number of columns.
df.apply(len, axis='columns')

0     6
1     6
2     6
3     6
4     6
5     6
6     6
7     6
8     6
9     6
10    6
11    6
12    6
13    6
14    6
dtype: int64

In [40]:
# pd.Series.min is called once per column; for the text columns the minimum
# is the alphabetically first value.
df.apply(pd.Series.min)

first_name                 Andrew
last_name                Anderson
email         andjackson@micro.io
country                 Australia
gender                     female
age                            26
dtype: object

In [42]:
# Same idea with a lambda: each column arrives as a Series and we take its max.
df.apply(lambda column: column.max())

first_name                Sophia
last_name                 Wilson
email         sophia@example.com
country                      USA
gender                      male
age                           53
dtype: object

In [45]:
# Element-wise call on every cell of the frame. DataFrame.map replaces the
# deprecated DataFrame.applymap (requires pandas >= 2.1).
df.map(type)

  df.applymap(type)


Unnamed: 0,first_name,last_name,email,country,gender,age
0,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
1,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
2,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
3,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
4,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
5,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
6,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
7,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
8,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>
9,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'int'>


In [46]:
df

Unnamed: 0,first_name,last_name,email,country,gender,age
0,John,Smith,john@example.com,USA,male,32
1,Jane,Johnson,jane@example.com,Canada,female,28
2,John,Maxima Taximma,john@maxima.me,Turkey,male,53
3,Emily,Jones,emily@example.com,Australia,female,42
4,David,Brown,david@example.com,Germany,male,39
5,Sarah,Davis,sarah@example.com,France,female,31
6,Daniel,Miller,daniel@example.com,Spain,male,37
7,Olivia,Wilson,olivia@example.com,Italy,female,26
8,Matthew,Moore,matthew@example.com,Japan,male,34
9,Sophia,Taylor,sophia@example.com,China,female,29


In [47]:
# map with a dict substitutes the listed values; every last name that is not
# a key of the dict comes back as NaN.
df['last_name'].map({'Brown': 'Yellow', 'Taylor': 'Jeferson'})

0          NaN
1          NaN
2          NaN
3          NaN
4       Yellow
5          NaN
6          NaN
7          NaN
8          NaN
9     Jeferson
10         NaN
11         NaN
12         NaN
13         NaN
14         NaN
Name: last_name, dtype: object

In [48]:
# replace with the same dict changes only the listed values and keeps every
# other last name as it is.
df['last_name'].replace({'Brown': 'Yellow', 'Taylor': 'Jeferson'})

0              Smith
1            Johnson
2     Maxima Taximma
3              Jones
4             Yellow
5              Davis
6             Miller
7             Wilson
8              Moore
9           Jeferson
10          Anderson
11            Thomas
12           Jackson
13             White
14            Harris
Name: last_name, dtype: object

In [49]:
# Make the rename permanent by assigning the returned frame back to `df`
# instead of mutating in place with inplace=True.
df = df.rename(columns={'gender': 'sex'})

In [50]:
df

Unnamed: 0,first_name,last_name,email,country,sex,age
0,John,Smith,john@example.com,USA,male,32
1,Jane,Johnson,jane@example.com,Canada,female,28
2,John,Maxima Taximma,john@maxima.me,Turkey,male,53
3,Emily,Jones,emily@example.com,Australia,female,42
4,David,Brown,david@example.com,Germany,male,39
5,Sarah,Davis,sarah@example.com,France,female,31
6,Daniel,Miller,daniel@example.com,Spain,male,37
7,Olivia,Wilson,olivia@example.com,Italy,female,26
8,Matthew,Moore,matthew@example.com,Japan,male,34
9,Sophia,Taylor,sophia@example.com,China,female,29
