In [1]:
# Sample records stored column-wise: each key is a column label and each
# list holds that column's values, one entry per person.
people = dict(
    FIRST=["Corey", "Jane", "John"],
    LAST=["Schafer", "Doe", "Doe"],
    EMAIL=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [2]:
import pandas as pd

In [3]:
# Build the working DataFrame: dict keys become column labels, list items become rows.
df = pd.DataFrame(data=people)

In [4]:
df

Unnamed: 0,FIRST,LAST,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


##### Updating Columns

In [None]:
# Inspect the current column labels before changing them.
df.columns

In [5]:
# Replace every column label at once by assigning a full list of new names
# (one name per column, in positional order).

df.columns = ['first_name', 'last_name', 'email']

In [6]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [7]:
# A list comprehension can rebuild the labels too: here every column name is
# lowercased and the resulting list is assigned back as the new labels.

df.columns = [label.lower() for label in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [8]:
# If column names contain spaces, replace each space with an underscore.
# regex=False makes the literal (non-regex) match explicit, so the result does
# not depend on the pandas version's default for the `regex` parameter.
df.columns = df.columns.str.replace(' ', '_', regex=False)
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [10]:
# Rename only the listed columns; labels missing from the mapping are left untouched.
# rename() returns a new DataFrame, so the result is assigned back to df instead of
# using inplace=True, which keeps the call chainable and its effect explicit.

df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


##### Updating Rows

In [11]:
# Overwrite the whole row labelled 2 by assigning one value per column
# (first, last, email) through the label-based .loc indexer.

df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']

In [13]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@email.com


In [14]:
# Update only some columns of row 2: the second argument to .loc is a list of
# column labels, and the right-hand list supplies their new values in the same order.

df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']

In [15]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [16]:
# Set a single cell (row label 2, column 'last') to 'Smith' using .loc.

df.loc[2, 'last'] = 'Smith'

In [17]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [18]:
# Same single-cell update as with .loc, but through .at, the accessor that
# addresses exactly one row/column label pair. Sets 'last' of row 2 back to 'Doe'.

df.at[2, 'last'] = 'Doe'

In [19]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [20]:
# Build a boolean mask: one True/False per row, True where the 'email' value
# equals 'JohnDoe@email.com'. The mask is reused below to select rows to update.

filt = (df['email'] == 'JohnDoe@email.com')
filt

0    False
1    False
2     True
Name: email, dtype: bool

In [21]:
# Set 'last' to 'Smith' on every row where filt is True. Passing the mask and
# the column label to .loc in one step writes to df itself.

df.loc[filt, 'last'] = 'Smith'

In [22]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [23]:
# Lowercase every value in the 'email' column with the vectorised .str accessor
# and assign the result back so the change is stored in df.

df['email'] = df['email'].str.lower()

In [24]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


##### Four commonly used methods: apply, map, applymap, replace

##### apply

In [25]:
# apply on a Series (a single column): the function is called once per value.
# Here len() gives the number of characters in each email address.

df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [26]:
# Helper meant to be passed to apply(): turns one email address into uppercase.

def update_email(email: str) -> str:
    """Return ``email`` converted to uppercase."""
    shouted = email.upper()
    return shouted

In [27]:
# Run update_email on every email value; this returns a new Series and leaves df unchanged.
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [28]:
# The uppercase result was displayed above, but it was never assigned back,
# so df still holds the lowercase emails.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [29]:
# To store the change, assign the Series returned by apply() back to the column.

df['email'] = df['email'].apply(update_email)

In [30]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [31]:
# Convert every email address back to lowercase.
#
# apply() calls the supplied function once per value in the column. Instead of a
# named function, an inline lambda receives each address and returns it lowercased;
# the resulting Series is assigned back to the column.

df['email'] = df['email'].apply(lambda address: address.lower())

In [32]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [33]:
# Recap: on a Series, apply(len) yields the character count of each value.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

##### How apply runs on a dataframe

In [35]:
# On a DataFrame, apply() passes a whole Series to the function rather than single values.
# With axis='columns' the function receives each row, so len() returns the number of
# entries in that row.

df.apply(len, axis='columns')

# The result is a Series with one entry per row; every entry is 3 because each row
# holds three values (first, last, email).

0    3
1    3
2    3
dtype: int64

In [36]:
# Calling len() directly on the column returns how many entries it has (the row count),
# not the length of each string.

len(df['email'])

3

In [37]:
# Apply pd.Series.min to each column; the result is a Series holding one minimum
# per column. All columns here contain strings, so the minimum is the value that
# sorts first in string order, as the output shows.

# This can be useful when you want to find the minimum value of every column in your dataset.

df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [38]:
# Equivalent to the previous cell, written with a lambda: apply() hands each
# column to the lambda as a Series, and the lambda returns that column's minimum.

df.apply(lambda column: column.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

##### applymap 

In [39]:
# Element-wise transformation: compute len() of every individual cell in the DataFrame.
# DataFrame.applymap is deprecated and emits a FutureWarning on recent pandas;
# DataFrame.map is its replacement, so it is used here.

df.map(len)

  df.applymap(len)


Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [40]:
# DataFrame.map applies the function to every cell: here, the length of each string.
df.map(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [41]:
# Lowercase every cell in the DataFrame by applying str.lower element-wise.
# DataFrame.map is used because DataFrame.applymap is deprecated and emits a
# FutureWarning on recent pandas. The result is a new DataFrame; df is not modified.

df.map(str.lower)

  df.applymap(str.lower)


Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [42]:
# DataFrame.map with str.lower returns a lowercased copy of every cell; df itself is unchanged.
df.map(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [43]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


##### map method

In [44]:
# Series.map with a dictionary substitutes each value in 'first' with the dictionary
# entry for it and returns a new Series. Values that have no matching key
# ('John' here) come back as NaN rather than being kept.

df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [45]:
# The DataFrame is not updated, because the mapped Series was not assigned back.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


##### replace method

In [48]:
# Series.replace with a dictionary swaps only the listed values in 'first' and
# keeps everything else as it is; the result is assigned back to store it in df.

df['first'] = df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

# After this cell every 'Corey' is 'Chris' and every 'Jane' is 'Mary' in the 'first'
# column. Values that are not keys of the dictionary (such as 'John') remain unchanged,
# unlike with map(), where they became NaN.

In [49]:
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com
