In [1]:
# Sample data in column-oriented form: each key names a field and each list
# holds that field's values, one entry per person.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [2]:
import pandas as pd

In [3]:
# Build the DataFrame from the dict of lists: keys become column labels.
df = pd.DataFrame(data=people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [9]:
# Renaming every column at once: assign a complete list of new labels,
# one per existing column, in column order.
df.columns = [
    'first_name',
    'last_name',
    'email',
]

In [8]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [10]:
# Lowercasing/uppercasing the column labels with the vectorized .str accessor
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [11]:
# Replacing underscores with spaces in the column labels.
# regex=False requests a literal substring replacement, so the result does not
# depend on the pandas version's default for the `regex` parameter.
df.columns = df.columns.str.replace('_', ' ', regex=False)
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
# Going back: turn spaces into underscores again, then lowercase the labels.
# Both steps are chained on the .str accessor so the columns are assigned once;
# regex=False keeps the replacement literal on every pandas version.
df.columns = df.columns.str.replace(' ', '_', regex=False).str.lower()
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [17]:
# If I only want to change some columns: pass an {old_label: new_label} mapping.
# Columns that are not keys of the mapping (here 'email') keep their labels.
# The result is assigned back instead of using inplace=True, which keeps the
# call chainable and makes the state change explicit in the cell.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [18]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [20]:
# Updating Data in Our Rows

# Updating a whole row: assigning a list to df.loc[<row label>] sets one value
# per column, in column order.

df.loc[2] = [
    'John',
    'Smith',
    'JohnSmith@email.com',
]

In [23]:
# Updating several columns of the row labelled 2: the second indexer is a list
# of column labels, and the right-hand list supplies one value per label.
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']

In [24]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [30]:
# Updating a single value: row label 2, column 'last'.
df.loc[2, 'last'] = 'Smith'

In [31]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [32]:
# .at can also be used to set a single value, addressed by one row label and
# one column label (.loc is more general)

df.at[2, 'last'] = 'Doe'

In [33]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [36]:
# example of MISTAKE: chained indexing
#
# df[filt] is evaluated first and produces a separate object; the
# ['last'] = ... assignment is then applied to that intermediate object rather
# than to df, so df is left unchanged and pandas emits the warning shown below.
# This cell is kept on purpose to demonstrate the pitfall.

filt = (df['email'] == 'JohnDoe@email.com')
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [37]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [39]:
# The way it does work is: a single .loc call with the boolean mask as the row
# indexer and the column label as the column indexer, so the assignment
# targets df itself.

filt = (df['email'] == 'JohnDoe@email.com')
df.loc[filt, 'last'] = 'Smith'

In [40]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [43]:
# Lowercasing email, changing multiple rows at once:
# the .str accessor applies lower() to every value in the column, and
# assigning the result back replaces the whole column.

df['email'] = df['email'].str.lower()

In [44]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [45]:
# 4 METHODS: apply, map, applymap, replace

# apply (calling a function on our values)
# first on a Series: the function is called once per element,
# so len returns the length of each email string

df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [46]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [48]:
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [49]:
df['email'] = df['email'].apply(update_email)

In [50]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [51]:
# Lambda function: the same pattern with an anonymous function that
# lowercases each address
df['email'] = df['email'].apply(lambda address: address.lower())

In [52]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [53]:
# apply with DataFrames
# For reference, apply on a single column (a Series) runs len on each value:

df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [54]:
# On a DataFrame, apply passes each column (as a Series) to the function,
# so len returns the number of rows in every column.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [55]:
len(df['email'])

3

In [56]:
# With axis='columns' the function receives each row instead,
# so len returns the number of values (columns) in every row.
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [57]:
# pd.Series.min is called on each column, giving the smallest value per column
# (for these string columns, the value that comes first in sort order).
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [58]:
# Same per-column minimum, written as a lambda that receives each column Series
df.apply(lambda column: column.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [59]:
# applymap (apply a function to every individual element in the DataFrame)
# ONLY WORKS ON DATAFRAMES
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favor of
# DataFrame.map; on newer pandas versions use df.map(len) instead.

df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [60]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [62]:
# map (substituting each value in a Series for another value)
# Only works on a Series

df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

# Notice that every value that is not a key of the dictionary becomes NaN

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [64]:
# To avoid that, we use replace: values that are not keys of the dictionary
# are kept as they are

df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [65]:
# None of this actually modifies df (each call returns a new Series),
# so we need to assign the result back to the column:
df['first'] = df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

In [66]:
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com
