In [1]:
import pandas as pd

In [2]:
# Raw contact records keyed by column name; each list holds one value per person.
people = dict(
    first=['Jane', 'John', 'Joe'],
    last=['Doe', 'Smith', 'Hoe'],
    email=['JaneDoe@emailcom', 'JohnSmith@email.com', 'JoeHoe@email.com'],
)

In [3]:
# Turn the column-name -> values mapping into a DataFrame (one row per person).
people = pd.DataFrame(data=people)

In [4]:
# Replace all column labels at once; the list must supply one label per column.
renamed_columns = ['first_name', 'last_name', 'email']
people.columns = renamed_columns

In [5]:
# Upper-case every column label, then display the frame.
people.columns = list(map(str.upper, people.columns))
people

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Jane,Doe,JaneDoe@emailcom
1,John,Smith,JohnSmith@email.com
2,Joe,Hoe,JoeHoe@email.com


In [6]:
# Swap underscores for spaces in every column label, then display the frame.
people.columns = [label.replace('_', ' ') for label in people.columns]
people

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Jane,Doe,JaneDoe@emailcom
1,John,Smith,JohnSmith@email.com
2,Joe,Hoe,JoeHoe@email.com


In [7]:
# Restore snake_case labels in one pass: spaces back to underscores, then lower-case.
people.columns = [label.replace(' ', '_').lower() for label in people.columns]
people

Unnamed: 0,first_name,last_name,email
0,Jane,Doe,JaneDoe@emailcom
1,John,Smith,JohnSmith@email.com
2,Joe,Hoe,JoeHoe@email.com


In [8]:
# Shorten the two name columns. rename() returns a new frame, which is rebound
# to `people` rather than mutated with inplace=True; this matches the
# `df = df.drop(...)` reassignment style used later in this notebook.
# Columns not listed in the mapping (email) keep their labels.
people = people.rename(
    columns={
        'first_name': 'first',
        'last_name': 'last',
    }
)

In [9]:
# Inspect the column labels to confirm the rename took effect.
people.columns

Index(['first', 'last', 'email'], dtype='object')

In [10]:
# Put the longer snake_case labels back by assigning the full label list.
restored_columns = ['first_name', 'last_name', 'email']
people.columns = restored_columns

In [11]:
# Overwrite the whole row labelled 2; values are given in column order.
replacement_row = ['Joe', 'Moe', 'JoeMoe@email.com']
people.loc[2] = replacement_row

In [12]:
# Update only selected columns of row 2; values line up with the listed columns.
columns_to_fix = ['last_name', 'email']
people.loc[2, columns_to_fix] = ['Hoe', 'JoeHoe@email.com']

In [13]:
# Build a boolean mask selecting rows by email, then set last_name on the
# matching rows through .loc so the assignment hits the original frame.
target_email = 'JoeHoe@email.com'
filt_change_last_name = people['email'] == target_email
people.loc[filt_change_last_name, 'last_name'] = 'Joe'

In [14]:
# Normalise the email column to lower case using the vectorised string accessor.
lowercased_emails = people['email'].str.lower()
people['email'] = lowercased_emails

apply, map, applymap, replace

In [15]:
# Series.apply calls len on each email string, giving its character count.
people['email'].apply(len)

0    16
1    19
2    16
Name: email, dtype: int64

In [16]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    shouted = email.upper()
    return shouted

In [17]:
# Preview the upper-cased emails; the result is not stored, so the column is unchanged.
people['email'].apply(update_email)

0       JANEDOE@EMAILCOM
1    JOHNSMITH@EMAIL.COM
2       JOEHOE@EMAIL.COM
Name: email, dtype: object

In [18]:
# NOTE(review): this rebinds `people` to the Series returned by apply, so the
# first_name/last_name columns are no longer reachable through `people`.
# If only the email column was meant to change, assign to people['email'] instead.
people = people['email'].apply(update_email)

In [19]:
# Display `people`, which is now a Series of upper-cased emails.
people

0       JANEDOE@EMAILCOM
1    JOHNSMITH@EMAIL.COM
2       JOEHOE@EMAIL.COM
Name: email, dtype: object

In [20]:
# Wrap the email Series back into a one-column DataFrame.
people = pd.DataFrame(data=people)

In [21]:
# Display the rebuilt frame; it contains only the email column.
people

Unnamed: 0,email
0,JANEDOE@EMAILCOM
1,JOHNSMITH@EMAIL.COM
2,JOEHOE@EMAIL.COM


In [22]:
# Lower-case each email by handing str.lower straight to apply.
people['email'] = people['email'].apply(str.lower)

In [23]:
# Display the frame to confirm the emails are lower case again.
people

Unnamed: 0,email
0,janedoe@emailcom
1,johnsmith@email.com
2,joehoe@email.com


apply on DataFrame

In [24]:
# Rebuild the three-column sample frame from scratch for the DataFrame-level examples.
people = pd.DataFrame(
    data=dict(
        first=['Jane', 'John', 'Joe'],
        last=['Doe', 'Smith', 'Hoe'],
        email=['JaneDoe@emailcom', 'JohnSmith@email.com', 'JoeHoe@email.com'],
    )
)

In [25]:
# On a Series, apply runs len on each element: the character count of every email.
people['email'].apply(len)

0    16
1    19
2    16
Name: email, dtype: int64

In [26]:
# On a DataFrame, apply passes a whole Series to the function, not single cells.
# axis='columns' passes each row, so len gives the number of columns per row (3 here).
# axis='index' (the default) passes each column, so len gives the number of rows.
people.apply(len, axis = 'columns')

0    3
1    3
2    3
dtype: int64

In [27]:
# len of a single column is its number of entries (one per row).
len(people['email'])

3

In [28]:
# Apply Series.min to each column; for these string columns the minimum is the
# value that sorts first. This is more useful for numerical data.
people.apply(pd.Series.min)

first                Jane
last                  Doe
email    JaneDoe@emailcom
dtype: object

In [29]:
# Same per-column minimum, written as a lambda that receives each column as x.
people.apply(lambda x: x.min())

first                Jane
last                  Doe
email    JaneDoe@emailcom
dtype: object

applymap (only for DataFrame, not Series)

In [30]:
# applymap calls len on every individual cell of the DataFrame.
# NOTE(review): newer pandas releases deprecate applymap in favour of
# DataFrame.map — confirm against the installed pandas version before switching.
people.applymap(len)

Unnamed: 0,first,last,email
0,4,3,16
1,4,5,19
2,3,3,16


In [31]:
# Lower-case every cell. str.lower only accepts str objects, so this would
# raise an error if any column held numerical data.
people.applymap(str.lower)

Unnamed: 0,first,last,email
0,jane,doe,janedoe@emailcom
1,john,smith,johnsmith@email.com
2,joe,hoe,joehoe@email.com


map (only for Series)

In [32]:
# Series.map returns a new Series; the result is neither assigned nor the last
# expression of the cell, so it is discarded and `people` is displayed unchanged.
# In the mapped result, values missing from the dict (e.g. 'Joe') become NaN.
people['first'].map({'Jane':'Chris', 'John':'Mary'})
people

Unnamed: 0,first,last,email
0,Jane,Doe,JaneDoe@emailcom
1,John,Smith,JohnSmith@email.com
2,Joe,Hoe,JoeHoe@email.com


In [33]:
# Display the frame again: the earlier map call did not modify it.
people

Unnamed: 0,first,last,email
0,Jane,Doe,JaneDoe@emailcom
1,John,Smith,JohnSmith@email.com
2,Joe,Hoe,JoeHoe@email.com


In [34]:
# replace swaps only the values listed in the dict and leaves every other
# value (e.g. 'Joe') as it was; use it to change some of the data, not all.
people['first'].replace({'Jane':'Chris', 'John':'Mary'})

0    Chris
1     Mary
2      Joe
Name: first, dtype: object

adding a column

In [38]:
# Work on an explicit, independent copy of `people`. Building a DataFrame from
# another DataFrame does not copy by default, so on some pandas versions columns
# added to `df` below could also show up in `people`; .copy() rules that out.
df = people.copy()

In [40]:
# Preview the element-wise string concatenation "first last"; nothing is stored yet.
df['first']+ ' ' + df['last'] 

0      Jane Doe
1    John Smith
2       Joe Hoe
dtype: object

In [41]:
# Build "first last" for every row, then store it as the new full_name column.
# Assigning to a new key with bracket notation is what creates the column.
full_names = df['first'] + ' ' + df['last']
df['full_name'] = full_names

In [42]:
# Display the frame with the new full_name column appended on the right.
df

Unnamed: 0,first,last,email,full_name
0,Jane,Doe,JaneDoe@emailcom,Jane Doe
1,John,Smith,JohnSmith@email.com,John Smith
2,Joe,Hoe,JoeHoe@email.com,Joe Hoe


removing a column

In [45]:
# Remove the separate name columns; drop returns a new frame, so rebind df.
df = df.drop(['first', 'last'], axis='columns')

In [48]:
# Split each full name on the space; expand=True returns the pieces as
# separate columns (labelled 0 and 1) instead of a single column of lists.
df['full_name'].str.split(' ', expand = True)

Unnamed: 0,0,1
0,Jane,Doe
1,John,Smith
2,Joe,Hoe


In [49]:
# Split full_name into its two parts and assign them to two new columns at once.
# This relies on every name containing exactly one space (two resulting parts).
name_parts = df['full_name'].str.split(' ', expand=True)
df[['first', 'last']] = name_parts

In [50]:
# Display the frame with first/last re-created from full_name.
df

Unnamed: 0,email,full_name,first,last
0,JaneDoe@emailcom,Jane Doe,Jane,Doe
1,JohnSmith@email.com,John Smith,John,Smith
2,JoeHoe@email.com,Joe Hoe,Joe,Hoe


In [58]:
# Preview adding a partial row: columns missing from the new record are left empty (NaN).
# pd.concat is used because DataFrame.append was deprecated and then removed in pandas 2.0.
# ignore_index=True renumbers the rows 0..n-1; the result is not stored, so df is unchanged.
pd.concat([df, pd.DataFrame([{'first': 'Tony'}])], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,JaneDoe@emailcom,Jane Doe,Jane,Doe
1,JohnSmith@email.com,John Smith,John,Smith
2,JoeHoe@email.com,Joe Hoe,Joe,Hoe
3,,,Tony,


In [60]:
# Second batch of records (no full_name column) used to demonstrate combining frames.
people = dict(
    first=['Tony', 'Steve'],
    last=['Stark', 'Rogers'],
    email=['TonyStark@ironman.com', 'SteveRogers@captain.com'],
)
df2 = pd.DataFrame(data=people)

In [61]:
# Display the second frame.
df2

Unnamed: 0,first,last,email
0,Tony,Stark,TonyStark@ironman.com
1,Steve,Rogers,SteveRogers@captain.com


In [62]:
# Preview stacking df2 underneath df. Columns are aligned by name, so the
# full_name column that df2 lacks is left empty for its rows.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,JaneDoe@emailcom,Jane Doe,Jane,Doe
1,JohnSmith@email.com,John Smith,John,Smith
2,JoeHoe@email.com,Joe Hoe,Joe,Hoe
3,TonyStark@ironman.com,,Tony,Stark
4,SteveRogers@captain.com,,Steve,Rogers


In [63]:
# Keep the combined frame by rebinding df (concat never modifies its inputs).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Note: re-running this cell stacks df2 onto df again, duplicating its rows.
df = pd.concat([df, df2], ignore_index=True)

In [64]:
# Display the combined frame (original rows followed by the rows from df2).
df

Unnamed: 0,email,full_name,first,last
0,JaneDoe@emailcom,Jane Doe,Jane,Doe
1,JohnSmith@email.com,John Smith,John,Smith
2,JoeHoe@email.com,Joe Hoe,Joe,Hoe
3,TonyStark@ironman.com,,Tony,Stark
4,SteveRogers@captain.com,,Steve,Rogers


In [66]:
# Remove the row labelled 4. The result is rebound to df instead of using
# inplace=True, consistent with the `df = df.drop(columns=...)` cell above.
df = df.drop(index=4)

In [None]:
# Drop rows by condition: build a boolean mask, look up the index labels of the
# matching rows, and pass those labels to drop. The result is only displayed;
# df itself is left unchanged.
filt = df['last'] == 'Doe'
rows_to_drop = df.loc[filt].index
df.drop(index=rows_to_drop)