In [1]:
# https://www.youtube.com/watch?v=DCDe29sIKcE&list=PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS&index=5
import pandas as pd
import os

#data_path, schema_path = 'data/news.csv', 'data/news_schema.csv'
#df, schema_df = pd.read_csv(data_path), pd.read_csv(schema_path)
# Sample records used throughout the notebook: one list of values per column.
people = {
    "first": ["Dave", "Jane", "John", "Alfred"],
    "last": ["Zimmer", "Doe", "Doe", "Wayne"],
    "email": ["DaveZimmer@mail.com", "JaneDoe@mail.com", "JohnDoe@mail.com", "Alfie@butlers.com"],
}
# Pin the column order explicitly. Later cells assign column names and row
# values by position in email/first/last order, and the order pandas derives
# from a dict differs between environments (sorted keys vs. insertion order).
df = pd.DataFrame(people, columns=["email", "first", "last"])

In [2]:
# Display the freshly built frame to see the starting columns and rows.
df

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnDoe@mail.com,John,Doe
3,Alfie@butlers.com,Alfred,Wayne


In [3]:
# Inspect the current column labels (an Index object) before renaming them.
df.columns

Index(['email', 'first', 'last'], dtype='object')

In [6]:
# Replace every column label at once. The list is applied positionally: its
# i-th entry becomes the label of the i-th existing column, so it must follow
# the frame's current column order and contain one name per column.
df.columns = ['E-mail', 'First_Name', 'Last_Name']

In [26]:
# NOTE(review): the execution count (In [26]) suggests this cell was last run
# after the columns had been renamed back to email/first/last in In [25], so
# the saved output presumably does not reflect the assignment in the cell just
# above — re-run the notebook top to bottom to confirm.
df

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnDoe@mail.com,John,Doe
3,Alfie@butlers.com,Alfred,Wayne


### Change column names

In [17]:
# Lower-case every column label through the Index string accessor.
df.columns = df.columns.str.lower()
df

Unnamed: 0,e-mail,first||name,last||name
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnDoe@mail.com,John,Doe
3,Alfie@butlers.com,Alfred,Wayne


In [24]:
# Strip the underscores out of every column label using the Index string
# accessor, then show the frame with the new labels.
df.columns = df.columns.str.replace("_", "")
df

Unnamed: 0,e-mail,firstname,lastname
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnDoe@mail.com,John,Doe
3,Alfie@butlers.com,Alfred,Wayne


In [25]:
# Rename selected columns by label via an old-name -> new-name mapping.
# The renamed frame is bound back to df rather than mutating it with
# inplace=True, which keeps the cell a plain, chainable expression.
df = df.rename(columns={'e-mail': 'email', 'firstname': 'first', 'lastname': 'last'})
df

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnDoe@mail.com,John,Doe
3,Alfie@butlers.com,Alfred,Wayne


### Change values

In [27]:
# Look up the row with index label 2; a single row comes back as a Series
# keyed by column name.
df.loc[2]

email    JohnDoe@mail.com
first                John
last                  Doe
Name: 2, dtype: object

In [31]:
# Overwrite all three fields of row 2. Naming the target columns ties each
# value to its column by label, so the update stays correct whatever order the
# frame's columns happen to be in.
df.loc[2, ['email', 'first', 'last']] = ['JohnsNewMail@mail.com', 'Johnny', 'Doe']
df.loc[2]

email    JohnsNewMail@mail.com
first                   Johnny
last                       Doe
Name: 2, dtype: object

In [33]:
# Update only some fields of row 2: the listed columns and the listed values
# are paired up in order; the 'last' column is not touched.
df.loc[2, ['email', 'first']] = ['JohnsOldEmail@mail.com', 'Johnathan']

In [35]:
# Show the frame to confirm that only row 2's email and first name changed.
df

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnsOldEmail@mail.com,Johnathan,Doe
3,Alfie@butlers.com,Alfred,Wayne


In [38]:
# Boolean mask that is True for the row(s) carrying this email address.
filt = df['email'] == 'JohnsOldEmail@mail.com'
# Read the surname of the matching row(s) with a single .loc lookup.
df.loc[filt, 'last']

2    Doe
Name: last, dtype: object

In [39]:
# THIS DOES NOT WORK: df[filt] produces a separate object first, so the
# assignment lands on that temporary result and pandas emits the warning shown
# below instead of changing df. Use df.loc[filt, 'last'] = ... to write to the
# frame itself.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [40]:
df  # See: the chained assignment above did not change row 2's last name

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnsOldEmail@mail.com,Johnathan,Doe
3,Alfie@butlers.com,Alfred,Wayne


In [41]:
# Correct form: a single .loc call with the row mask and the column label
# writes into df itself, so the new surname shows up in the frame below.
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnsOldEmail@mail.com,Johnathan,Smith
3,Alfie@butlers.com,Alfred,Wayne


In [42]:
# Put row 2's surname back to 'Doe', undoing the 'Smith' edit made above.
df.loc[2, 'last'] = 'Doe'
df

Unnamed: 0,email,first,last
0,DaveZimmer@mail.com,Dave,Zimmer
1,JaneDoe@mail.com,Jane,Doe
2,JohnsOldEmail@mail.com,Johnathan,Doe
3,Alfie@butlers.com,Alfred,Wayne


In [45]:
# Lower-case every address with the vectorised string accessor and assign the
# result back so the change is stored in the frame.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,email,first,last
0,davezimmer@mail.com,Dave,Zimmer
1,janedoe@mail.com,Jane,Doe
2,johnsoldemail@mail.com,Johnathan,Doe
3,alfie@butlers.com,Alfred,Wayne


### Apply, Map, ApplyMap, Replace

In [51]:
# Series.apply calls the function once per value: here, the character count of
# each email address.
df['email'].apply(len)

0    19
1    16
2    22
3    17
Name: email, dtype: int64

In [55]:
# Same per-address character count, obtained from the built-in string accessor.
df['email'].str.len()

0    19
1    16
2    22
3    17
Name: email, dtype: int64

In [57]:
# Store the per-address character counts in a new 'email_len' column.
df['email_len'] = df['email'].apply(len)
df

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,Dave,Zimmer,19
1,janedoe@mail.com,Jane,Doe,16
2,johnsoldemail@mail.com,Johnathan,Doe,22
3,alfie@butlers.com,Alfred,Wayne,17


In [64]:
def update_email(email):
    """Return the given email address converted to upper case."""
    upper_cased = email.upper()
    return upper_cased

In [66]:
df['email'].apply(update_email)  # returns a new Series; df itself is left unchanged

0       DAVEZIMMER@MAIL.COM
1          JANEDOE@MAIL.COM
2    JOHNSOLDEMAIL@MAIL.COM
3         ALFIE@BUTLERS.COM
Name: email, dtype: object

In [67]:
# Assign the transformed Series back to the column so the upper-cased
# addresses are actually stored in the frame.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,email,first,last,email_len
0,DAVEZIMMER@MAIL.COM,Dave,Zimmer,19
1,JANEDOE@MAIL.COM,Jane,Doe,16
2,JOHNSOLDEMAIL@MAIL.COM,Johnathan,Doe,22
3,ALFIE@BUTLERS.COM,Alfred,Wayne,17


In [68]:
# Lower-case the addresses again, this time handing the unbound str method
# straight to apply instead of wrapping it in a helper.
df['email'] = df['email'].apply(str.lower)
df

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,Dave,Zimmer,19
1,janedoe@mail.com,Jane,Doe,16
2,johnsoldemail@mail.com,Johnathan,Doe,22
3,alfie@butlers.com,Alfred,Wayne,17


In [77]:
# Overwrite every entry of 'email_len' with the constant 0; the lambda ignores
# the value it is given.
df['email_len'] = df['email_len'].apply(lambda x: 0)
df

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,Dave,Zimmer,0
1,janedoe@mail.com,Jane,Doe,0
2,johnsoldemail@mail.com,Johnathan,Doe,0
3,alfie@butlers.com,Alfred,Wayne,0


In [78]:
# DataFrame.apply calls the function once per column (each passed as a
# Series), so len reports the number of rows in every column.
df.apply(len)

email        4
first        4
last         4
email_len    4
dtype: int64

In [79]:
# With axis='columns' the function is called once per row instead, so len is
# the number of columns in each row.
df.apply(len, axis='columns')

0    4
1    4
2    4
3    4
dtype: int64

In [83]:
# Smallest value of each column; for the text columns that is the string that
# sorts first.
df.apply(pd.Series.min)

email        alfie@butlers.com
first                   Alfred
last                       Doe
email_len                    0
dtype: object

In [84]:
# Largest value of each column; for the text columns that is the string that
# sorts last.
df.apply(pd.Series.max)

email        johnsoldemail@mail.com
first                     Johnathan
last                         Zimmer
email_len                         0
dtype: object

In [85]:
# Per-column minimum again, written as a lambda that receives each column.
df.apply(lambda col: col.min())

email        alfie@butlers.com
first                   Alfred
last                       Doe
email_len                    0
dtype: object

In [86]:
# Per-column maximum again, written as a lambda that receives each column.
df.apply(lambda col: col.max())

email        johnsoldemail@mail.com
first                     Johnathan
last                         Zimmer
email_len                         0
dtype: object

In [88]:
# Replace the integers in 'email_len' with a string so that every cell of the
# frame holds text; the element-wise len / str.lower / str.upper calls in the
# following cells need string values in every column.
df['email_len'] = df['email_len'].apply(lambda x: "hello and")

In [89]:
# Apply len to every individual cell of the frame (element-wise), rather than
# to whole rows or columns as DataFrame.apply does.
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map — confirm against the installed version before switching.
df.applymap(len)

Unnamed: 0,email,first,last,email_len
0,19,4,6,9
1,16,4,3,9
2,22,9,3,9
3,17,6,5,9


In [90]:
# Element-wise lower-casing of every cell; the result is a new frame.
df.applymap(str.lower)

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,dave,zimmer,hello and
1,janedoe@mail.com,jane,doe,hello and
2,johnsoldemail@mail.com,johnathan,doe,hello and
3,alfie@butlers.com,alfred,wayne,hello and


In [91]:
# Element-wise upper-casing of every cell; the result is a new frame.
df.applymap(str.upper)

Unnamed: 0,email,first,last,email_len
0,DAVEZIMMER@MAIL.COM,DAVE,ZIMMER,HELLO AND
1,JANEDOE@MAIL.COM,JANE,DOE,HELLO AND
2,JOHNSOLDEMAIL@MAIL.COM,JOHNATHAN,DOE,HELLO AND
3,ALFIE@BUTLERS.COM,ALFRED,WAYNE,HELLO AND


In [92]:
# Show df again: the applymap calls above returned new frames and left the
# original values as they were.
df

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,Dave,Zimmer,hello and
1,janedoe@mail.com,Jane,Doe,hello and
2,johnsoldemail@mail.com,Johnathan,Doe,hello and
3,alfie@butlers.com,Alfred,Wayne,hello and


In [95]:
# Series.map substitutes values through the dict; names that have no key in
# the dict come back as NaN.
df['first'].map({'Dave':'Diddler', 'Jane':'Jill'})

0    Diddler
1       Jill
2        NaN
3        NaN
Name: first, dtype: object

In [96]:
# Series.replace swaps only the values listed in the dict and keeps every
# other name as it is.
df['first'].replace({'Dave':'Diddler', 'Jane':'Jill'})

0      Diddler
1         Jill
2    Johnathan
3       Alfred
Name: first, dtype: object

In [97]:
# Show df again: neither map nor replace above modified the 'first' column,
# because their results were not assigned back.
df

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,Dave,Zimmer,hello and
1,janedoe@mail.com,Jane,Doe,hello and
2,johnsoldemail@mail.com,Johnathan,Doe,hello and
3,alfie@butlers.com,Alfred,Wayne,hello and


In [99]:
# Assign the result of replace back to the column so the substituted names
# are stored in the frame.
df['first'] = df['first'].replace({'Dave':'Diddler', 'Jane':'Jill'})
df

Unnamed: 0,email,first,last,email_len
0,davezimmer@mail.com,Diddler,Zimmer,hello and
1,janedoe@mail.com,Jill,Doe,hello and
2,johnsoldemail@mail.com,Johnathan,Doe,hello and
3,alfie@butlers.com,Alfred,Wayne,hello and
