# MODIFYING DATA IN A DF, MODIFYING ROWS AND COLUMNS

In [1]:
import pandas as pd
import numpy as np


In [2]:
# Sample records stored column-wise: position i across the three lists
# together describes person i.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
)

In [5]:
# Build the working DataFrame from the dict of column lists: each key becomes
# a column and the rows get the default 0..2 integer index.
df= pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [6]:
# Inspect the current column labels (returned as a pandas Index).
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [7]:
# Rename every column in a single assignment: the list supplies one new label
# for each of the three existing columns, in order.
df.columns = ['first_name','last_name','email_id']

df

Unnamed: 0,first_name,last_name,email_id
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [8]:
# Upper-case every column label with a list comprehension over the current
# labels, then assign the resulting list back.
df.columns =[x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ID
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [9]:
# Normalise column labels by turning spaces into underscores, using the
# vectorised .str accessor on the column Index.  Replacing '_' with '_' can
# never change a label; ' ' -> '_' is the meaningful clean-up.  None of the
# current labels contains a space, so the frame shown is unchanged here.
df.columns = df.columns.str.replace(' ', '_')
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL_ID
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [10]:
# Return the column labels to lower case, again via a list comprehension.
df.columns =[x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email_id
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [13]:
# Rename selected columns through an explicit old -> new mapping.  The renamed
# frame is bound back to `df` rather than mutated with inplace=True, so the
# cell is an ordinary expression that can be chained with further calls.
df = df.rename(columns={'first_name':'first','last_name':'last','email_id':'email'})
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [14]:
# Select the row labelled 2; it comes back as a Series indexed by column name.
df.loc[2]

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [15]:
# Overwrite the whole row labelled 2: the list gives one value per column,
# in column order (first, last, email).
df.loc[2] = ['John','Smith','Johnsmith@mail.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,Johnsmith@mail.com


In [16]:
# Update only the 'last' and 'email' cells of row 2; 'first' is left as is.
df.loc[2,['last','email']] = ['Doe','Johndoe@mail.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,Johndoe@mail.com


In [17]:
# Change a single cell by giving .loc one row label and one column label.
df.loc[2,'last']='Smith'

In [18]:
# Display the frame to confirm the single-cell assignment made above.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,Johndoe@mail.com


In [19]:
# Same kind of single-cell update, this time through .at, which addresses
# exactly one row label / column label pair.
df.at[2,'last']='Doe'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,Johndoe@mail.com


In [23]:
# Boolean mask: True for rows whose email equals the literal exactly.
# NOTE(review): the comparison is case-sensitive and the frame currently holds
# 'Johndoe@mail.com' (lower-case 'd'), so this mask is all False and df[filt]
# selects no rows.  Confirm whether 'Johndoe@mail.com' was intended.
filt = (df['email']== 'JohnDoe@mail.com')
# Note: this displays the whole frame, not the mask that was just built.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,Johndoe@mail.com


In [24]:
# Select the rows where the mask is True; the result is empty here because no
# stored email matches the literal used to build `filt`.
df[filt]

Unnamed: 0,first,last,email


In [25]:
# Re-display the full frame to compare the stored emails with the filter value.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,Johndoe@mail.com


In [26]:
# Lower-case every email with the vectorised .str accessor and store the
# result back in the 'email' column.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@mail.com


# APPLY: WORKS ON A DF OR A SERIES. ON A SERIES IT APPLIES A FUNCTION TO EVERY VALUE; ON A DF IT APPLIES THE FUNCTION TO EACH COLUMN (OR ROW) AS A SERIES

In [27]:
# On a Series, apply calls the function once per value: here the character
# count of each email address.
df['email'].apply(len)

0    23
1    17
2    16
Name: email, dtype: int64

In [28]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Written as a named function so it can be handed to ``Series.apply``.
    """
    uppercased = email.upper()
    return uppercased

In [29]:
# Pass the function object itself (no parentheses) to apply.  The result is a
# new Series; nothing is assigned here, so df is not modified by this cell.
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2           JOHNDOE@MAIL.COM
Name: email, dtype: object

In [30]:
# Assign the applied result back to the column to make the change stick.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Doe,JOHNDOE@MAIL.COM


In [31]:
# Same pattern with an anonymous function: lower-case each email and store the
# result back in the column.
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@mail.com


In [32]:
# Series.apply again for comparison with the DataFrame versions that follow:
# one result per value (the length of each email string).
df['email'].apply(len)

0    23
1    17
2    16
Name: email, dtype: int64

In [33]:
# On a DataFrame, apply hands each column to the function as a Series, so
# len gives the number of rows in every column (3), not string lengths.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [34]:
# With axis='columns' the function is applied to each row instead, so len
# gives the number of columns in every row (3).
df.apply(len,axis = 'columns')

0    3
1    3
2    3
dtype: int64

In [35]:
# Apply a Series method to every column: the minimum of each column, which for
# these string columns is the smallest value in string sort order.
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

# APPLYMAP ONLY WORKS ON A DATA FRAME: IT APPLIES THE FUNCTION TO EVERY ELEMENT IN THE DATA FRAME
    
    

In [36]:
# applymap calls the function on every individual cell: the length of each
# string in the frame.
# NOTE(review): newer pandas releases (2.1+) deprecate DataFrame.applymap in
# favour of DataFrame.map; confirm against the pandas version in use.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,3,16


In [37]:
# Lower-case every cell.  The result is a new frame that is only displayed;
# it is not assigned, so df itself keeps its original capitalisation.
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@mail.com


# MAP: SUBSTITUTES EACH VALUE IN A SERIES WITH ANOTHER VALUE (SERIES ONLY); VALUES MISSING FROM THE MAPPING BECOME NaN

In [38]:
# Substitute values through a dict lookup; any value that is not a key in the
# dict becomes NaN in the result.
# NOTE(review): keys are matched case-sensitively.  The column holds 'Corey',
# so the 'corey' key matches nothing and row 0 comes back as NaN rather than
# 'Chris'.  Confirm whether the key should be 'Corey'.
df['first'].map({'corey':'Chris','Jane':'Mary'})

0     NaN
1    Mary
2     NaN
Name: first, dtype: object

In [39]:
# replace swaps only the values found in the dict and leaves every other value
# untouched (no NaN for unmatched entries), so 'John' is preserved.
# NOTE(review): 'corey' does not match the stored 'Corey', so only
# 'Jane' -> 'Mary' takes effect.  Confirm whether the key should be 'Corey'.
df['first'] = df['first'].replace({'corey':'Chris','Jane':'Mary'})
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Doe,johndoe@mail.com
