# Modifying data Within Dataframes

In [8]:
import pandas as pd

people= {
    "first" : ['Corey' , 'John' ,'Lokesh'],
    'last' : ['Schafer','Doe' , 'Doe'],
    'email' : ['something@gmail.com' , 'anything@gmail.com','Hello@gmail.com']
}
people2= {
    "first" : ['Gaurav' , 'Amit' ,'Lokesh'],
    'last' : ['Schafer','Doe' , 'Doe'],
    'email' : ['something@gmail.com' , 'anything@gmail.com','Hello@gmail.com']
}


In [9]:
df = pd.DataFrame(people)
df2 = pd.DataFrame(people2)

In [10]:
df
df2

Unnamed: 0,first,last,email
0,Gaurav,Schafer,something@gmail.com
1,Amit,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


# Updating Column & Rows

In [11]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [12]:
df.columns = ['First_Name' , 'Last_Name' , 'email']

In [13]:
df


Unnamed: 0,First_Name,Last_Name,email
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [14]:
df.columns = [x.upper() for x in df.columns]

In [15]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [16]:
df.columns = df.columns.str.replace('_',' ') # Making changes in Columns

In [17]:
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [18]:
df.columns = df.columns.str.replace(' ','_') # Making changes in Columns\

In [19]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [20]:
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [21]:
df.rename(columns = {'first_name' : 'first',
                        'last_name' : 'last'} , inplace = True)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [22]:
df.loc[2 ,['last','email']] = ['Joshi','Same@gmail.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Joshi,Same@gmail.com


In [23]:
df['email']=df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,something@gmail.com
1,John,Doe,anything@gmail.com
2,Lokesh,Joshi,same@gmail.com


# 4 Methods
### 1. apply
### 2. map
### 3. applymap
### 4. replace

# 1. apply

In [24]:
# Can work on dataframe or series object
## Work in series

df['Len_of_emails'] = df['email'].apply(len) #Counting the length of email

In [25]:
df

Unnamed: 0,first,last,email,Len_of_emails
0,Corey,Schafer,something@gmail.com,19
1,John,Doe,anything@gmail.com,18
2,Lokesh,Joshi,same@gmail.com,14


In [26]:
def update_email(email):
    return email.upper()

In [27]:
df['email'] =df['email'].apply(update_email)
df

Unnamed: 0,first,last,email,Len_of_emails
0,Corey,Schafer,SOMETHING@GMAIL.COM,19
1,John,Doe,ANYTHING@GMAIL.COM,18
2,Lokesh,Joshi,SAME@GMAIL.COM,14


In [28]:
df['email'] =df['email'].apply(lambda x:x.lower())
df

Unnamed: 0,first,last,email,Len_of_emails
0,Corey,Schafer,something@gmail.com,19
1,John,Doe,anything@gmail.com,18
2,Lokesh,Joshi,same@gmail.com,14


In [29]:
## Working on Data Frames
df['email'].apply(len)

0    19
1    18
2    14
Name: email, dtype: int64

In [30]:
df.apply(len , axis ='columns') ## Changing the axis to columns

0    4
1    4
2    4
dtype: int64

### 2.apply map

In [31]:
df[['first','last','email']].applymap(lambda x: x.lower())

  df[['first','last','email']].applymap(lambda x: x.lower())


Unnamed: 0,first,last,email
0,corey,schafer,something@gmail.com
1,john,doe,anything@gmail.com
2,lokesh,joshi,same@gmail.com


In [32]:
df['first'].map(len)
df

Unnamed: 0,first,last,email,Len_of_emails
0,Corey,Schafer,something@gmail.com,19
1,John,Doe,anything@gmail.com,18
2,Lokesh,Joshi,same@gmail.com,14


In [33]:
df.columns

Index(['first', 'last', 'email', 'Len_of_emails'], dtype='object')

In [34]:
df['first']

0     Corey
1      John
2    Lokesh
Name: first, dtype: object

# Removing/Adding Columns And Rows

In [35]:
df


Unnamed: 0,first,last,email,Len_of_emails
0,Corey,Schafer,something@gmail.com,19
1,John,Doe,anything@gmail.com,18
2,Lokesh,Joshi,same@gmail.com,14


In [36]:
 df['Name'] = df['first'] + " " + df['last']

In [37]:
df


Unnamed: 0,first,last,email,Len_of_emails,Name
0,Corey,Schafer,something@gmail.com,19,Corey Schafer
1,John,Doe,anything@gmail.com,18,John Doe
2,Lokesh,Joshi,same@gmail.com,14,Lokesh Joshi


In [38]:
list1 = ['Name','first','last','email','Len_of_emails']  
# For rearranging the small data set Columns

In [39]:
df = df[list1]
df

Unnamed: 0,Name,first,last,email,Len_of_emails
0,Corey Schafer,Corey,Schafer,something@gmail.com,19
1,John Doe,John,Doe,anything@gmail.com,18
2,Lokesh Joshi,Lokesh,Joshi,same@gmail.com,14


In [40]:
# for removing columns
df.drop(columns = ['first' , 'last'] , inplace = True) # do require inplace for make changes in
#data frame

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns = ['first' , 'last'] , inplace = True) # do require inplace for make changes in


In [41]:
df['Name'].str.split(" " ,expand = True)

Unnamed: 0,0,1
0,Corey,Schafer
1,John,Doe
2,Lokesh,Joshi


In [42]:
df[['first','last']] = df['Name'].str.split(" " ,expand = True)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['first','last']] = df['Name'].str.split(" " ,expand = True)


Unnamed: 0,Name,email,Len_of_emails,first,last
0,Corey Schafer,something@gmail.com,19,Corey,Schafer
1,John Doe,anything@gmail.com,18,John,Doe
2,Lokesh Joshi,same@gmail.com,14,Lokesh,Joshi


In [43]:
df


Unnamed: 0,Name,email,Len_of_emails,first,last
0,Corey Schafer,something@gmail.com,19,Corey,Schafer
1,John Doe,anything@gmail.com,18,John,Doe
2,Lokesh Joshi,same@gmail.com,14,Lokesh,Joshi


In [44]:
df2

Unnamed: 0,first,last,email
0,Gaurav,Schafer,something@gmail.com
1,Amit,Doe,anything@gmail.com
2,Lokesh,Doe,Hello@gmail.com


In [45]:
combined_df = pd.concat([df, df2], ignore_index=True)
df

Unnamed: 0,Name,email,Len_of_emails,first,last
0,Corey Schafer,something@gmail.com,19,Corey,Schafer
1,John Doe,anything@gmail.com,18,John,Doe
2,Lokesh Joshi,same@gmail.com,14,Lokesh,Joshi


In [46]:
combined_df

Unnamed: 0,Name,email,Len_of_emails,first,last
0,Corey Schafer,something@gmail.com,19.0,Corey,Schafer
1,John Doe,anything@gmail.com,18.0,John,Doe
2,Lokesh Joshi,same@gmail.com,14.0,Lokesh,Joshi
3,,something@gmail.com,,Gaurav,Schafer
4,,anything@gmail.com,,Amit,Doe
5,,Hello@gmail.com,,Lokesh,Doe


In [54]:
filters = combined_df['last'] == 'Doe' 

In [55]:
combined_df.drop(index =combined_df[filters].index)

Unnamed: 0,Name,email,Len_of_emails,first,last
0,Corey Schafer,something@gmail.com,19.0,Corey,Schafer
2,Lokesh Joshi,same@gmail.com,14.0,Lokesh,Joshi
3,,something@gmail.com,,Gaurav,Schafer
