# Data Modification in DataFrame
Data in pandas DataFrame can be modified in various ways. We can change the value with assignment operator.
Also pandas provide below useful Functions to perform some advance data manipulation and modification operations.
1. apply() - This is a higher order Function, used to apply a function in the whole dataframe or in a Series Object.
The behaviour of the outcome of the applied function varies whether it is applied on DataFrame or Series object.
We can retrieve certain information of Series object or from whole DataFrame by passing the relevant function to apply().
2. applymap() - Use this higher order Function when you want to apply a function to every element of a dataframe.Series object does not have applymap().
3. map() - Applicable only to Series object, does replacing the value for given one. Adds NaN for non given value. Accepts dict, Series and also Function as argument. 
4. replace() - Applicable only to Series object, does replacing the value for given one. Keeps other as it is.

In [1]:
import pandas as pd

footballer_bkp = {"first_name":["Leonardo", "Bernardo", " Antoine","Loris","Alexis"],
          "last_name":["Bonucci ", " Silva", "Griezmann","Karius","Saelemaekers"],
          "age": ["34", "26", "30", "28", "22"],
          "position":["Centre-Back", "Midfielder", "Forward", "Golkeeper", "Right Winger"],
          "club":["Juventus", "Manchester City", "Barcelona", "Liverpool","A.C. Milan"],
          "nationality": ["Italy", "Portugal", "France", "Germany", "Belgium"]
        }

footballer = {"first_name":["leonardo", "bernardo", "antoine","loris","alexis"],
          "last_name":["Bonucci ", " Silva", "Griezmann","Karius","Saelemaekers"],
          "age": ["32", "26", "30", "28", "22"],
          "position":["Defender", "Midfielder", "Forward", "Golkeeper", "Centre-Forward"],
          "club":["Juventus", "Manchester United", "Barcelona", "Liverpool","A.C. Milan"],
          "nationality": ["Spain", "Portugal", "France", "Germany", "Belgium"]
        }

In [2]:
footballer_df = pd.DataFrame(footballer)
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,leonardo,Bonucci,32,Defender,Juventus,Spain
1,bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,antoine,Griezmann,30,Forward,Barcelona,France
3,loris,Karius,28,Golkeeper,Liverpool,Germany
4,alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium


## Updating with assignment operator
Multiple values or selected value can be updated in this approach. Will use loc() and at() method here.

In [3]:
# Pass list of columns as second argument to loc() function with their corresponding values to be updated
footballer_df.loc[0, ['age', 'position', 'nationality']] = ['34', 'Centre-Back', 'Italy']
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,leonardo,Bonucci,34,Centre-Back,Juventus,Italy
1,bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,antoine,Griezmann,30,Forward,Barcelona,France
3,loris,Karius,28,Golkeeper,Liverpool,Germany
4,alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium


In [4]:
# Using at() function also same update can be performed as loc()
footballer_df.at[0, ['age', 'position', 'nationality']] = ['32', 'Defender', 'Italy']
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,leonardo,Bonucci,32,Defender,Juventus,Italy
1,bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,antoine,Griezmann,30,Forward,Barcelona,France
3,loris,Karius,28,Golkeeper,Liverpool,Germany
4,alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium


In [5]:
# Pass a single column name in case to update that single column. loc() function also can do the same
footballer_df.at[3, 'position'] = 'Goalkeeper'
footballer_df.at[3, 'position']

'Goalkeeper'

In [6]:
# Removed the trailing space from last_name field using Series Object's str class's strip() function 
# and assign it back
footballer_df['last_name'] = footballer_df['last_name'].str.strip()
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,leonardo,Bonucci,32,Defender,Juventus,Italy
1,bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,antoine,Griezmann,30,Forward,Barcelona,France
3,loris,Karius,28,Goalkeeper,Liverpool,Germany
4,alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium


In [7]:
# Updating with Filter condition
filt = (footballer_df['first_name'] == 'leonardo') & (footballer_df['last_name'] == 'Bonucci')
footballer_df.loc[filt, 'age'] = '34'
footballer_df.loc[filt]

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,leonardo,Bonucci,34,Defender,Juventus,Italy


In [8]:
# This way it does not update the value, notice the warning
footballer_df[filt]["age"] = '32'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [9]:
footballer_df.loc[0] # Its still '34', not updated

first_name     leonardo
last_name       Bonucci
age                  34
position       Defender
club           Juventus
nationality       Italy
Name: 0, dtype: object

In [10]:
# Getting lengh of each entry in "club" column
footballer_df['club'].apply(len)  # Notice, () has not provided in the len Function

0     8
1    17
2     9
3     9
4    10
Name: club, dtype: int64

In [11]:
# Used apply() Function in whole DataFrame
footballer_df.apply(len)  # This Returns the number of entry in each column

first_name     5
last_name      5
age            5
position       5
club           5
nationality    5
dtype: int64

In [12]:
footballer_df.apply(len, axis = 'columns')  # Default axis is rows

0    6
1    6
2    6
3    6
4    6
dtype: int64

In [13]:
# Using apply() function converting the first character of first_name column to upper case
footballer_df['first_name'] = footballer_df['first_name'].apply(lambda s: s.capitalize())
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,Leonardo,Bonucci,34,Defender,Juventus,Italy
1,Bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,Antoine,Griezmann,30,Forward,Barcelona,France
3,Loris,Karius,28,Goalkeeper,Liverpool,Germany
4,Alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium


In [14]:
# Changed the datatype of values in 'age' column
footballer_df['age'] = footballer_df['age'].apply(lambda e: int(e))
footballer_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   first_name   5 non-null      object
 1   last_name    5 non-null      object
 2   age          5 non-null      int64 
 3   position     5 non-null      object
 4   club         5 non-null      object
 5   nationality  5 non-null      object
dtypes: int64(1), object(5)
memory usage: 368.0+ bytes


In [15]:
# Applying function to each Series object in the DataFrame, approach-1
footballer_df.apply(pd.Series.min)

first_name             Alexis
last_name             Bonucci
age                        22
position       Centre-Forward
club               A.C. Milan
nationality           Belgium
dtype: object

In [16]:
# Applying function to each Series object in the DataFrame, approach-2
footballer_df.apply(lambda s: s.min())

first_name             Alexis
last_name             Bonucci
age                        22
position       Centre-Forward
club               A.C. Milan
nationality           Belgium
dtype: object

In [17]:
# Selecting all columns except "age"
cols = footballer_df.columns.difference(['age'])
# Applying len() Function to each element in DataFrame with selected columns
footballer_df[cols].applymap(len)

Unnamed: 0,club,first_name,last_name,nationality,position
0,8,8,7,5,8
1,17,8,5,8,10
2,9,7,9,6,7
3,9,5,6,7,10
4,10,6,12,7,14


In [18]:
# Changing value using map() Function
footballer_df['club'].map({'Manchester United':'Manchester City'}, na_action='ignore')

0                NaN
1    Manchester City
2                NaN
3                NaN
4                NaN
Name: club, dtype: object

In [19]:
# Changing value using replace() Function
footballer_df['position'].replace({'Centre-Forward':'Right Winger'})

0        Defender
1      Midfielder
2         Forward
3      Goalkeeper
4    Right Winger
Name: position, dtype: object

In [20]:
# Adding a row using append() Function, it accepts Dict object to add row, only if ignore_index=True is provided
# This returns a new DataFrame with Appended row. Assign this returned DataFrame to the original one if
# you want to keep this change
footballer_df.append({'first_name':'Thomas','last_name':'Partey', 'nationality':'Ghana'}, ignore_index=True)

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,Leonardo,Bonucci,34.0,Defender,Juventus,Italy
1,Bernardo,Silva,26.0,Midfielder,Manchester United,Portugal
2,Antoine,Griezmann,30.0,Forward,Barcelona,France
3,Loris,Karius,28.0,Goalkeeper,Liverpool,Germany
4,Alexis,Saelemaekers,22.0,Centre-Forward,A.C. Milan,Belgium
5,Thomas,Partey,,,,Ghana


In [21]:
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,Leonardo,Bonucci,34,Defender,Juventus,Italy
1,Bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,Antoine,Griezmann,30,Forward,Barcelona,France
3,Loris,Karius,28,Goalkeeper,Liverpool,Germany
4,Alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium


In [22]:
new_footballer = {"first_name":["Thomas", "Andriy"],
          "last_name":["Partey ", "Lunin"],
          "age": [28, "22"],
          "position":["Midfielder", "Goalkeeper"],
          "club":["Arsenal", "Real Madrid"],
          "nationality": ["Ghana", "Ukraine"]
        }

In [23]:
# Created another DataFrame
new_footballer_df = pd.DataFrame(new_footballer)
new_footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,Thomas,Partey,28,Midfielder,Arsenal,Ghana
1,Andriy,Lunin,22,Goalkeeper,Real Madrid,Ukraine


In [24]:
# Appended one DataFrame to another one
footballer_df = footballer_df.append(new_footballer_df, ignore_index=True)

In [91]:
footballer_df

Unnamed: 0,first_name,last_name,age,position,club,nationality
0,Leonardo,Bonucci,34,Defender,Juventus,Italy
1,Bernardo,Silva,26,Midfielder,Manchester United,Portugal
2,Antoine,Griezmann,30,Forward,Barcelona,France
3,Loris,Karius,28,Goalkeeper,Liverpool,Germany
4,Alexis,Saelemaekers,22,Centre-Forward,A.C. Milan,Belgium
5,Thomas,Partey,28,Midfielder,Arsenal,Ghana
6,Andriy,Lunin,22,Goalkeeper,Real Madrid,Ukraine
7,Thomas,Partey,28,Midfielder,Arsenal,Ghana
8,Andriy,Lunin,22,Goalkeeper,Real Madrid,Ukraine
