## Merge Function

In [4]:
import pandas as pd
# Left-hand table: one row per person with their age.
df1 = pd.DataFrame(
    {
        'name': ['Rahul', 'Nidhi', 'Rohini', 'Monika'],
        'age': [23, 24, 23, 26],
    }
)

# Right-hand table: marks for a partially overlapping set of names.
df2 = pd.DataFrame(
    {
        'name': ['Nidhi', 'Rohini', 'Neha'],
        'marks': [67, 87, 88],
    }
)

In [15]:
# Inner join (the default): keep only the names present in both frames.
# 'name' is the single column df1 and df2 share, so it is the join key.
df3 = df1.merge(df2, how='inner', on='name')
df3

Unnamed: 0,name,age,marks
0,Nidhi,24,67
1,Rohini,23,87


In [78]:
# Full outer join: keep every name from either frame.
# indicator=True adds a '_merge' column recording whether each row came from
# the left frame only, the right frame only, or both.
df4 = df1.merge(df2, how='outer', on='name', indicator=True)
df4

Unnamed: 0,name,age,marks,_merge
0,Rahul,23.0,,left_only
1,Nidhi,24.0,67.0,both
2,Rohini,23.0,87.0,both
3,Monika,26.0,,left_only
4,Neha,,88.0,right_only


In [13]:
# Left join: keep every row of df1; 'marks' is NaN where df2 has no matching name.
df5 = df1.merge(df2, how='left', on='name')
df5

Unnamed: 0,name,age,marks
0,Rahul,23,
1,Nidhi,24,67.0
2,Rohini,23,87.0
3,Monika,26,


In [14]:
# Right join: keep every row of df2; 'age' is NaN where df1 has no matching name.
df6 = df1.merge(df2, how='right', on='name')
df6

Unnamed: 0,name,age,marks
0,Nidhi,24.0,67
1,Rohini,23.0,87
2,Neha,,88


## Apply Function

###### apply() with a lambda expression

In [76]:
# Character count of every name, computed element-wise through a lambda.
df4['name_len'] = df4['name'].apply(lambda person: len(person))
df4['name_len']

0    5
1    5
2    6
3    6
4    4
Name: name_len, dtype: int64

In [79]:
# Marks strictly above 70 earn 'A-grade'; every other value gets 'B-grade'.
# NOTE: a missing mark (NaN) compares False against 70, so rows without marks
# are also labelled 'B-grade'.
df4['grade'] = df4['marks'].apply(
    lambda score: 'B-grade' if not score > 70 else 'A-grade'
)
df4

Unnamed: 0,name,age,marks,_merge,grade
0,Rahul,23.0,,left_only,B-grade
1,Nidhi,24.0,67.0,both,B-grade
2,Rohini,23.0,87.0,both,A-grade
3,Monika,26.0,,left_only,B-grade
4,Neha,,88.0,right_only,A-grade


###### Apply() with user defined function

In [39]:
# One record per issue date, each stored as a 'YYYY-MM-DD' string.
dates = [
    {'Issue Date': iso_date}
    for iso_date in ('2021-03-04', '2020-07-04', '2000-03-04')
]
df7 = pd.DataFrame(dates, columns=['Issue Date'])
df7

Unnamed: 0,Issue Date
0,2021-03-04
1,2020-07-04
2,2000-03-04


In [40]:
def extract_year(x):
    """Return the part of ``x`` that precedes the first '-'.

    For a 'YYYY-MM-DD' string this is the year, returned as a string.
    A string containing no '-' is returned unchanged.
    """
    leading_part, _, _ = x.partition('-')
    return leading_part

In [41]:
# Run extract_year on each 'Issue Date' string; the resulting years stay
# strings, so the column dtype is object.
df7['year'] = df7['Issue Date'].apply(extract_year)
df7['year']

0    2021
1    2020
2    2000
Name: year, dtype: object

In [42]:
def age(year, current_year):
    """Return the number of years between ``year`` and ``current_year``.

    ``year`` is converted with ``int()``, so a numeric string such as '2000'
    is accepted. The result is negative when ``year`` is later than
    ``current_year``.
    """
    start_year = int(year)
    return current_year - start_year

In [44]:
# Extra keyword arguments given to apply() are forwarded to the function,
# so each element is evaluated as age(year, current_year=2018).
# NOTE: with 2018 as the reference year, the 2021 and 2020 rows come out negative.
df7['age'] = df7['year'].apply(
    age,
    current_year=2018,
)
df7['age']

0    -3
1    -2
2    18
Name: age, dtype: int64

###### List comprehension as an alternative to apply()

In [83]:
def sum_square(a, b):
    """Return the square of the sum of ``a`` and ``b``, i.e. ``(a + b) ** 2``."""
    total = a + b
    return pow(total, 2)
# Pair each age with the marks from the same row; a NaN in either column
# makes that row's result NaN.
df4['sum_square'] = [
    sum_square(age_value, marks_value)
    for age_value, marks_value in zip(df4['age'], df4['marks'])
]


In [81]:
# Display df4, which now includes the 'sum_square' column.
df4

Unnamed: 0,name,age,marks,_merge,grade,sum_square
0,Rahul,23.0,,left_only,B-grade,
1,Nidhi,24.0,67.0,both,B-grade,8281.0
2,Rohini,23.0,87.0,both,A-grade,12100.0
3,Monika,26.0,,left_only,B-grade,
4,Neha,,88.0,right_only,A-grade,


In [118]:
# Upper-case copy of every name, built with a plain list comprehension.
df4['NAME'] = [person.upper() for person in df4['name']]

In [119]:
# Display df4, which now includes the upper-cased 'NAME' column.
df4

Unnamed: 0,name,age,marks,_merge,grade,sum_square,NAME
0,Rahul,23.0,,left_only,B-grade,,RAHUL
1,Nidhi,24.0,67.0,both,B-grade,8281.0,NIDHI
2,Rohini,23.0,87.0,both,A-grade,12100.0,ROHINI
3,Monika,26.0,,left_only,B-grade,,MONIKA
4,Neha,,88.0,right_only,A-grade,,NEHA
