In [1]:
import pandas as pd
import numpy as np

**pandas concat** syntax
 - `pandas.concat(objs, *, axis=0, join='outer', ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, sort=False, copy=None)`

In [2]:
def _labelled_frame(first, last):
    """Build a four-column frame whose cells are '<column><row label>' strings.

    Rows are labelled ``first`` through ``last`` (inclusive) and the columns
    are A-D, so the cell at row 5, column "B" holds "B5".
    """
    row_labels = list(range(first, last + 1))
    return pd.DataFrame(
        {col: [f"{col}{row}" for row in row_labels] for col in "ABCD"},
        index=row_labels,
    )


# Three frames with identical columns and consecutive, non-overlapping indexes.
df1 = _labelled_frame(0, 3)
df2 = _labelled_frame(4, 7)
df3 = _labelled_frame(8, 11)

frames = [df1, df2, df3]

# Called with defaults only: the frames are stacked along axis=0 and keep their own index labels.
result = pd.concat(frames)

In [3]:
# Show the concatenated frame: a bare last expression is rendered as the cell's output.
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


#### Merging the pandas dataframe
**Syntax** <br>
   - `DataFrame.merge(right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=None, indicator=False, validate=None)`

In [5]:
# Column-oriented input: one list per column, all of the same length.
dict1 = {
    'Name': ['Sundar', 'Sunil', 'Anjali'],
    'Country': ['India', 'India', 'USA'],
    'Role': ['CEO', 'CTO', 'CTO'],
}
df1 = pd.DataFrame(data=dict1)

# print() separates its arguments with a space, so the table's first line is shifted by one character.
print('DataFrame 1:\n', df1)

DataFrame 1:
      Name Country Role
0  Sundar   India  CEO
1   Sunil   India  CTO
2  Anjali     USA  CTO


In [9]:
# Right-hand frame for the merge examples: an ID per name.
df2 = pd.DataFrame(
    {
        'ID': [1, 2, 3],
        'Name': ['Sundar', 'Ananth', 'Amit'],
    }
)
print('DataFrame 2:\n', df2)

DataFrame 2:
    ID    Name
0   1  Sundar
1   2  Ananth
2   3    Amit


In [10]:
# No `on` is given, so merge uses the column the two frames share ('Name')
# with the default how='inner'; only the 'Sundar' row exists on both sides.
df_merged = pd.merge(df1, df2)
print('Result:\n', df_merged)

Result:
      Name Country Role  ID
0  Sundar   India  CEO   1


#### Merging the pandas dataframe on specific columns

In [12]:
# pandas is already imported as `pd` in the notebook's first cell, so it is not re-imported here.
# This version of df1 also carries an 'ID' column, giving it two columns
# ('Name' and 'ID') in common with df2.
d1 = {
    'Name': ['Sundar', 'Sunil', 'Anjali'],
    'ID': [1, 2, 3],
    'Country': ['India', 'India', 'USA'],
    'Role': ['CEO', 'CTO', 'CTO'],
}
df1 = pd.DataFrame(d1)

In [13]:
# Right-hand frame: same 'ID' values as df1, but partly different names.
df2 = pd.DataFrame(
    {
        'ID': [1, 2, 3],
        'Name': ['Sundar', 'Ananth', 'Amit'],
    }
)

In [14]:
# Match rows on 'ID' only. 'Name' exists in both frames but is not a key,
# so both copies are kept and told apart with the default '_x' / '_y' suffixes.
print(pd.merge(df1, df2, on='ID'))

   Name_x  ID Country Role  Name_y
0  Sundar   1   India  CEO  Sundar
1   Sunil   2   India  CTO  Ananth
2  Anjali   3     USA  CTO    Amit


In [15]:
# Match rows on 'Name' only. Now 'ID' is the shared non-key column,
# so it is the one that receives the '_x' / '_y' suffixes.
print(pd.merge(df1, df2, on='Name'))

     Name  ID_x Country Role  ID_y
0  Sundar     1   India  CEO     1


#### Using `left_on` and `right_on` to merge DataFrames on differently named columns

In [18]:
# pandas is already imported as `pd` in the notebook's first cell, so it is not re-imported here.
# The ID columns are deliberately named differently ('ID1' vs 'ID2'),
# leaving 'Name' as the only column the two frames share.
d1 = {
    'Name': ['Sundar', 'Sunil', 'Anjali'],
    'ID1': [1, 2, 3],
    'Country': ['India', 'India', 'USA'],
    'Role': ['CEO', 'CTO', 'CTO'],
}
df1 = pd.DataFrame(d1)

df2 = pd.DataFrame({'ID2': [1, 2, 3], 'Name': ['Sundar', 'Ananth', 'Amit']})

# Default merge: joins on the shared 'Name' column, so only 'Sundar' matches.
print(df1.merge(df2))

     Name  ID1 Country Role  ID2
0  Sundar    1   India  CEO    1


In [19]:
# The key columns have different names in each frame, so they are paired explicitly.
# Both key columns are kept in the result, and the shared 'Name' column is suffixed.
print(pd.merge(df1, df2, left_on='ID1', right_on='ID2'))

   Name_x  ID1 Country Role  ID2  Name_y
0  Sundar    1   India  CEO    1  Sundar
1   Sunil    2   India  CTO    2  Ananth
2  Anjali    3     USA  CTO    3    Amit


#### Merging DataFrames on their indexes (`left_index` / `right_index`)

In [20]:
# pandas is already imported as `pd` in the notebook's first cell, so it is not re-imported here.
# Both frames use the default 0..2 index; 'Name' is the only column they share.
d1 = {
    'Name': ['Sundar', 'Sunil', 'Anjali'],
    'Country': ['India', 'India', 'USA'],
    'Role': ['CEO', 'CTO', 'CTO'],
}
df1 = pd.DataFrame(d1)

df2 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Sundar', 'Ananth', 'Amit']})

In [21]:
# Baseline for comparison: the default merge matches on the shared 'Name' column.
df_merged = pd.merge(df1, df2)
print('Result Default Merge:\n', df_merged)

Result Default Merge:
      Name Country Role  ID
0  Sundar   India  CEO   1


In [22]:
# Match rows by index label instead of by column values. 'Name' is then an
# ordinary overlapping column and is kept from both sides with '_x' / '_y' suffixes.
df_merged = pd.merge(
    df1,
    df2,
    left_index=True,
    right_index=True,
)
print('\nResult Index Merge:\n', df_merged)


Result Index Merge:
    Name_x Country Role  ID  Name_y
0  Sundar   India  CEO   1  Sundar
1   Sunil   India  CTO   2  Ananth
2  Anjali     USA  CTO   3    Amit


#### Joining the dataframes

In [23]:
# Left frame for the join examples: six rows keyed K0..K5.
df1 = pd.DataFrame(
    {
        'key': [f'K{i}' for i in range(6)],
        'A': [f'A{i}' for i in range(6)],
    }
)
df1

Unnamed: 0,key,A
0,K0,A0
1,K1,A1
2,K2,A2
3,K3,A3
4,K4,A4
5,K5,A5


In [25]:
# Right frame for the join examples: covers only the first three keys.
df2 = pd.DataFrame(
    {
        'key': [f'K{i}' for i in range(3)],
        'B': [f'B{i}' for i in range(3)],
    }
)
df2

Unnamed: 0,key,B
0,K0,B0
1,K1,B1
2,K2,B2


In [26]:
# Called without `on`, join() lines the frames up by index. Both have a 'key'
# column, and the suffixes tell the caller's copy apart from the other frame's.
df1.join(
    df2,
    lsuffix='_caller',
    rsuffix='_other',
)

Unnamed: 0,key_caller,A,key_other,B
0,K0,A0,K0,B0
1,K1,A1,K1,B1
2,K2,A2,K2,B2
3,K3,A3,,
4,K4,A4,,
5,K5,A5,,


- If we want to join using the key columns, we need to set key to be the index in both df1 and df2. The joined DataFrame will have key as its index.

In [29]:
# Move 'key' into the index on both sides so that join() lines the rows up on it.
(
    df1.set_index('key')
    .join(df2.set_index('key'))
)

Unnamed: 0_level_0,A,B
key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,B2
K3,A3,
K4,A4,
K5,A5,


- Another option for joining on the key column is the `on` parameter. `DataFrame.join` always uses the index of `other` (the frame passed in), but on the calling DataFrame we can name any column to match against it. This method preserves the calling DataFrame’s index in the result.

In [30]:
# Match the caller's 'key' column against the other frame's index.
df1.join(
    df2.set_index('key'),
    on='key',
)

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,
4,K4,A4,
5,K5,A5,


In [31]:
# Left frame with repeated keys (K0 twice, K1 three times) for the many-to-one example.
df1 = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K1', 'K3', 'K0', 'K1'],
        'A': [f'A{i}' for i in range(6)],
    }
)
df1

Unnamed: 0,key,A
0,K0,A0
1,K1,A1
2,K1,A2
3,K3,A3
4,K0,A4
5,K1,A5


In [32]:
# validate='m:1' asks pandas to check that the join is many-to-one, i.e. that the
# keys on the right-hand side are unique; the left frame may repeat keys.
df1.join(
    df2.set_index('key'),
    on='key',
    validate='m:1',
)

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K1,A2,B1
3,K3,A3,
4,K0,A4,B0
5,K1,A5,B1
