In [2]:
# Loading Sample Pandas DataFrames
import pandas as pd

df1 = pd.DataFrame.from_dict({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c'], 'col3': ['a1', 'b2', 'c3']})
df2 = pd.DataFrame.from_dict({'col1': [4, 5, 6], 'col2': ['d', 'e', 'f'], 'col3': ['d4', 'e5', 'f6']})
df3 = pd.DataFrame.from_dict({'col1': [7, 8, 9], 'col2': ['g', 'h', 'i'], 'col4': ['g7', 'h2', 'i3']})

print('df1 looks like:')
print(df1)
print('\ndf2 looks like:')
print(df2)
print('\ndf3 looks like:')
print(df3)

df1 looks like:
   col1 col2 col3
0     1    a   a1
1     2    b   b2
2     3    c   c3

df2 looks like:
   col1 col2 col3
0     4    d   d4
1     5    e   e5
2     6    f   f6

df3 looks like:
   col1 col2 col4
0     7    g   g7
1     8    h   h2
2     9    i   i3


In [None]:
# Understanding the Pandas concat() function
import pandas as pd
pd.concat(
    objs,                   # The Series or DataFrame objects to concatenate
    axis=0,                 # The axis to concatenate along
    join='outer',           # How to handle additional data elements
    ignore_index=False,     # Whether to maintain the original index or not
    keys=None,              # when multiple levels are passed
    levels=None,            # Levels to use to construct a multi-index
    names=None,             # Names for levels to use in multi-index
    verify_integrity=False, # Check if the new axis contains duplicates
    sort=False,             # Sort the non-concatenation axis, if columns aren't aligned
    copy=True               # If False, do not copy data unnecessarily
)

NameError: ignored

In [None]:
# Concatenating simple DataFrames
df_concat = pd.concat([df1, df2])
print(df_concat)

   col1 col2 col3
0     1    a   a1
1     2    b   b2
2     3    c   c3
0     4    d   d4
1     5    e   e5
2     6    f   f6


In [None]:
# Ignoring an index when appending
df_concat = pd.concat([df1, df2], ignore_index=True)
print(df_concat)


   col1 col2 col3
0     1    a   a1
1     2    b   b2
2     3    c   c3
3     4    d   d4
4     5    e   e5
5     6    f   f6


In [None]:
# Concatenating DataFrames with Different Columns
df_concat = pd.concat([df1, df3], ignore_index=True)
print(df_concat)

   col1 col2 col3 col4
0     1    a   a1  NaN
1     2    b   b2  NaN
2     3    c   c3  NaN
3     7    g  NaN   g7
4     8    h  NaN   h2
5     9    i  NaN   i3


In [None]:
# Changing to an inner concatenation
df_concat = pd.concat([df1, df3], ignore_index=True, join='inner')
print(df_concat)

   col1 col2
0     1    a
1     2    b
2     3    c
3     7    g
4     8    h
5     9    i


In [None]:
# Concatenating Multiple DataFrames
df_concat = pd.concat([df1, df2, df3], ignore_index=True)
print(df_concat)

   col1 col2 col3 col4
0     1    a   a1  NaN
1     2    b   b2  NaN
2     3    c   c3  NaN
3     4    d   d4  NaN
4     5    e   e5  NaN
5     6    f   f6  NaN
6     7    g  NaN   g7
7     8    h  NaN   h2
8     9    i  NaN   i3


In [None]:
# Appending Two DataFrames with .append()
df_append = df1.append(df2, ignore_index=True)
print(df_append)

   col1 col2 col3
0     1    a   a1
1     2    b   b2
2     3    c   c3
3     4    d   d4
4     5    e   e5
5     6    f   f6


In [3]:
# Loading Sample DataFrames
books = pd.DataFrame.from_dict({
    'Author ID': [1,1,2,3],
    'Book ID': [1,2,1,1],
    'Name': ['Intro to Python', 'Python 201', 'Data Science', 'Machine Learning']})

authors = pd.DataFrame.from_dict({
    'Author ID': [1,2,3,4],
    'Name': ['Nik', 'Kate', 'Jane', 'Evan']})

sales = pd.DataFrame.from_dict({
    'Author ID': [1,1,1,2,3,4],
    'Book ID': [1,2,1,1,1,1],
    'Sales': [10, 20, 10, 30, 45, 10]})

print('DataFrame books looks like:')
print(books.head(2))
print('\nDataFrame authors looks like:')
print(authors.head(2))
print('\nDataFrame sales looks like:')
print(sales.head(2))

DataFrame books looks like:
   Author ID  Book ID             Name
0          1        1  Intro to Python
1          1        2       Python 201

DataFrame authors looks like:
   Author ID  Name
0          1   Nik
1          2  Kate

DataFrame sales looks like:
   Author ID  Book ID  Sales
0          1        1     10
1          1        2     20


In [4]:
# Merging DataFrames based on a Single Column
merged = pd.merge(
    left=books,
    right=authors,
    left_on='Author ID',
    right_on='Author ID'
)

print(merged)

   Author ID  Book ID            Name_x Name_y
0          1        1   Intro to Python    Nik
1          1        2        Python 201    Nik
2          2        1      Data Science   Kate
3          3        1  Machine Learning   Jane


In [5]:
# Changing the merge type
merged = pd.merge(
    left=books,
    right=authors,
    left_on='Author ID',
    right_on='Author ID',
    how='outer'
)

print(merged)

   Author ID  Book ID            Name_x Name_y
0          1      1.0   Intro to Python    Nik
1          1      2.0        Python 201    Nik
2          2      1.0      Data Science   Kate
3          3      1.0  Machine Learning   Jane
4          4      NaN               NaN   Evan


In [6]:
# Changing the suffixes of duplicate columns
merged = pd.merge(
    left=books,
    right=authors,
    left_on='Author ID',
    right_on='Author ID',
    suffixes=(' (book)', ' (author)')
)

print(merged)


   Author ID  Book ID       Name (book) Name (author)
0          1        1   Intro to Python           Nik
1          1        2        Python 201           Nik
2          2        1      Data Science          Kate
3          3        1  Machine Learning          Jane


In [9]:
# Merging in Pandas based on a Compound Key
merged = pd.merge(
    left=books,
    right=sales,
    on=['Author ID', 'Book ID']
)

print(merged)

   Author ID  Book ID              Name  Sales
0          1        1   Intro to Python     10
1          1        1   Intro to Python     10
2          1        2        Python 201     20
3          2        1      Data Science     30
4          3        1  Machine Learning     45


In [10]:
# Understanding the .join() Method
df.join(
    other,
    on=None,
    how='left',
    lsuffix='',
    rsuffix='',
    sort=False
)

NameError: ignored

In [11]:
# Using .join to Join Two DataFrames
joined = books.join(authors, on='Author ID', lsuffix='_books', rsuffix='_authors')
print(joined)
 

   Author ID_books  Book ID        Name_books  Author ID_authors Name_authors
0                1        1   Intro to Python                  2         Kate
1                1        2        Python 201                  2         Kate
2                2        1      Data Science                  3         Jane
3                3        1  Machine Learning                  4         Evan
