`pd.concat` concatenates pandas objects along a particular axis, with optional set logic (union or intersection) along the other axes.
It can also add a layer of hierarchical indexing on the concatenation axis,
which may be useful if the labels are the same (or overlapping) on the passed axis number.

In [1]:
import pandas as pd
# Two small Series with default integer indexes; the value 'e' appears in both.
s1 = pd.Series(data=['a', 'b', 'e', 'f'])
s2 = pd.Series(data=['c', 'd', 'e'])


In [2]:
# Stack the two Series end to end; each piece keeps its own index labels,
# so labels 0-2 appear twice in the result.
pd.concat(objs=[s1, s2])

0    a
1    b
2    e
3    f
0    c
1    d
2    e
dtype: object

In [4]:
# Passing ignore_index=True discards the original labels and numbers the result 0..n-1.
pd.concat(objs=[s1, s2], ignore_index=True)

0    a
1    b
2    e
3    f
4    c
5    d
6    e
dtype: object

In [6]:
# The keys option tags each input and becomes the outermost level of a hierarchical index.
pd.concat(objs=[s1, s2], keys=['s1', 's2'])

s1  0    a
    1    b
    2    e
    3    f
s2  0    c
    1    d
    2    e
dtype: object

In [5]:
# The names option gives a name to each level of the resulting hierarchical index.
pd.concat(
    objs=[s1, s2],
    keys=['s1', 's2'],
    names=['Series name', 'Row ID'],
)

Series name  Row ID
s1           0         a
             1         b
             2         e
             3         f
s2           0         c
             1         d
             2         e
dtype: object

In [7]:
# Stack two DataFrames that have exactly the same columns; the rows of df2 are
# appended after those of df1 and each frame keeps its own index labels.
df1 = pd.DataFrame(
    data=[['a', 1], ['b', 2]],
    columns=['letter', 'number'],
)
df2 = pd.DataFrame(
    data=[['c', 3], ['d', 4]],
    columns=['letter', 'number'],
)

pd.concat(objs=[df1, df2])

Unnamed: 0,letter,number
0,a,1
1,b,2
0,c,3
1,d,4


In [8]:
# Concatenate frames whose columns only partly overlap and keep every column.
# Cells for a column that a frame lacks ('animal' for df1) are filled with NaN.
df3 = pd.DataFrame(
    data=[['c', 3, 'cat'], ['d', 4, 'dog']],
    columns=['letter', 'number', 'animal'],
)
pd.concat(objs=[df1, df3], sort=False)


Unnamed: 0,letter,number,animal
0,a,1,
1,b,2,
0,c,3,cat
1,d,4,dog


In [9]:
# With join='inner', only the columns present in every frame are kept,
# so 'animal' (which exists only in df3) is dropped from the result.
pd.concat(objs=[df1, df3], join='inner')

Unnamed: 0,letter,number
0,a,1
1,b,2
0,c,3
1,d,4


In [10]:
# Place the frames side by side (column-wise) by passing axis=1;
# rows are matched up by index label.
df4 = pd.DataFrame(
    data=[['bird', 'polly'], ['monkey', 'george']],
    columns=['animal', 'name'],
)
pd.concat(objs=[df1, df4], axis=1)

Unnamed: 0,letter,number,animal,name
0,a,1,bird,polly
1,b,2,monkey,george


In [11]:
# Prevent the result from including duplicate index values with the verify_integrity option.
# Both frames use the index label 'a', so the concat below raises ValueError.
# The error is the point of this example, so it is caught and its message printed;
# an uncaught exception would stop a "Restart & Run All" at this cell.
df5 = pd.DataFrame([1], index=['a'])
df6 = pd.DataFrame([2], index=['a'])
try:
    pd.concat([df5, df6], verify_integrity=True)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Indexes have overlapping values: Index(['a'], dtype='object')

In [12]:
# Sample data for the drop_duplicates examples: rows 0 and 1 are identical in
# every column, and several rows share the same brand or brand/style pair.
df = pd.DataFrame(
    data={
        'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'],
        'style': ['cup', 'cup', 'cup', 'pack', 'pack'],
        'rating': [4, 4, 3.5, 15, 5],
    }
)
df

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [14]:
# By default, drop_duplicates() compares all columns and removes rows that
# exactly repeat an earlier row (here row 1 repeats row 0, so row 1 is dropped).
df.drop_duplicates()

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [15]:
# Restrict the duplicate check to particular column(s) with subset;
# here only the first row seen for each brand is kept.
df.drop_duplicates(subset='brand')

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
2,Indomie,cup,3.5


In [16]:
# Use keep='last' to retain the last occurrence within each group of duplicates
# (groups here are defined by the brand/style pair) instead of the first.
df.drop_duplicates(
    subset=['brand', 'style'],
    keep='last',
)

Unnamed: 0,brand,style,rating
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5
4,Indomie,pack,5.0
