In [1]:
import pandas as pd
import numpy as np

In [3]:
def make_df(cols, inds):
    """Build a demo DataFrame whose cells spell out their own position.

    Each cell holds the column label concatenated with the string form of
    its row label, e.g. column ``'A'`` at row ``0`` holds ``'A0'``.

    Parameters
    ----------
    cols : iterable of str
        Column labels; iterating a string such as ``'ABC'`` yields one
        column per character.
    inds : iterable
        Row labels; also passed to ``pd.DataFrame`` as the index.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols`` and one row per entry of ``inds``.
    """
    data = {}
    for col in cols:
        data[col] = [col + str(row) for row in inds]
    return pd.DataFrame(data=data, index=inds)

In [4]:
# Sanity check of the helper: five rows (0-4) and columns A, B, C.
make_df(cols='ABC', inds=range(5))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
3,A3,B3,C3
4,A4,B4,C4


## Series

In [9]:
# Two Series with disjoint integer indexes (1-3 and 4-6).
s1 = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
s2 = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])
s1, s2

(1    A
 2    B
 3    C
 dtype: object, 4    D
 5    E
 6    F
 dtype: object)

In [11]:
# Stack the two Series end to end; the original index labels are kept.
pd.concat(objs=[s1, s2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

## Dataframe

In [18]:
# Same columns, disjoint row labels: the rows are simply stacked.
df1 = make_df(cols='AB', inds=[1, 2])
df2 = make_df(cols='AB', inds=[3, 4])
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [32]:
# Same row labels, disjoint columns: glue the frames side by side.
df1, df2 = make_df('AB', [1, 2]), make_df('CD', [1, 2])
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,C,D
1,A1,B1,C1,D1
2,A2,B2,C2,D2


In [34]:
# Row-wise concat of frames with different columns: cells that a source
# frame does not have are filled with NaN.
# `sort` is passed explicitly because pandas 0.23-0.25 emits a FutureWarning
# when the column axes are not aligned and `sort` is omitted. sort=False is
# the default from pandas 1.0 on and gives the same A, B, C, D order here.
pd.concat([df1, df2], sort=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,A,B,C,D
1,A1,B1,,
2,A2,B2,,
1,,,C1,D1
2,,,C2,D2


### Handling repeated indexes

In [38]:
# Two identical frames placed side by side: the column labels are repeated.
df1 = make_df(cols='AB', inds=[1, 2])
df2 = make_df(cols='AB', inds=[1, 2])
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,A.1,B.1
1,A1,B1,A1,B1
2,A2,B2,A2,B2


In [39]:
# Stacking the same frames row-wise keeps the duplicate index labels 1 and 2.
pd.concat([df1, df2], axis='index')

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
1,A1,B1
2,A2,B2


#### 1. Verify integrity

In [40]:
# verify_integrity=True makes concat raise ValueError when the inputs share
# index labels; the error is caught and printed because it is the point of
# this demonstration.
try:
    pd.concat(objs=[df1, df2], verify_integrity=True)
except ValueError as err:
    print('Value error. Integrety mismatch.', err)

Value error. Integrety mismatch. Indexes have overlapping values: Int64Index([1, 2], dtype='int64')


#### 2. Ignore original index and override

In [41]:
# Discard the original row labels and number the result 0..n-1 instead.
pd.concat(objs=[df1, df2], ignore_index=True)

Unnamed: 0,A,B
0,A1,B1
1,A2,B2
2,A1,B1
3,A2,B2


#### 3. Adding multi index key

In [83]:
# Tag each source frame with an outer index level: 'x' for df1, 'y' for df2.
a = pd.concat([df1, df2], keys=['x', 'y'])
a

Unnamed: 0,Unnamed: 1,A,B
x,1,A1,B1
x,2,A2,B2
y,1,A1,B1
y,2,A2,B2


### Join

In [125]:
# Frames that share only columns B and C.
df1 = make_df('ABC', [1, 2])
df2 = make_df('BCD', [3, 4])
print(df1)
print(df2)
# Default join is "outer": the result has the union of the columns, with NaN
# where a source frame lacks one.
# `sort` is passed explicitly because pandas 0.23-0.25 emits a FutureWarning
# when the column axes are not aligned and `sort` is omitted. sort=False is
# the default from pandas 1.0 on and gives the same A, B, C, D order here.
pd.concat([df1, df2], sort=False)


    A   B   C
1  A1  B1  C1
2  A2  B2  C2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  """


Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


##### Join as intersection of columns

In [120]:
# join='inner' keeps only the columns present in every input (B and C here).
pd.concat(objs=[df1, df2], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [136]:
# Keep exactly df1's columns in the stacked result.
# The `join_axes` argument was deprecated in pandas 0.25 and removed in 1.0;
# reindexing the concatenated frame on df1's columns gives the same result.
# `sort=False` avoids the unaligned-axis FutureWarning on pandas 0.23-0.25.
pd.concat([df1, df2], sort=False).reindex(columns=df1.columns)

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
4,,B4,C4


In [137]:
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
# `pd.concat` produces the same stacked frame.
# `sort=False` is explicit so that pandas 0.23-0.25 does not warn about the
# unaligned column axes; the column order (A, B, C, D) is unchanged.
pd.concat([df1, df2], sort=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


# Joining and merging

In [61]:
# df2 repeats key A1, so the merge on A below is one-to-many.
df1 = make_df(cols='AB', inds=range(3))
df2 = make_df(cols='AC', inds=[2, 1, 1, 0])
print(df1, df2, sep='\n')

    A   B
0  A0  B0
1  A1  B1
2  A2  B2
    A   C
2  A2  C2
1  A1  C1
1  A1  C1
0  A0  C0


In [62]:
# With no `on`, merge joins on the columns the frames share (only A here).
df1.merge(df2)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A1,B1,C1
3,A2,B2,C2


In [63]:
# The frames now share two columns, A and C.
df1 = make_df(cols='ABC', inds=range(3))
df2 = make_df(cols='ACD', inds=[2, 0])
print(df1, df2, sep='\n')

    A   B   C
0  A0  B0  C0
1  A1  B1  C1
2  A2  B2  C2
    A   C   D
2  A2  C2  D2
0  A0  C0  D0


In [64]:
# No `on` given: every shared column (A and C) is used as a join key.
pd.merge(left=df1, right=df2)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A2,B2,C2,D2


In [65]:
# Join on A only; the other shared column C comes back as C_x and C_y.
df1.merge(df2, on='A')

Unnamed: 0,A,B,C_x,C_y,D
0,A0,B0,C0,C0,D0
1,A2,B2,C2,C2,D2


In [66]:
# Outer join on A: keys found in only one frame are kept, padded with NaN.
df1.merge(df2, on='A', how='outer')

Unnamed: 0,A,B,C_x,C_y,D
0,A0,B0,C0,C0,D0
1,A1,B1,C1,,
2,A2,B2,C2,C2,D2


In [48]:
# Rename the key column so the two frames no longer share its name.
# The result is rebound instead of using `inplace=True`, which would mutate
# the frame object that earlier cells already displayed and cannot be
# chained.
df1 = df1.rename(columns={'A': 'A_new'})
df1.columns

Index(['A_new', 'B', 'C'], dtype='object')

In [50]:
# The key columns have different names, so name one per side.
df1.merge(df2, left_on='A_new', right_on='A')

Unnamed: 0,A_new,B,C_x,A,C_y,D
0,A0,B0,C0,A0,C0,D0
1,A2,B2,C2,A2,C2,D2


In [52]:
# Same merge as above, then drop the left-hand copy of the shared column C.
(
    pd.merge(df1, df2, left_on='A_new', right_on='A')
    .drop(columns='C_x')
)

Unnamed: 0,A_new,B,A,C_y,D
0,A0,B0,A0,C0,D0
1,A2,B2,A2,C2,D2


In [54]:
# Show both frames again before joining on their indexes.
print(df1, df2, sep='\n')

  A_new   B   C
0    A0  B0  C0
1    A1  B1  C1
2    A2  B2  C2
    A   C   D
2  A2  C2  D2
0  A0  C0  D0


## Join on indexes

In [55]:
# Use the row labels of both frames as the join keys (inner join by default).
df1.merge(df2, left_index=True, right_index=True)

Unnamed: 0,A_new,B,C_x,A,C_y,D
0,A0,B0,C0,A0,C0,D0
2,A2,B2,C2,A2,C2,D2


In [59]:
# DataFrame.join aligns on the index and is a left join by default, so every
# row of the left frame is kept.
df1[['B']].join(df2[['A']], how='left')

Unnamed: 0,B,A
0,B0,A0
1,B1,
2,B2,A2
