In [1]:
import numpy as np
import pandas as pd

# *Series

In [2]:
# Sample inputs reused by the Series examples that follow.
lbl = list('xyz')              # index labels
my_data = [11, 22, 33]         # values as a plain Python list
arr = np.asarray(my_data)      # the same values as a NumPy array
d = dict(a=10, b=20, c=30)     # mapping whose keys will become the index

## Creating pandas series

### 1) Using Python List

In [3]:
pd.Series(my_data)               #it looks like numpy array but it has index

0    11
1    22
2    33
dtype: int64

In [4]:
pd.Series(data=my_data, index=lbl)       # use the entries of lbl as the index labels

x    11
y    22
z    33
dtype: int64

### 2) Using Numpy Array

In [5]:
pd.Series(arr)

0    11
1    22
2    33
dtype: int32

### 3) Using Python Dictionary

In [6]:
pd.Series(d)      # it automatically takes key as index 

a    10
b    20
c    30
dtype: int64

In [7]:
## A Series can hold almost any type of object, e.g. as shown below

In [8]:
lbl

['x', 'y', 'z']

In [9]:
pd.Series(lbl)

0    x
1    y
2    z
dtype: object

In [10]:
pd.Series([sum,print,len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [11]:
## Accessing values using index/key

In [12]:
# Two Series with the same values but only partly overlapping labels
# ('India' and 'Bhutan' appear in both).
series1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['India', 'SriLanka', 'Bhutan', 'Nepal'],
)
series2 = pd.Series(
    data=[1, 2, 3, 4],
    index=['India', 'Afganistan', 'Bhutan', 'Myanmar'],
)

In [13]:
series1['Nepal']

4

In [14]:
# Positional lookup of the third element. The index holds string labels, so
# .iloc is used explicitly: a bare integer key (series2[2]) only works through
# a positional fallback that newer pandas versions deprecate.
series2.iloc[2]

3

In [15]:
series1 + series2

Afganistan    NaN
Bhutan        6.0
India         2.0
Myanmar       NaN
Nepal         NaN
SriLanka      NaN
dtype: float64

# *DataFrame

In [16]:
import pandas as pd
import numpy as np
from numpy.random import randn
np.random.seed(101)

In [17]:
# 5x4 frame of random normal draws: rows labelled a-e, columns labelled w-z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('abcde'),
    columns=list('wxyz'),
)
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [18]:
### accessing columns

In [19]:
df['w']

a    2.706850
b    0.651118
c   -2.018168
d    0.188695
e    0.190794
Name: w, dtype: float64

In [20]:
type(df['w'])

pandas.core.series.Series

In [21]:
type(df)

pandas.core.frame.DataFrame

In [22]:
df.w     # attribute-style column access (reads like table.column in SQL); same result as df['w']

a    2.706850
b    0.651118
c   -2.018168
d    0.188695
e    0.190794
Name: w, dtype: float64

In [23]:
df.isnull()

Unnamed: 0,w,x,y,z
a,False,False,False,False
b,False,False,False,False
c,False,False,False,False
d,False,False,False,False
e,False,False,False,False


In [24]:
df.keys()

Index(['w', 'x', 'y', 'z'], dtype='object')

In [25]:
df[['w','z']]

Unnamed: 0,w,z
a,2.70685,0.503826
b,0.651118,0.605965
c,-2.018168,-0.589001
d,0.188695,0.955057
e,0.190794,0.683509


### adding new column

In [26]:
df['new'] = df['w'] + df['y']

In [27]:
df

Unnamed: 0,w,x,y,z,new
a,2.70685,0.628133,0.907969,0.503826,3.614819
b,0.651118,-0.319318,-0.848077,0.605965,-0.196959
c,-2.018168,0.740122,0.528813,-0.589001,-1.489355
d,0.188695,-0.758872,-0.933237,0.955057,-0.744542
e,0.190794,1.978757,2.605967,0.683509,2.796762


### dropping column

In [28]:
df.drop('new', axis=1)

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [29]:
df

Unnamed: 0,w,x,y,z,new
a,2.70685,0.628133,0.907969,0.503826,3.614819
b,0.651118,-0.319318,-0.848077,0.605965,-0.196959
c,-2.018168,0.740122,0.528813,-0.589001,-1.489355
d,0.188695,-0.758872,-0.933237,0.955057,-0.744542
e,0.190794,1.978757,2.605967,0.683509,2.796762


In [30]:
df.drop('new', axis=1, inplace=True)    # inplace=True modifies df itself instead of returning a new frame

In [31]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [32]:
df.drop('e')    # by default axis=0, so 'e' is looked up among the row labels

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057


In [33]:
df.shape

(5, 4)

In [34]:
df[['x','y','z']]

Unnamed: 0,x,y,z
a,0.628133,0.907969,0.503826
b,-0.319318,-0.848077,0.605965
c,0.740122,0.528813,-0.589001
d,-0.758872,-0.933237,0.955057
e,1.978757,2.605967,0.683509


In [35]:
## Rows

In [36]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [37]:
df.loc['b']

w    0.651118
x   -0.319318
y   -0.848077
z    0.605965
Name: b, dtype: float64

In [38]:
df.iloc[2]        # position-based: works even when the index labels are not integers

w   -2.018168
x    0.740122
y    0.528813
z   -0.589001
Name: c, dtype: float64

### dealing with subset

In [39]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [40]:
df.loc['d','y']

-0.9332372163009188

In [41]:
df.loc[['b','c'],['x','z']]

Unnamed: 0,x,z
b,-0.319318,0.605965
c,0.740122,-0.589001


In [42]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [43]:
df>0

Unnamed: 0,w,x,y,z
a,True,True,True,True
b,True,False,False,True
c,False,True,True,False
d,True,False,False,True
e,True,True,True,True


In [44]:
df[df>0]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,,,0.605965
c,,0.740122,0.528813,
d,0.188695,,,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [45]:
df['w']>0

a     True
b     True
c    False
d     True
e     True
Name: w, dtype: bool

In [46]:
df[df['w']>0]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [47]:
df[df['z']<0]

Unnamed: 0,w,x,y,z
c,-2.018168,0.740122,0.528813,-0.589001


In [48]:
# Rows where 'w' is positive, restricted to columns 'y' and 'x' (in that order).
# One .loc call selects rows and columns together, instead of chaining two
# separate indexing steps (df[mask][cols]) through an intermediate frame.
df.loc[df['w'] > 0, ['y', 'x']]

# The same selection split into chunks:
# boolseries = df['w'] > 0
# mycols = ['y', 'x']
# df.loc[boolseries, mycols]


Unnamed: 0,y,x
a,0.907969,0.628133
b,-0.848077,-0.319318
d,-0.933237,-0.758872
e,2.605967,1.978757


In [49]:
## multiple conditions (combine boolean masks with & and |, each wrapped in parentheses)

In [50]:
df[(df['w']>0) & (df['y']<1)]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
d,0.188695,-0.758872,-0.933237,0.955057


In [51]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


### set & reset index

In [52]:
### reset index

In [53]:
df.reset_index() 

Unnamed: 0,index,w,x,y,z
0,a,2.70685,0.628133,0.907969,0.503826
1,b,0.651118,-0.319318,-0.848077,0.605965
2,c,-2.018168,0.740122,0.528813,-0.589001
3,d,0.188695,-0.758872,-0.933237,0.955057
4,e,0.190794,1.978757,2.605967,0.683509


In [54]:
## set index

In [55]:
newindex = 'MH MP KL UP AP'.split()
newindex

['MH', 'MP', 'KL', 'UP', 'AP']

In [56]:
df['states'] = newindex

In [57]:
df

Unnamed: 0,w,x,y,z,states
a,2.70685,0.628133,0.907969,0.503826,MH
b,0.651118,-0.319318,-0.848077,0.605965,MP
c,-2.018168,0.740122,0.528813,-0.589001,KL
d,0.188695,-0.758872,-0.933237,0.955057,UP
e,0.190794,1.978757,2.605967,0.683509,AP


In [58]:
df.set_index('states')

Unnamed: 0_level_0,w,x,y,z
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MH,2.70685,0.628133,0.907969,0.503826
MP,0.651118,-0.319318,-0.848077,0.605965
KL,-2.018168,0.740122,0.528813,-0.589001
UP,0.188695,-0.758872,-0.933237,0.955057
AP,0.190794,1.978757,2.605967,0.683509


### Multiple Index

In [59]:
# index level 
first = ['G1','G1','G1','G2','G2','G2']
second = [1,2,3,1,2,3]
hr_index = list(zip(first , second))
hr_index = pd.MultiIndex.from_tuples(hr_index)

In [60]:
list(zip(first , second))

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [61]:
hr_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [62]:
# 6x2 frame of random draws, with the two-level index on the rows.
mul_df = pd.DataFrame(
    data=randn(6, 2),
    index=hr_index,
    columns=['A', 'B'],
)

In [63]:
mul_df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [64]:
mul_df.loc['G1']

Unnamed: 0,A,B
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [65]:
mul_df.loc['G1'].loc[2]

A   -1.706086
B   -1.159119
Name: 2, dtype: float64

In [66]:
# set index name

In [67]:
mul_df.index.names

FrozenList([None, None])

In [68]:
mul_df.index.names = ['Group','No']

In [69]:
mul_df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,No,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [70]:
# Fetch a single cell in one lookup: the tuple addresses row ('G1', 3) across
# both index levels, and 'B' picks the column. This replaces three chained
# indexing steps with one.
mul_df.loc[('G1', 3), 'B']

0.39052784273374097

In [71]:
# Cross-section accessing/grabbing

In [72]:
mul_df.xs(1,level='No')                   # returns the rows with No == 1 from both groups G1 and G2, which is awkward to do with loc

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502


# Handling Missing Data

In [73]:
import numpy as np
import pandas as pd

In [74]:
d = {'A': [8,9,np.nan], 'B': [5,np.nan,np.nan], 'C': [4,5,6]}

In [75]:
df = pd.DataFrame(d)

In [76]:
df

Unnamed: 0,A,B,C
0,8.0,5.0,4
1,9.0,,5
2,,,6


### dropna()   

In [77]:
 ## dropna() drops every row (axis=0, the default) or every column (axis=1) that contains at least one null value

In [78]:
df.dropna()

Unnamed: 0,A,B,C
0,8.0,5.0,4


In [79]:
df.dropna(axis=1)

Unnamed: 0,C
0,4
1,5
2,6


In [80]:
df.dropna(thresh=2)   ### keeps only rows with at least 2 non-null values; row 2 has just one, so it is dropped

Unnamed: 0,A,B,C
0,8.0,5.0,4
1,9.0,,5


### fillna()

In [81]:
## fillna() can “fill in” NA values with non-NA data in a couple of ways, illustrated below

In [82]:
df.fillna(value="Fill Value")

Unnamed: 0,A,B,C
0,8.0,5.0,4
1,9.0,Fill Value,5
2,Fill Value,Fill Value,6


In [83]:
df['A'].fillna(value=df['A'].mean())             ## fill the missing values in column 'A' with that column's mean

0    8.0
1    9.0
2    8.5
Name: A, dtype: float64

In [84]:
df['B'].fillna(value=df['B'].mean())

0    5.0
1    5.0
2    5.0
Name: B, dtype: float64

## Groupby

In [85]:
## Groupby allows you to group rows together based on a column and apply an aggregate function to each group
import numpy as np
import pandas as pd

In [86]:
# Toy sales records: one entry per company / salesperson pair.
data = dict(
    Company=['Ggl', 'FB', 'IBM', 'APL', 'MSFT', 'AMZN'],
    Person=['Rahul', 'Dheeraj', 'Shaam', 'Shundar', 'Chandu', 'Vivek'],
    Sales=[200, 120, 310, 220, 250, 125],
)

In [87]:
data

{'Company': ['Ggl', 'FB', 'IBM', 'APL', 'MSFT', 'AMZN'],
 'Person': ['Rahul', 'Dheeraj', 'Shaam', 'Shundar', 'Chandu', 'Vivek'],
 'Sales': [200, 120, 310, 220, 250, 125]}

In [88]:
df = pd.DataFrame(data)

In [89]:
df

Unnamed: 0,Company,Person,Sales
0,Ggl,Rahul,200
1,FB,Dheeraj,120
2,IBM,Shaam,310
3,APL,Shundar,220
4,MSFT,Chandu,250
5,AMZN,Vivek,125


In [90]:
byComp = df.groupby('Company')

In [91]:
byComp

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000026C45213070>

In [92]:
# Mean sales per company. The numeric 'Sales' column is selected explicitly:
# the text column 'Person' cannot be averaged, and recent pandas versions raise
# an error instead of silently leaving such columns out.
byComp[['Sales']].mean()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
AMZN,125
APL,220
FB,120
Ggl,200
IBM,310
MSFT,250


In [93]:
# Standard deviation of sales per company (NaN here, since every group has a
# single row). 'Sales' is selected explicitly because the text column 'Person'
# has no standard deviation and makes recent pandas versions raise.
byComp[['Sales']].std()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
AMZN,
APL,
FB,
Ggl,
IBM,
MSFT,


In [94]:
# Total sales per company. 'Sales' is selected explicitly so the text column
# 'Person' is not summed (i.e. string-concatenated) along with it.
byComp[['Sales']].sum()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
AMZN,125
APL,220
FB,120
Ggl,200
IBM,310
MSFT,250


In [95]:
# Total sales for one company: aggregate, then pick the 'FB' row.
# 'Sales' is selected explicitly so the text column 'Person' stays out of the sum.
# Only the last expression of the cell is displayed.
byComp[['Sales']].sum().loc['FB']

# OR, without the intermediate groupby variable
df.groupby('Company')[['Sales']].sum().loc['FB']

Sales    120
Name: FB, dtype: int64

In [96]:
byComp.count()    # number of non-null entries per column in each group (not displayed: a cell shows only its last expression)

# OR, without the intermediate groupby variable
df.groupby('Company').count()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
AMZN,1,1
APL,1,1
FB,1,1
Ggl,1,1
IBM,1,1
MSFT,1,1


In [97]:
byComp.max()    # per-group maximum of each column (not displayed: a cell shows only its last expression)

# OR, without the intermediate groupby variable
df.groupby('Company').max()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
AMZN,Vivek,125
APL,Shundar,220
FB,Dheeraj,120
Ggl,Rahul,200
IBM,Shaam,310
MSFT,Chandu,250


In [98]:
byComp.min()    # per-group minimum of each column (not displayed: a cell shows only its last expression)

# OR, without the intermediate groupby variable
df.groupby('Company').min()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
AMZN,Vivek,125
APL,Shundar,220
FB,Dheeraj,120
Ggl,Rahul,200
IBM,Shaam,310
MSFT,Chandu,250


In [99]:
df.groupby('Company').describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
AMZN,1.0,125.0,,125.0,125.0,125.0,125.0,125.0
APL,1.0,220.0,,220.0,220.0,220.0,220.0,220.0
FB,1.0,120.0,,120.0,120.0,120.0,120.0,120.0
Ggl,1.0,200.0,,200.0,200.0,200.0,200.0,200.0
IBM,1.0,310.0,,310.0,310.0,310.0,310.0,310.0
MSFT,1.0,250.0,,250.0,250.0,250.0,250.0,250.0


In [100]:
df.groupby('Company').describe().transpose()

Unnamed: 0,Company,AMZN,APL,FB,Ggl,IBM,MSFT
Sales,count,1.0,1.0,1.0,1.0,1.0,1.0
Sales,mean,125.0,220.0,120.0,200.0,310.0,250.0
Sales,std,,,,,,
Sales,min,125.0,220.0,120.0,200.0,310.0,250.0
Sales,25%,125.0,220.0,120.0,200.0,310.0,250.0
Sales,50%,125.0,220.0,120.0,200.0,310.0,250.0
Sales,75%,125.0,220.0,120.0,200.0,310.0,250.0
Sales,max,125.0,220.0,120.0,200.0,310.0,250.0


In [101]:
df.groupby('Company').describe().transpose()['Ggl']

Sales  count      1.0
       mean     200.0
       std        NaN
       min      200.0
       25%      200.0
       50%      200.0
       75%      200.0
       max      200.0
Name: Ggl, dtype: float64

## Merging, Joining and Concatenating

In [102]:
# First of three frames sharing columns A-D; cell values are "<column><row label>".
df1 = pd.DataFrame(
    {col: [col + str(i) for i in (0, 1, 2, 3)] for col in 'ABCD'},
    index=[0, 1, 2, 3],
)

In [103]:
# Second frame: same columns A-D, row labels 4-7.
df2 = pd.DataFrame(
    {col: [col + str(i) for i in (4, 5, 6, 7)] for col in 'ABCD'},
    index=[4, 5, 6, 7],
)

In [104]:
# Third frame: same columns A-D, row labels 8-11.
df3 = pd.DataFrame(
    {col: [col + str(i) for i in (8, 9, 10, 11)] for col in 'ABCD'},
    index=[8, 9, 10, 11],
)

In [105]:
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [106]:
df2

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [107]:
df3

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


### Concatenation

In [108]:
# you can use pd.concat and pass in a list of DataFrames to concatenate together. Dimensions should match along the axis you are concatenating

In [109]:
pd.concat([df1,df2,df3])    # along rows

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [110]:
pd.concat([df1,df2,df3], axis=1)    # along column

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,A0,B0,C0,D0,,,,,,,,
1,A1,B1,C1,D1,,,,,,,,
2,A2,B2,C2,D2,,,,,,,,
3,A3,B3,C3,D3,,,,,,,,
4,,,,,A4,B4,C4,D4,,,,
5,,,,,A5,B5,C5,D5,,,,
6,,,,,A6,B6,C6,D6,,,,
7,,,,,A7,B7,C7,D7,,,,
8,,,,,,,,,A8,B8,C8,D8
9,,,,,,,,,A9,B9,C9,D9


In [111]:
### Another Dataframe

In [112]:
# Two frames sharing a 'key' column with the same four values,
# each carrying its own pair of payload columns.
left = pd.DataFrame(
    dict(
        key=['K0', 'K1', 'K2', 'K3'],
        A=['A0', 'A1', 'A2', 'A3'],
        B=['B0', 'B1', 'B2', 'B3'],
    )
)

right = pd.DataFrame(
    dict(
        key=['K0', 'K1', 'K2', 'K3'],
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
    )
)

In [113]:
left

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [114]:
right

Unnamed: 0,key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


### Merging

In [115]:
# The merge function allows you to merge DataFrames together using a similar logic as merging SQL Tables together, For example:

In [116]:
pd.merge(left,right,how='inner',on='key')

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [117]:
# or to show more complicated example

In [118]:
# Frames keyed on two columns; only the (K0, K0) and (K2, K0) key pairs
# occur in both frames.
left = pd.DataFrame(
    dict(
        key1=['K0', 'K1', 'K2', 'K3'],
        key2=['K0', 'K1', 'K0', 'K1'],
        A=['A0', 'A1', 'A2', 'A3'],
        B=['B0', 'B1', 'B2', 'B3'],
    )
)

right = pd.DataFrame(
    dict(
        key1=['K0', 'K1', 'K2', 'K3'],
        key2=['K0', 'K0', 'K0', 'K0'],
        C=['C0', 'C1', 'C2', 'C3'],
        D=['D0', 'D1', 'D2', 'D3'],
    )
)

In [119]:
pd.merge(left,right, on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K2,K0,A2,B2,C2,D2


In [120]:
pd.merge(left,right,how='outer',on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K1,A1,B1,,
2,K2,K0,A2,B2,C2,D2
3,K3,K1,A3,B3,,
4,K1,K0,,,C1,D1
5,K3,K0,,,C3,D3


In [121]:
pd.merge(left,right,how='right',on=['key1','key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,,,C1,D1
2,K2,K0,A2,B2,C2,D2
3,K3,K0,,,C3,D3


### Joining

In [122]:
## Joining is a convenient method for combining the columns of two potentially differently-indexed DataFrames into a single result DataFrame

In [123]:
# Frames that are aligned by row label rather than by a key column;
# the labels overlap only partly (K0 and K2 are common).
left = pd.DataFrame(
    dict(A=['A0', 'A1', 'A2'], B=['B0', 'B1', 'B2']),
    index=['K0', 'K1', 'K2'],
)

right = pd.DataFrame(
    dict(C=['C0', 'C2', 'C3'], D=['D0', 'D2', 'D3']),
    index=['K0', 'K2', 'K3'],
)

In [124]:
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [125]:
left.join(right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3
