In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build a 9-element Series with a two-level (hierarchical) index: the first
# list supplies the outer labels ('a'..'d'), the second the inner labels (1..3).
# The values are drawn from a generator with a fixed seed so that
# Restart & Run All reproduces the same numbers on every run.
# NOTE: re-run the notebook to refresh the displayed values after this change.
data = pd.Series(np.random.default_rng(seed=0).uniform(size=9),
                 index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
                        [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data

a  1    0.659166
   2    0.461313
   3    0.281994
b  1    0.101784
   3    0.398133
c  1    0.218568
   2    0.712909
d  2    0.079530
   3    0.377556
dtype: float64

In [4]:
# Inspect the index: the two label lists were combined into a MultiIndex whose
# entries are (outer, inner) tuples.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [5]:
# Partial indexing: select every row under outer label 'a'; the result is
# indexed by the inner level only.
# Explicit label lookup is used rather than attribute access (data.a), which
# only works for labels that are valid identifiers and that do not collide
# with an existing Series attribute or method.
data.loc['a']

1    0.659166
2    0.461313
3    0.281994
dtype: float64

In [6]:
# Three selections shown together as a tuple:
#   1. rows under outer label 'b' (label-based),
#   2. rows under outer label 'c' (label-based, not attribute access),
#   3. everything except the last three rows (explicitly positional via .iloc).
data.loc['b'], data.loc['c'], data.iloc[:-3]

(1    0.101784
 3    0.398133
 dtype: float64,
 1    0.218568
 2    0.712909
 dtype: float64,
 a  1    0.659166
    2    0.461313
    3    0.281994
 b  1    0.101784
    3    0.398133
 c  1    0.218568
 dtype: float64)

In [7]:
# Redisplay the full Series for reference before selecting on the inner level.
data

a  1    0.659166
   2    0.461313
   3    0.281994
b  1    0.101784
   3    0.398133
c  1    0.218568
   2    0.712909
d  2    0.079530
   3    0.377556
dtype: float64

In [8]:
# Select on the inner level: take inner label 1 across all outer labels.
# Outer label 'd' has no inner label 1, so it is absent from the result.
data.loc[:, 1]

a    0.659166
b    0.101784
c    0.218568
dtype: float64

In [9]:
# Pivot the inner index level into columns, giving a DataFrame. Combinations
# that do not exist in the index (e.g. ('b', 2)) appear as missing values.
data.unstack()

Unnamed: 0,1,2,3
a,0.659166,0.461313,0.281994
b,0.101784,,0.398133
c,0.218568,0.712909,
d,,0.07953,0.377556


In [10]:
# The inverse operation of .unstack() is .stack(): it folds the columns back
# into the inner index level. The result matches `data`; the missing cells
# that unstack() displayed do not come back as rows.
data.unstack().stack()

a  1    0.659166
   2    0.461313
   3    0.281994
b  1    0.101784
   3    0.398133
c  1    0.218568
   2    0.712909
d  2    0.079530
   3    0.377556
dtype: float64

In [11]:
# With a DataFrame, either axis can carry a hierarchical index: here the rows
# have two levels (letter, number) and the columns have two levels
# (state, color).
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=[['a', 'a', 'b', 'b'],
           [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'],
             ['Green', 'Red', 'Green']],
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [12]:
# Give each level a name on both axes: row levels become key1/key2 and column
# levels become state/color. The names show up in the rendered table.
frame = frame.rename_axis(index=['key1', 'key2'], columns=['state', 'color'])
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


## 8.2 Combining and Merging Datasets

In [13]:
# Two small frames that share a 'key' column. df1 repeats keys ('a' and 'b'
# occur three times each) while every key in df2 is unique, so joining them
# on 'key' is a many-to-one merge.
df1 = pd.DataFrame({
    'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    'data1': range(7),
})
df2 = pd.DataFrame({
    'key': ['a', 'b', 'd'],
    'data2': range(3),
})


In [14]:
# Left-hand frame for the merges below: 'key' holds repeated labels.
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [15]:
# Right-hand frame for the merges below: each 'key' label appears once.
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [16]:
# Merge or join operations combine datasets by linking rows using one or more keys.
# No key is named here; as the result shows, the rows are matched on the
# column the two frames have in common ('key').
pd.merge(df1, df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [17]:
# Swap the argument order: the same rows are matched, but the column order
# (data2 before data1) and the row order now follow df2, the left-hand frame.
pd.merge(df2, df1)

Unnamed: 0,key,data2,data1
0,a,0,2
1,a,0,4
2,a,0,5
3,b,1,0
4,b,1,1
5,b,1,6


In [18]:
# Same join as pd.merge(df1, df2), with the key column named explicitly so the
# call does not depend on which column names happen to overlap.
pd.merge(df1, df2, on='key')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [19]:
# Same data as df1/df2, but the join column has a different name in each
# frame ('lkey' on the left, 'rkey' on the right).
df3 = pd.DataFrame({
    'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
    'data1': range(7),
})
df4 = pd.DataFrame({
    'rkey': ['a', 'b', 'd'],
    'data2': range(3),
})

In [20]:
# The key columns are named differently in each frame, so each side's key is
# given separately via left_on / right_on.
#
# You may notice that the 'c' and 'd' values and associated data are missing
# from the result. By default merge does an 'inner' join; the keys in the
# result are the intersection, or the common set found in both tables. Other
# possible options are 'left', 'right', and 'outer'. The outer join takes the
# union of the keys, combining the effect of applying both left and right
# joins.
pd.merge(df3, df4, left_on='lkey', right_on='rkey')

Unnamed: 0,lkey,data1,rkey,data2
0,b,0,b,1
1,b,1,b,1
2,b,6,b,1
3,a,2,a,0
4,a,4,a,0
5,a,5,a,0


In [21]:
# Outer join: keep the union of the keys from both frames. Keys present on
# only one side ('c' only in df1, 'd' only in df2) get a missing value in the
# other side's data column.
# The key column is named explicitly, as in the other keyed merges in this
# notebook, so the join does not change if the frames ever gain another
# column name in common.
pd.merge(df1, df2, on='key', how='outer')

Unnamed: 0,key,data1,data2
0,b,0.0,1.0
1,b,1.0,1.0
2,b,6.0,1.0
3,a,2.0,0.0
4,a,4.0,0.0
5,a,5.0,0.0
6,c,3.0,
7,d,,2.0


<p align="center"><img src="./../Python/DifferentJoinTypesWithHowArgument.png" alt="Different join types with the how argument"></p>

In [22]:
# Rebind df1 and df2 to frames in which BOTH sides repeat keys ('a' and 'b'
# each occur twice in df2), setting up a many-to-many merge.
df1 = pd.DataFrame({
    'key': ['b', 'b', 'a', 'c', 'a', 'b'],
    'data1': range(6),
})
df2 = pd.DataFrame({
    'key': ['a', 'b', 'a', 'b', 'd'],
    'data2': range(5),
})
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


In [23]:
# Right-hand frame for the many-to-many merge: 'a' and 'b' each appear twice.
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,a,2
3,b,3
4,d,4


In [24]:
# Left join on 'key': every df1 row is kept. Because 'a' and 'b' each appear
# twice in df2, each matching df1 row is repeated once per match, while 'c'
# (absent from df2) is kept with a missing data2 value.
pd.merge(df1, df2, on='key', how='left')

Unnamed: 0,key,data1,data2
0,b,0,1.0
1,b,0,3.0
2,b,1,1.0
3,b,1,3.0
4,a,2,0.0
5,a,2,2.0
6,c,3,
7,a,4,0.0
8,a,4,2.0
9,b,5,1.0
