___


<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="Rossum"></p>

___

## Creating a Pandas Series

In [1]:
import numpy as np
import pandas as pd

### Creating a Pandas Series with Basic Format

In [2]:
ser=pd.Series([5,10,15,20,25])

In [3]:
ser

0     5
1    10
2    15
3    20
4    25
dtype: int64

### Basic Attributes of Series

In [4]:
type(ser)

pandas.core.series.Series

In [5]:
ser.dtype

dtype('int64')

In [6]:
ser.size

5

In [7]:
ser.ndim

1

In [8]:
ser.values

array([ 5, 10, 15, 20, 25], dtype=int64)

In [9]:
ser.head(3)

0     5
1    10
2    15
dtype: int64

In [10]:
ser.tail(2)

3    20
4    25
dtype: int64

### Creating Pandas Series by Using a ``list``, numpy array or ``dict``ionary

In [5]:
labels = [i for i in 'python']
my_list = list(np.arange(6))
d = dict(zip(labels,my_list))

arr = np.array([10, 20, 30,40,50,60])


### **Using Lists**

In [4]:
pd.Series(labels)

0    p
1    y
2    t
3    h
4    o
5    n
dtype: object

0    p
1    y
2    t
3    h
4    o
5    n
dtype: object

### **Using NumPy Arrays**

In [7]:
pd.Series(arr, index = labels)

p    10
y    20
t    30
h    40
o    50
n    60
dtype: int32

p    10
y    20
t    30
h    40
o    50
n    60
dtype: int32

### **Using Dictionary**

In [8]:
pd.Series(d)

p    0
y    1
t    2
h    3
o    4
n    5
dtype: int64

p    0
y    1
t    2
h    3
o    4
n    5
dtype: int64

q    NaN
o    4.0
y    1.0
t    2.0
k    NaN
p    0.0
dtype: float64

### Data in a Series

A pandas Series can hold a variety of object types:

In [9]:
pd.Series({1,2,3,6})

TypeError: 'set' type is unordered

In [10]:
pd.Series(set)

0    <class 'set'>
dtype: object

In [11]:
pd.Series(['pandas', 5, False, np.mean, len])

0                                   pandas
1                                        5
2                                    False
3    <function mean at 0x000001B621337E50>
4                  <built-in function len>
dtype: object

***



## Indexing Pandas Series

The key to using a Series is understanding its index. Pandas makes use of these index names or numbers by allowing for fast look up of information.

Let's see some examples of how to grab information from a Series. Let us create two sereis, ser1 and ser2:

In [12]:
ser1 = pd.Series([1,2,3,4,5],index = ['numpy', 'pandas','sql', 'gss','scipy'])                      

In [13]:
ser2 = pd.Series([1,2,5,4,6],index = ['numpy', 'pandas','tableau', 'seaborn','matplotlib'])

In [14]:
ser1

numpy     1
pandas    2
sql       3
gss       4
scipy     5
dtype: int64

In [15]:
ser2

numpy         1
pandas        2
tableau       5
seaborn       4
matplotlib    6
dtype: int64

In [17]:
ser1['numpy']

1

In [18]:
ser2['numpy']

1

In [20]:
ser2['tableau']

5

Operations are then also done based off of index:

In [21]:
ser1 + ser2

gss           NaN
matplotlib    NaN
numpy         2.0
pandas        4.0
scipy         NaN
seaborn       NaN
sql           NaN
tableau       NaN
dtype: float64

### Indexing Examples

In [22]:
np.random.seed(101)
a = np.array(np.random.randint(5,12,6))
panser = pd.Series(a)
panser

0     8
1     6
2    11
3    10
4     8
5     6
dtype: int32

In [23]:
panser[:2]

0    8
1    6
dtype: int32

0    8
1    6
dtype: int32

### pandas_series[index] | pandas_series[[indices, indices...]]

In [24]:
panser = pd.Series(data = [25, 32, 35, 33], index = ["terry", "micheal", "orion", "jason"])
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [25]:
panser['orion']

35

35

In [26]:
index1 = ['terry','micheal','jason']

In [27]:
panser[index1]

terry      25
micheal    32
jason      33
dtype: int64

In [28]:
panser['terry':    ]

terry      25
micheal    32
orion      35
jason      33
dtype: int64

terry      25
micheal    32
orion      35
dtype: int64

In [29]:
panser[ : ]

terry      25
micheal    32
orion      35
jason      33
dtype: int64

terry      121
micheal    200
orion      150
dtype: int64

### Several Selecting Attributes

In [30]:
panser.index

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

In [31]:
panser.index

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

In [None]:
panser.

array([25, 32, 35, 33], dtype=int64)

In [32]:
panser.items()

<zip at 0x1b6243bf100>

In [107]:
list(panser.items())

[('terry', 25), ('micheal', 32), ('orion', 35), ('jason', 33)]

In [33]:
'jackson' in panser

False

In [None]:
25 in panser.

True

In [34]:
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [35]:
panser[panser > 32]

orion    35
jason    33
dtype: int64

In [36]:
panser > 32

terry      False
micheal    False
orion       True
jason       True
dtype: bool

DataFrames

In [37]:
data = np.arange(1,21,4)

In [38]:
pd.DataFrame(data, columns=['column1'])

Unnamed: 0,column1
0,1
1,5
2,9
3,13
4,17


In [39]:
data2 = np.arange(1,18,2).reshape(3,3)

In [40]:
df3=pd.DataFrame(data2, columns=['var1', 'var2', 'var3'])
df3

Unnamed: 0,var1,var2,var3
0,1,3,5
1,7,9,11
2,13,15,17


Unnamed: 0,var1,var2,var3
0,1,3,5
1,7,9,11
2,13,15,17


In [41]:
df3.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [42]:
df3.columns = ['new1', 'new2', 'new3']

In [43]:
df3

Unnamed: 0,new1,new2,new3
0,1,3,5
1,7,9,11
2,13,15,17


for your info:

* 1 dim --> vector 
* 2 dim --> matrix
* 3 dim --> tensor

 - ### Creating a DataFrame using a ``NumPy Arrays``

In [44]:
m=np.arange(1,50,5).reshape(5,2)
m

array([[ 1,  6],
       [11, 16],
       [21, 26],
       [31, 36],
       [41, 46]])

In [45]:
df=pd.DataFrame(m, columns=['col1','col2'])
df

Unnamed: 0,col1,col2
0,1,6
1,11,16
2,21,26
3,31,36
4,41,46


In [47]:
df[:3]

Unnamed: 0,col1,col2
0,1,6
1,11,16
2,21,26


Unnamed: 0,col1,col2
2,21,26
1,11,16


 - ### Creating a DataFrame using a ``dict``

In [48]:
np.random.seed(101)
s1 = np.random.randint(10,18, size = 4)
s2 = np.random.randint(19,27, size = 4)
s3 = np.random.randint(28,35, size = 4)

In [49]:
myDict= {'var1':s1,'var2':s2,'var3':s3}

In [50]:
df1 = pd.DataFrame(myDict)

In [51]:
df1

Unnamed: 0,var1,var2,var3
0,17,26,29
1,13,26,33
2,11,24,28
3,16,22,32


- Simple indexing and slicing the ``DataFrames``

In [54]:
df1[1:3]

Unnamed: 0,var1,var2,var3
1,13,26,33
2,11,24,28


Unnamed: 0,var1,var2,var3
1,13,26,33
2,11,24,28


In [55]:
df1.index

RangeIndex(start=0, stop=4, step=1)

In [56]:
df1.index = list("abcd")

In [57]:
df1

Unnamed: 0,var1,var2,var3
a,17,26,29
b,13,26,33
c,11,24,28
d,16,22,32


In [69]:
df1['b':]

Unnamed: 0,var1,var2,var3
b,13,26,33
c,11,24,28
d,16,22,32


Unnamed: 0,var1,var2,var3
b,13,26,33
c,11,24,28
d,16,22,32


In [163]:
# we can check any column name whether it belongs to the DataFrame or not
"var2" in df1

True

In [164]:
'var5' in df1

False

### Now, let's examine again the ***idexing, selection*** and ***slicing*** methods and several ***attributes*** using a different DataFrame

In [59]:
from numpy.random import randn
np.random.seed(101)

In [64]:
# creating a DataFrame by "keyword arguments"
df3=pd.DataFrame(randn(5,4),columns='w x y z'.split(), index='a b c d e'.split())
df3

Unnamed: 0,w,x,y,z
a,-0.993263,0.1968,-1.136645,0.000366
b,1.025984,-0.156598,-0.031579,0.649826
c,2.154846,-0.610259,-0.755325,-0.346419
d,0.147027,-0.479448,0.558769,1.02481
e,-0.925874,1.862864,-1.133817,0.610478


## Selection and Indexing

Let's learn the various methods to grab data from a DataFrame

In [70]:
df3['b':]

Unnamed: 0,w,x,y,z
b,1.025984,-0.156598,-0.031579,0.649826
c,2.154846,-0.610259,-0.755325,-0.346419
d,0.147027,-0.479448,0.558769,1.02481
e,-0.925874,1.862864,-1.133817,0.610478


#### DataFrame Columns are just Series

In [73]:
df3[['y']]

Unnamed: 0,y
a,-1.136645
b,-0.031579
c,-0.755325
d,0.558769
e,-1.133817


Unnamed: 0,y
a,-0.376519
b,-0.03116
c,0.187125
d,0.961458
e,-1.04678


In [74]:
# Pass a list of column names
c_name = ['w', 'y']
df3[c_name]

Unnamed: 0,w,y
a,-0.993263,-1.136645
b,1.025984,-0.031579
c,2.154846,-0.755325
d,0.147027,0.558769
e,-0.925874,-1.133817


Unnamed: 0,w,y
a,0.38603,-0.376519
b,0.681209,-0.03116
c,-1.005187,0.187125
d,-1.38292,0.961458
e,0.992573,-1.04678


In [175]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [82]:
df3['c':]

Unnamed: 0,w,x,y,z
c,2.154846,-0.610259,-0.755325,-0.346419
d,0.147027,-0.479448,0.558769,1.02481
e,-0.925874,1.862864,-1.133817,0.610478


Unnamed: 0,w,x,y,z
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


**Creating a new column:**

In [None]:

df3

In [84]:
df3['w+z'] = df3.w+df3.z

Unnamed: 0,w,x,y,z,w+z
a,2.70685,0.628133,0.907969,0.503826,3.210676
b,0.651118,-0.319318,-0.848077,0.605965,1.257083
c,-2.018168,0.740122,0.528813,-0.589001,-2.607169
d,0.188695,-0.758872,-0.933237,0.955057,1.143752
e,0.190794,1.978757,2.605967,0.683509,0.874303


 - ### Removing Columns

In [179]:
# Not inplace unless specified!


In [86]:
df3.drop('w+z', axis=1)

Unnamed: 0,w,x,y,z
a,-0.993263,0.1968,-1.136645,0.000366
b,1.025984,-0.156598,-0.031579,0.649826
c,2.154846,-0.610259,-0.755325,-0.346419
d,0.147027,-0.479448,0.558769,1.02481
e,-0.925874,1.862864,-1.133817,0.610478


In [180]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


 - ### Removing rows

In [181]:
# the default value of axis is 0 (axis = 0)


In [88]:
df3.drop('c')

Unnamed: 0,w,x,y,z,w+z
a,-0.993263,0.1968,-1.136645,0.000366,-0.992897
b,1.025984,-0.156598,-0.031579,0.649826,1.67581
d,0.147027,-0.479448,0.558769,1.02481,1.171837
e,-0.925874,1.862864,-1.133817,0.610478,-0.315396


In [182]:
df4

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [91]:
df3['e':]

Unnamed: 0,w,x,y,z,w+z
e,-0.925874,1.862864,-1.133817,0.610478,-0.315396


### Selecting Rows

- ### First, let's take a quick look at [`.loc[]`](http://localhost:8888/notebooks/pythonic/DAwPythonSessions/w3resource-pandas-dataframe-loc.ipynb) | [`.iloc[]`](http://localhost:8888/notebooks/pythonic/DAwPythonSessions/w3resource-pandas-dataframe-iloc.ipynb)

#### `.loc[]` → allows us to select data using **labels** (names) of rows (index) & columns

#### `.iloc[]` → allows us to select data using **index numbers** of rows (index) & columns. it's like classical indexing logic

In [98]:
np.random.seed(101)
m=np.random.randint(1,20, size=(5,4))
df4 = pd.DataFrame(m, columns = ["var1","var2","var3",'var4'])
df4

Unnamed: 0,var1,var2,var3,var4
0,12,18,7,12
1,16,10,14,9
2,5,9,1,15
3,6,13,9,18
4,16,9,3,13


In [104]:
df4.loc[[4]]

Unnamed: 0,var1,var2,var3,var4
4,16,9,3,13


var1    16
var2     9
var3     3
var4    13
Name: 4, dtype: int32

In [None]:
# Slicing produces the same type of the data. Here, DataFrame


In [106]:
df4.loc[2:5,:]

Unnamed: 0,var1,var2,var3,var4
2,5,9,1,15
3,6,13,9,18
4,16,9,3,13


Unnamed: 0,var1,var2,var3,var4
2,5,9,1,15
3,6,13,9,18
4,16,9,3,13


In [107]:
df4.loc[2:4,:]

Unnamed: 0,var1,var2,var3,var4
2,5,9,1,15
3,6,13,9,18
4,16,9,3,13


Unnamed: 0,var1,var2,var3,var4
2,5,9,1,15
3,6,13,9,18


In [109]:

df4.index = list('abcde')

In [110]:
df4

Unnamed: 0,var1,var2,var3,var4
a,12,18,7,12
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [112]:
df4.loc['b':,:]

Unnamed: 0,var1,var2,var3,var4
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


Unnamed: 0,var1,var2,var3,var4
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18


Unnamed: 0,var1,var2,var3,var4
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [211]:
df4

Unnamed: 0,var1,var2,var3,var4
a,12,18,7,12
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [118]:
df4.loc['b':'e','var3']

b    14
c     1
d     9
e     3
Name: var3, dtype: int32

b    14
c     1
d     9
e     3
Name: var3, dtype: int32

In [None]:
# how can we select these data as a DataFrame not a series


In [129]:
df4.loc['b':'e','var2']

b    10
c     9
d    13
e     9
Name: var2, dtype: int32

Unnamed: 0,var2
b,10
c,9
d,13


In [131]:
df4.loc['c':,'var3']

c    1
d    9
e    3
Name: var3, dtype: int32

c    1
d    9
e    3
Name: var3, dtype: int32

Unnamed: 0,var3
c,1
d,9
e,3


In [133]:
df3

Unnamed: 0,w,x,y,z,w+z
a,-0.993263,0.1968,-1.136645,0.000366,-0.992897
b,1.025984,-0.156598,-0.031579,0.649826,1.67581
c,2.154846,-0.610259,-0.755325,-0.346419,1.808428
d,0.147027,-0.479448,0.558769,1.02481,1.171837
e,-0.925874,1.862864,-1.133817,0.610478,-0.315396


In [132]:
df3.loc['c']

w      2.154846
x     -0.610259
y     -0.755325
z     -0.346419
w+z    1.808428
Name: c, dtype: float64

In [136]:
df3.loc[['c']]

Unnamed: 0,w,x,y,z,w+z
c,2.154846,-0.610259,-0.755325,-0.346419,1.808428


Unnamed: 0,w,x,y,z
c,-1.005187,-0.74179,0.187125,-0.732845


### Selecting subset of rows and columns

 - ### `.loc[[row labels|names], [column labels|names]]`

 - ### `.iloc[[row index numbers], [column index numbers]]`

In [139]:
df3

Unnamed: 0,w,x,y,z,w+z
a,-0.993263,0.1968,-1.136645,0.000366,-0.992897
b,1.025984,-0.156598,-0.031579,0.649826,1.67581
c,2.154846,-0.610259,-0.755325,-0.346419,1.808428
d,0.147027,-0.479448,0.558769,1.02481,1.171837
e,-0.925874,1.862864,-1.133817,0.610478,-0.315396


In [143]:
df3.loc['c','x']

-0.6102588558227414

-1.005186917256266

In [None]:
# let's select the same data as a DataFrame


In [146]:
df3

Unnamed: 0,w,x,y,z,w+z
a,-0.993263,0.1968,-1.136645,0.000366,-0.992897
b,1.025984,-0.156598,-0.031579,0.649826,1.67581
c,2.154846,-0.610259,-0.755325,-0.346419,1.808428
d,0.147027,-0.479448,0.558769,1.02481,1.171837
e,-0.925874,1.862864,-1.133817,0.610478,-0.315396


In [147]:
df3.loc[['c','e'],['w','z']]

Unnamed: 0,w,z
c,2.154846,-0.346419
e,-0.925874,0.610478


Unnamed: 0,w,z
c,-1.005187,-0.732845
e,0.992573,1.292765


Unnamed: 0,w,z
c,-2.018168,-0.589001
e,0.190794,0.683509


### Conditional Selection

An important feature of pandas is conditional selection using bracket notation, very similar to numpy:

In [231]:
df3[df3>0.2]

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,,0.230336
b,0.681209,1.035125,,1.939932
c,,,,
d,,1.482495,0.961458,
e,0.992573,1.192241,,1.292765


In [232]:
# It returns based on rows.
df3[df3['y']<0.5]

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
e,0.992573,1.192241,-1.04678,1.292765


In [233]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [235]:
df3[df3['w']<1][['z','y']]

Unnamed: 0,z,y
a,0.230336,-0.376519
b,1.939932,-0.03116
c,-0.732845,0.187125
d,-2.141212,0.961458
e,1.292765,-1.04678


#### For two conditions you can use **|** → `or`,  **&** →  `and` with parenthesis:

In [236]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [237]:
df3[(df3['w']>0) & (df3['z']<1)]

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336


#### Conditional selection using ``.loc[]`` and ``.iloc[]``

In [238]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [148]:
df3.loc[(df3.y>0), ['w','z']]

Unnamed: 0,w,z
d,0.147027,1.02481


In [149]:
df3.loc[((df3.x>1) | (df3.y<1)), ['x','w']]

Unnamed: 0,x,w
a,0.1968,-0.993263
b,-0.156598,1.025984
c,-0.610259,2.154846
d,-0.479448,0.147027
e,1.862864,-0.925874


## More Index Details

Let's discuss some more features of indexing, including resetting the index or setting it something else. We'll also talk about index hierarchy!

In [241]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [None]:
# Reset to default 0,1...n index


In [151]:
df3.reset_index(drop = True)

Unnamed: 0,w,x,y,z,w+z
0,-0.993263,0.1968,-1.136645,0.000366,-0.992897
1,1.025984,-0.156598,-0.031579,0.649826,1.67581
2,2.154846,-0.610259,-0.755325,-0.346419,1.808428
3,0.147027,-0.479448,0.558769,1.02481,1.171837
4,-0.925874,1.862864,-1.133817,0.610478,-0.315396


Unnamed: 0,index,w,x,y,z
0,a,0.38603,2.084019,-0.376519,0.230336
1,b,0.681209,1.035125,-0.03116,1.939932
2,c,-1.005187,-0.74179,0.187125,-0.732845
3,d,-1.38292,1.482495,0.961458,-2.141212
4,e,0.992573,1.192241,-1.04678,1.292765


Unnamed: 0,w,x,y,z
0,0.38603,2.084019,-0.376519,0.230336
1,0.681209,1.035125,-0.03116,1.939932
2,-1.005187,-0.74179,0.187125,-0.732845
3,-1.38292,1.482495,0.961458,-2.141212
4,0.992573,1.192241,-1.04678,1.292765


In [152]:
newindx='CA NY WY OR CO'.split()
newindx

['CA', 'NY', 'WY', 'OR', 'CO']

In [153]:
df3

Unnamed: 0,w,x,y,z,w+z
a,-0.993263,0.1968,-1.136645,0.000366,-0.992897
b,1.025984,-0.156598,-0.031579,0.649826,1.67581
c,2.154846,-0.610259,-0.755325,-0.346419,1.808428
d,0.147027,-0.479448,0.558769,1.02481,1.171837
e,-0.925874,1.862864,-1.133817,0.610478,-0.315396


In [249]:
df3

Unnamed: 0_level_0,w,x,y,z
newidx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,0.38603,2.084019,-0.376519,0.230336
NY,0.681209,1.035125,-0.03116,1.939932
WY,-1.005187,-0.74179,0.187125,-0.732845
OR,-1.38292,1.482495,0.961458,-2.141212
CO,0.992573,1.192241,-1.04678,1.292765


## Multi-Index and Index Hierarchy

Let us go over how to work with Multi-Index, first we'll create a quick example of what a Multi-Indexed DataFrame would look like:

In [156]:
# Index Levels
outside = ['M1', 'M1', 'M1', 'M2', 'M2', 'M2','M3', 'M3', 'M3']
inside = [1, 2, 3, 1, 2, 3, 5, 6, 7]
multi_index = list(zip(outside, inside))
multi_index

[('M1', 1),
 ('M1', 2),
 ('M1', 3),
 ('M2', 1),
 ('M2', 2),
 ('M2', 3),
 ('M3', 5),
 ('M3', 6),
 ('M3', 7)]

In [157]:
hier_index=pd.MultiIndex.from_tuples(multi_index)

In [158]:
hier_index

MultiIndex([('M1', 1),
            ('M1', 2),
            ('M1', 3),
            ('M2', 1),
            ('M2', 2),
            ('M2', 3),
            ('M3', 5),
            ('M3', 6),
            ('M3', 7)],
           )

In [159]:
df5=pd.DataFrame(np.random.randn(9,4), index = hier_index, columns=['A','B','C','D'])
df5

Unnamed: 0,Unnamed: 1,A,B,C,D
M1,1,-0.344431,-1.785068,0.530949,0.628364
M1,2,2.283261,0.148912,0.536175,0.144883
M1,3,-0.627879,1.151898,0.25937,-1.186527
M2,1,-0.673861,-1.555119,-0.566604,-0.790608
M2,2,0.230762,1.028012,-0.888242,-2.141687
M2,3,-0.976076,1.414672,-1.562469,-0.676467
M3,5,-1.617897,-1.818591,0.171447,0.37775
M3,6,1.049934,-0.526008,-0.304556,-0.484535
M3,7,0.053539,-2.024119,0.796184,2.457297


Now let's show how to index this! For index hierarchy we use ``df.loc[]``, if this was on the columns axis, you would just use normal bracket notation ``df[]``. Calling one level of the index returns the sub-dataframe:

In [165]:
df5.loc['M3','B']

5   -1.818591
6   -0.526008
7   -2.024119
Name: B, dtype: float64

Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,-0.594951,-0.593371,0.911416,-1.768073
6,0.205247,0.211945,-1.240039,1.0493
7,-0.410414,0.340754,0.805027,0.528188


In [274]:
df5.loc['M2'].loc[2]

A   -0.703198
B    2.067214
C    0.455452
D   -1.123753
Name: 2, dtype: float64

In [166]:
df5.loc['']

Unnamed: 0,Unnamed: 1,A,B,C,D
M1,1,-0.344431,-1.785068,0.530949,0.628364
M1,2,2.283261,0.148912,0.536175,0.144883
M1,3,-0.627879,1.151898,0.25937,-1.186527
M2,1,-0.673861,-1.555119,-0.566604,-0.790608
M2,2,0.230762,1.028012,-0.888242,-2.141687
M2,3,-0.976076,1.414672,-1.562469,-0.676467
M3,5,-1.617897,-1.818591,0.171447,0.37775
M3,6,1.049934,-0.526008,-0.304556,-0.484535
M3,7,0.053539,-2.024119,0.796184,2.457297


Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,-0.703198,2.067214,0.455452,-1.123753


In [262]:
df5.index.names

FrozenList([None, None])

In [263]:
df5.index.names = ['Group','Num']

In [264]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M1,1,-2.288978,0.441018,0.656973,0.136999
M1,2,-0.451982,-1.094163,-0.006354,-1.403832
M1,3,-0.202026,-1.089143,1.406601,1.577691
M2,1,0.545812,-0.693707,-0.389968,-0.170756
M2,2,-0.703198,2.067214,0.455452,-1.123753
M2,3,-1.088266,-0.609936,-0.635511,-0.728053
M3,5,-0.594951,-0.593371,0.911416,-1.768073
M3,6,0.205247,0.211945,-1.240039,1.0493
M3,7,-0.410414,0.340754,0.805027,0.528188


### let's take a quick look at the [``.xs()``](http://localhost:8888/notebooks/pythonic/DAwPythonSessions/w3resource-pandas-dataframe-xs.ipynb)

In [168]:
df5.loc['M3']

Unnamed: 0,A,B,C,D
5,-1.617897,-1.818591,0.171447,0.37775
6,1.049934,-0.526008,-0.304556,-0.484535
7,0.053539,-2.024119,0.796184,2.457297


Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,-0.594951,-0.593371,0.911416,-1.768073
6,0.205247,0.211945,-1.240039,1.0493
7,-0.410414,0.340754,0.805027,0.528188


Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,-0.594951,-0.593371,0.911416,-1.768073
6,0.205247,0.211945,-1.240039,1.0493
7,-0.410414,0.340754,0.805027,0.528188


In [267]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M1,1,-2.288978,0.441018,0.656973,0.136999
M1,2,-0.451982,-1.094163,-0.006354,-1.403832
M1,3,-0.202026,-1.089143,1.406601,1.577691
M2,1,0.545812,-0.693707,-0.389968,-0.170756
M2,2,-0.703198,2.067214,0.455452,-1.123753
M2,3,-1.088266,-0.609936,-0.635511,-0.728053
M3,5,-0.594951,-0.593371,0.911416,-1.768073
M3,6,0.205247,0.211945,-1.240039,1.0493
M3,7,-0.410414,0.340754,0.805027,0.528188


In [170]:
df5.loc['M2',2]

A    0.230762
B    1.028012
C   -0.888242
D   -2.141687
Name: (M2, 2), dtype: float64

A   -0.703198
B    2.067214
C    0.455452
D   -1.123753
Name: (M2, 2), dtype: float64

Unnamed: 0_level_0,A,B,C,D
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1,-0.202026,-1.089143,1.406601,1.577691
M2,-1.088266,-0.609936,-0.635511,-0.728053


In [171]:
df5.xs(3, level = 'Num')

KeyError: 'Level Num not found'

Unnamed: 0_level_0,A,B,C,D
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1,-0.202026,-1.089143,1.406601,1.577691
M2,-1.088266,-0.609936,-0.635511,-0.728053


In [272]:
df5.xs('A',axis=1)

Group  Num
M1     1     -2.288978
       2     -0.451982
       3     -0.202026
M2     1      0.545812
       2     -0.703198
       3     -1.088266
M3     5     -0.594951
       6      0.205247
       7     -0.410414
Name: A, dtype: float64