In [1]:
import numpy as np
import pandas as pd

# Handling NaN in a dataframe

In [24]:
dt = {'Rollno': [101, np.nan, 103], 'Name': ['Ram', np.nan, 'Bharat'], 'Percent': [85.6,77.5, np.nan]}
dt

{'Rollno': [101, nan, 103],
 'Name': ['Ram', nan, 'Bharat'],
 'Percent': [85.6, 77.5, nan]}

In [4]:
df = pd.DataFrame(dt)

In [5]:
df

Unnamed: 0,Rollno,Name,Percent
0,101.0,Ram,85.6
1,,,77.5
2,103.0,Bharat,


In [7]:
df.fillna(0)

Unnamed: 0,Rollno,Name,Percent
0,101.0,Ram,85.6
1,0.0,0,77.5
2,103.0,Bharat,0.0


Replacing the nan occurances of every column with a single value can be non-relevant sometimes

In [6]:
df.fillna({'Rollno': 100, 'Name': 'abc', 'Percent': 40})

Unnamed: 0,Rollno,Name,Percent
0,101.0,Ram,85.6
1,100.0,abc,77.5
2,103.0,Bharat,40.0


# Hierarchical Indexing

In [10]:
series = pd.Series(np.arange(6), index=[[1, 1, 1, 2, 2, 3], [1.1, 1.2, 1.3, 2.1, 2.2, 3.1]])
series

1  1.1    0
   1.2    1
   1.3    2
2  2.1    3
   2.2    4
3  3.1    5
dtype: int32

In [15]:
series.index

MultiIndex([(1, 1.1),
            (1, 1.2),
            (1, 1.3),
            (2, 2.1),
            (2, 2.2),
            (3, 3.1)],
           )

In [11]:
mser = pd.Series(np.random.rand(8), index=[['white','white','white','blue','blue','red','red','red'], ['up','down','right','up','down','up','down','left']])
mser

white  up       0.177694
       down     0.761223
       right    0.243044
blue   up       0.549924
       down     0.540168
red    up       0.764811
       down     0.123913
       left     0.951730
dtype: float64

In [16]:
series[0:1]

1  1.1    0
dtype: int32

In [17]:
series[3]

3.1    5
dtype: int32

In [18]:
series[1]

1.1    0
1.2    1
1.3    2
dtype: int32

In [19]:
series[1:3]

1  1.2    1
   1.3    2
dtype: int32

In [20]:
series[1:]

1  1.2    1
   1.3    2
2  2.1    3
   2.2    4
3  3.1    5
dtype: int32

In [21]:
series[2,2.2]

4

In [22]:
series[1,1.3]

2

In [23]:
series[1,]

1.1    0
1.2    1
1.3    2
dtype: int32

The hierarchical indexing plays a critical role in reshaping the data and group-based operations such
as creating a pivot-table. For example, the data could be used just rearranged in a data frame using a special
function called **unstack( )**. This function converts the Series with hierarchical index in a simple DataFrame,
where the second set of indexes is converted into a new set of columns. 

In [25]:
series.unstack()

Unnamed: 0,1.1,1.2,1.3,2.1,2.2,3.1
1,0.0,1.0,2.0,,,
2,,,,3.0,4.0,
3,,,,,,5.0


If what we want is to perform the reverse operation, which is to convert a DataFrame in a Series, you will
use the **stack()** function.


In [26]:
df

Unnamed: 0,Rollno,Name,Percent
0,101.0,Ram,85.6
1,,,77.5
2,103.0,Bharat,


In [27]:
df.stack()

0  Rollno      101.0
   Name          Ram
   Percent      85.6
1  Percent      77.5
2  Rollno      103.0
   Name       Bharat
dtype: object

As regards to the **DataFrame**, it is possible to define a **hierarchical index** both for the rows and for the
columns. At the time of the declaration of the DataFrame, you have to define an array of arrays for both the
index option and the columns option.

# Multilevel Indexing in a dataframe

In [33]:
mframe = pd.DataFrame(np.arange(16).reshape(4,4), index=[['white','white','red','red'], ['up','down','up','down']],columns=[['pen','pen','paper','paper'],['DOT', 'INK', 'NEWS', 'EXAM']])
mframe

Unnamed: 0_level_0,Unnamed: 1_level_0,pen,pen,paper,paper
Unnamed: 0_level_1,Unnamed: 1_level_1,DOT,INK,NEWS,EXAM
white,up,0,1,2,3
white,down,4,5,6,7
red,up,8,9,10,11
red,down,12,13,14,15


In [35]:
mframe.columns.names = ['Object', 'Type']
mframe

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Type,DOT,INK,NEWS,EXAM
white,up,0,1,2,3
white,down,4,5,6,7
red,up,8,9,10,11
red,down,12,13,14,15


In [37]:
mframe.index.names = ['Colors', 'Status']
mframe

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Type,DOT,INK,NEWS,EXAM
Colors,Status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
white,up,0,1,2,3
white,down,4,5,6,7
red,up,8,9,10,11
red,down,12,13,14,15


Occasionally, you could need to rearrange the order of the levels on an axis or do a sorting for values at a
specific level.
The **swaplevel()** function accepts as argument the names assigned to the two levels that you want to
interchange, and returns a new object with the two levels interchanged between them, while leaving the data
unmodified.

In [38]:
mframe.swaplevel('Colors', 'Status')

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Type,DOT,INK,NEWS,EXAM
Status,Colors,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
up,white,0,1,2,3
down,white,4,5,6,7
up,red,8,9,10,11
down,red,12,13,14,15


In [40]:
mframe.pd.sortlevel('Colors')

AttributeError: 'DataFrame' object has no attribute 'pd'

In [41]:
mframe.sum()

Object  Type
pen     DOT     24
        INK     28
paper   NEWS    32
        EXAM    36
dtype: int64

In [42]:
mframe

Unnamed: 0_level_0,Object,pen,pen,paper,paper
Unnamed: 0_level_1,Type,DOT,INK,NEWS,EXAM
Colors,Status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
white,up,0,1,2,3
white,down,4,5,6,7
red,up,8,9,10,11
red,down,12,13,14,15
