In [2]:
import pandas as pd
import numpy as np

In [4]:
# Build a 5x4 frame of standard-normal samples.
# Rows are labelled A-E; columns are labelled with the *strings* '1'-'4'.
pd_df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index='A B C D E'.split(),
    columns='1 2 3 4'.split(),
)

In [5]:
# Display the frame. The values come from np.random.randn without a seed,
# so they differ on every fresh run.
pd_df

Unnamed: 0,1,2,3,4
A,1.057043,-0.975386,-1.683727,-0.358346
B,-2.099307,-0.306552,0.824558,1.316593
C,0.265511,-0.332758,0.645982,-0.216599
D,-0.822473,0.644026,0.565443,1.21804
E,0.264055,-1.860961,1.529302,0.22426


### DF Conditional statements

In [7]:
# Element-wise filter: keep the values greater than zero and replace
# every other cell with NaN (the frame keeps its original shape).
pd_df.where(pd_df > 0)

Unnamed: 0,1,2,3,4
A,1.057043,,,
B,,,0.824558,1.316593
C,0.265511,,0.645982,
D,,0.644026,0.565443,1.21804
E,0.264055,,1.529302,0.22426


### Don't return NaNs

In [9]:
# Index with a boolean Series (one flag per row) instead of a boolean
# DataFrame: rows where column '1' is positive are kept whole, so no
# NaNs are introduced.

pd_df.loc[pd_df['1'].gt(0)]

Unnamed: 0,1,2,3,4
A,1.057043,-0.975386,-1.683727,-0.358346
C,0.265511,-0.332758,0.645982,-0.216599
E,0.264055,-1.860961,1.529302,0.22426


In [10]:
# Keep only the rows whose value in column '4' is positive.
pd_df.loc[pd_df['4'].gt(0)]

Unnamed: 0,1,2,3,4
B,-2.099307,-0.306552,0.824558,1.316593
D,-0.822473,0.644026,0.565443,1.21804
E,0.264055,-1.860961,1.529302,0.22426


### Applying multiple conditions

In [13]:
# Combine two row masks element-wise with `&`:
# column '1' must be positive AND column '2' must be negative.
pd_df.loc[pd_df['1'].gt(0) & pd_df['2'].lt(0)]

Unnamed: 0,1,2,3,4
A,1.057043,-0.975386,-1.683727,-0.358346
C,0.265511,-0.332758,0.645982,-0.216599
E,0.264055,-1.860961,1.529302,0.22426


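### Python's built-in `and` and `or` don't work here because they evaluate a single truth value and cannot compare the boolean values of a Series element by element. Use `&` and `|` instead, with each condition wrapped in parentheses.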

### Resetting the index to the default integer values 0, 1, 2, ...

In [14]:
# Move the current row labels into a regular column named 'index' and
# number the rows 0..n-1. The result is only displayed here; it is not
# assigned back to pd_df.
pd_df.reset_index(drop=False)

Unnamed: 0,index,1,2,3,4
0,A,1.057043,-0.975386,-1.683727,-0.358346
1,B,-2.099307,-0.306552,0.824558,1.316593
2,C,0.265511,-0.332758,0.645982,-0.216599
3,D,-0.822473,0.644026,0.565443,1.21804
4,E,0.264055,-1.860961,1.529302,0.22426


### Setting the index to predefined values stored in a column

In [18]:
# Add a column 'new' holding one label per row (five values for five rows).
# This modifies pd_df in place.
pd_df['new'] = 'R S T U V'.split()

In [19]:
# Display the frame again to confirm that the 'new' column was added.
pd_df

Unnamed: 0,1,2,3,4,new
A,1.057043,-0.975386,-1.683727,-0.358346,R
B,-2.099307,-0.306552,0.824558,1.316593,S
C,0.265511,-0.332758,0.645982,-0.216599,T
D,-0.822473,0.644026,0.565443,1.21804,U
E,0.264055,-1.860961,1.529302,0.22426,V


In [20]:
# Use the values of the 'new' column as the row labels.
# The result is only displayed here; it is not assigned back to pd_df.
pd_df.set_index(keys='new')

Unnamed: 0_level_0,1,2,3,4
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
R,1.057043,-0.975386,-1.683727,-0.358346
S,-2.099307,-0.306552,0.824558,1.316593
T,0.265511,-0.332758,0.645982,-0.216599
U,-0.822473,0.644026,0.565443,1.21804
V,0.264055,-1.860961,1.529302,0.22426


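**`set_index` replaces the existing index, so the old labels (A–E) do not appear in the result. Use `reset_index` first if you want to retain the old index as a regular column.**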

### Multilevel DataFrame

In [22]:
# Outer level: the group label for each row.
outside = 'G1 G1 G1 G2 G2 G2'.split()
# Inner level: the position of each row within its group.
inside = [1, 2, 3, 1, 2, 3]
# Pair the two levels up row by row -> [('G1', 1), ('G1', 2), ...]
hier_index = [(group, number) for group, number in zip(outside, inside)]

In [23]:
# Show the list of (outer, inner) tuples.
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [24]:
# Turn the list of (outer, inner) tuples into a pandas MultiIndex.
# Note: this rebinds `hier_index` from a plain list to a MultiIndex.

hier_index = pd.MultiIndex.from_tuples(tuples=hier_index)

In [25]:
# Show the resulting MultiIndex.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [27]:
# 6x2 frame of standard-normal samples, indexed by the two-level
# MultiIndex, with columns 'A' and 'B'.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [28]:
# Display the frame with its two-level row index.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,1.008996,-0.460366
G1,2,-0.378858,0.144003
G1,3,1.381871,-0.996812
G2,1,0.159785,-0.234847
G2,2,-1.942278,0.964434
G2,3,0.652118,0.969432


In [29]:
# Take the cross-section for outer label 'G1' (equivalent to label-based
# `.loc['G1']`): the outer level is dropped and the inner level (1, 2, 3)
# becomes the index of the result.
df.xs('G1')

Unnamed: 0,A,B
1,1.008996,-0.460366
2,-0.378858,0.144003
3,1.381871,-0.996812


In [30]:
# Inspect the current names of the index levels. None have been assigned
# yet, so this shows None for each of the two levels.

df.index.names

FrozenList([None, None])

In [31]:
# Name the outer index level 'group' and the inner level 'number'.
df.index.names = ['group', 'number']

In [32]:
# Display df again: the index levels now carry the names 'group' and 'number'.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
group,number,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,1.008996,-0.460366
G1,2,-0.378858,0.144003
G1,3,1.381871,-0.996812
G2,1,0.159785,-0.234847
G2,2,-1.942278,0.964434
G2,3,0.652118,0.969432
