## Libraries

In [3]:
import pandas as pd
import numpy as np

## Developing DataFrame

In [4]:
# Column-oriented sample data: one equally long list per column
# ('state', 'year', 'pop').
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
# Each key becomes a column; rows get the default integer index 0-5.
frame = pd.DataFrame(data)

In [5]:
# Display the frame: six rows with the default integer index 0-5.
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


## Basic Difference
* `loc` selects data by index **label**
* `iloc` selects data by integer **position** (0-based), regardless of the labels

In [23]:
# Collect the current index labels as a plain list in reverse order
# (last label first), ready to be passed to reindex.
newIndex = frame.index.values.tolist()[::-1]

In [24]:
# Reorder the rows to follow the reversed label list; every row keeps its
# original label, only the row order changes (5, 4, ..., 0).
frame_new = frame.reindex(index=newIndex)
frame_new

Unnamed: 0,state,year,pop
5,Nevada,2003,3.2
4,Nevada,2002,2.9
3,Nevada,2001,2.4
2,Ohio,2002,3.6
1,Ohio,2001,1.7
0,Ohio,2000,1.5


In [25]:
## Label-based indexing
# .loc[1] returns the row whose index label is 1, even though that row is
# second-to-last in the reversed frame.
frame_new.loc[1]

state    Ohio
year     2001
pop       1.7
Name: 1, dtype: object

In [27]:
### Position-based indexing
# .iloc[1] returns the second row by position, which carries index label 4
# in the reversed frame.
frame_new.iloc[1]

state    Nevada
year       2002
pop         2.9
Name: 4, dtype: object

## Slicing

In [29]:
# A list of labels selects those rows, returned in the order the labels are
# listed (1 then 2), not in the frame's own row order.
frame_new.loc[[1,2]]

Unnamed: 0,state,year,pop
1,Ohio,2001,1.7
2,Ohio,2002,3.6


In [30]:
# Label slice: both endpoints are included and rows follow the frame's own
# index order, so 3:1 yields labels 3, 2, 1 on the reversed index.
frame_new.loc[3:1]

Unnamed: 0,state,year,pop
3,Nevada,2001,2.4
2,Ohio,2002,3.6
1,Ohio,2001,1.7


In [32]:
# Single integer position: the second row of the reversed frame (label 4),
# returned as a Series.
frame_new.iloc[1]

state    Nevada
year       2002
pop         2.9
Name: 4, dtype: object

In [33]:
# A list of positions selects the rows at positions 1 and 2
# (labels 4 and 3 in the reversed frame).
frame_new.iloc[[1,2]]

Unnamed: 0,state,year,pop
4,Nevada,2002,2.9
3,Nevada,2001,2.4


In [34]:
# Positional slice: the end position is excluded, so 1:3 yields positions
# 1 and 2 only (contrast with the inclusive label slice of .loc).
frame_new.iloc[1:3]

Unnamed: 0,state,year,pop
4,Nevada,2002,2.9
3,Nevada,2001,2.4


## Further Actions

### 1. selecting single column
* plain `frame['state']` is the fastest in the timings below; between the two indexers, `loc` beats `iloc`

In [161]:
# Baseline: plain column access by name; returns a Series.
# NOTE(review): %time reports a single run, so microsecond-scale numbers are
# noisy; %timeit would give a more stable comparison. Consider switching.
%time frame['state']

CPU times: user 164 µs, sys: 0 ns, total: 164 µs
Wall time: 169 µs


0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [162]:
# Same column through .loc: all rows (:) and the 'state' column label;
# returns a Series.
%time frame.loc[:,'state']

CPU times: user 272 µs, sys: 0 ns, total: 272 µs
Wall time: 280 µs


0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [163]:
# Column by integer position; the list indexer [1] makes the result a
# one-column DataFrame rather than a Series.
# NOTE(review): the recorded output shows 'state' at position 1, but in the
# frame built at the top of this notebook position 1 is 'year'. `frame`
# appears to have been redefined in a cell not shown here; confirm.
%time frame.iloc[:,[1]]

CPU times: user 3.02 ms, sys: 0 ns, total: 3.02 ms
Wall time: 3 ms


Unnamed: 0,state
0,Ohio
1,Ohio
2,Ohio
3,Nevada
4,Nevada
5,Nevada


### 2. selecting single row
* all columns for the row with index label 2
  
  ```2	three	Ohio	2002	3.6	2.0```
* iloc is the winner

In [159]:
# Boolean-mask selection: keep the rows whose index label equals 2;
# returns a DataFrame.
# NOTE(review): the recorded output has extra 'index' and 'debt' columns that
# the frame built at the top of this notebook lacks. Confirm which
# definition of `frame` these timings used.
%time frame[frame.index==2]

CPU times: user 2.03 ms, sys: 330 µs, total: 2.36 ms
Wall time: 2.79 ms


Unnamed: 0,index,state,year,pop,debt
2,three,Ohio,2002,3.6,2.0


In [168]:
# Row with index label 2 via .loc; returned as a Series named 2.
%time frame.loc[2]

CPU times: user 911 µs, sys: 0 ns, total: 911 µs
Wall time: 923 µs


index    three
state     Ohio
year      2002
pop        3.6
debt         2
Name: 2, dtype: object

In [170]:
# Row at integer position 2 via .iloc; returned as a Series.
%time frame.iloc[2]

CPU times: user 832 µs, sys: 0 ns, total: 832 µs
Wall time: 846 µs


index    three
state     Ohio
year      2002
pop        3.6
debt         2
Name: 2, dtype: object

### 3. selecting single column,row
* frame at column:'pop', index:2 -> 3.6
* `frame['pop'][2]` is the winner: it returns a bare scalar (`numpy.float64`), whereas the `loc`/`iloc` calls below use list indexers and therefore return a Series/DataFrame

In [180]:
# Chained access: take the 'pop' column, then the element labelled 2;
# yields a bare scalar (3.6).
%time frame['pop'][2]

CPU times: user 138 µs, sys: 21 µs, total: 159 µs
Wall time: 168 µs


3.6

In [177]:
# The list row indexer [2] makes .loc return a one-element Series,
# not a scalar.
%time frame.loc[[2],'pop']

CPU times: user 1.41 ms, sys: 218 µs, total: 1.62 ms
Wall time: 1.53 ms


2    3.6
Name: pop, dtype: float64

In [175]:
# List indexers on both axes make .iloc return a 1x1 DataFrame.
# NOTE(review): position 3 is 'pop' in the recorded output, but the frame
# built at the top of this notebook has only three columns (positions 0-2).
# Confirm which definition of `frame` was in scope.
%time frame.iloc[[2],[3]]

CPU times: user 9.68 ms, sys: 165 µs, total: 9.85 ms
Wall time: 11.1 ms


Unnamed: 0,pop
2,3.6


### 4. Selecting two columns
* frame: state, year
* iloc is the clear winner

In [196]:
# Select two columns by passing a list of column names; returns a DataFrame.
%time frame[['state','year']]

CPU times: user 2.32 ms, sys: 0 ns, total: 2.32 ms
Wall time: 2.28 ms


Unnamed: 0,state,year
0,Ohio,2001
1,Ohio,2000
2,Ohio,2002
3,Nevada,2001
4,Nevada,2003
5,Nevada,2002


In [197]:
# Same two columns via .loc: all rows (:) and a list of column labels.
%time frame.loc[:,['state','year']]

CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 7.54 ms


Unnamed: 0,state,year
0,Ohio,2001
1,Ohio,2000
2,Ohio,2002
3,Nevada,2001
4,Nevada,2003
5,Nevada,2002


In [198]:
# Same selection by integer column position.
# NOTE(review): positions 1 and 2 map to 'state' and 'year' in the recorded
# output; in the frame built at the top of this notebook they would be
# 'year' and 'pop'. Confirm which definition of `frame` was in scope.
%time frame.iloc[:, [1,2]]

CPU times: user 635 µs, sys: 100 µs, total: 735 µs
Wall time: 699 µs


Unnamed: 0,state,year
0,Ohio,2001
1,Ohio,2000
2,Ohio,2002
3,Nevada,2001
4,Nevada,2003
5,Nevada,2002


### 5. Selecting two rows
```
2	three	Ohio	2002	3.6	2.0
3	four	Nevada	2001	2.4	3.0
```

In [220]:
# Boolean mask from Index.isin: keep the rows whose index label is 2 or 3.
%time frame[frame.index.isin([2,3])]

CPU times: user 1.8 ms, sys: 280 µs, total: 2.08 ms
Wall time: 2.02 ms


Unnamed: 0,index,state,year,pop,debt
2,three,Ohio,2002,3.6,2.0
3,four,Nevada,2001,2.4,3.0


In [221]:
# Rows labelled 2 and 3, all columns (:).
%time frame.loc[[2,3], :]

CPU times: user 2.29 ms, sys: 0 ns, total: 2.29 ms
Wall time: 2.2 ms


Unnamed: 0,index,state,year,pop,debt
2,three,Ohio,2002,3.6,2.0
3,four,Nevada,2001,2.4,3.0


In [222]:
# Rows at integer positions 2 and 3, all columns (:).
%time frame.iloc[[2,3],:]

CPU times: user 3.15 ms, sys: 0 ns, total: 3.15 ms
Wall time: 2.96 ms


Unnamed: 0,index,state,year,pop,debt
2,three,Ohio,2002,3.6,2.0
3,four,Nevada,2001,2.4,3.0


### 6. Conditional selection of index
* Selecting the rows whose index label is odd

In [225]:
# Callable indexer: the lambda receives the frame and returns a boolean mask
# that is True where the index label is odd.
%time frame[lambda x:x.index %2 !=0]

CPU times: user 1.12 ms, sys: 173 µs, total: 1.3 ms
Wall time: 1.25 ms


Unnamed: 0,index,state,year,pop,debt
1,one,Ohio,2000,1.5,0.0
3,four,Nevada,2001,2.4,3.0
5,five,Nevada,2002,2.9,4.0


In [224]:
# Same odd-label mask, passed to .loc as a callable indexer.
%time frame.loc[lambda x: x.index %2 !=0]

CPU times: user 5.68 ms, sys: 0 ns, total: 5.68 ms
Wall time: 6.02 ms


Unnamed: 0,index,state,year,pop,debt
1,one,Ohio,2000,1.5,0.0
3,four,Nevada,2001,2.4,3.0
5,five,Nevada,2002,2.9,4.0


In [223]:
# Same odd-label mask, passed to .iloc as a callable; the boolean array it
# returns selects rows by position (the recorded output shows rows 1, 3, 5).
%time frame.iloc[lambda x: x.index % 2 != 0]

CPU times: user 2.68 ms, sys: 0 ns, total: 2.68 ms
Wall time: 2.63 ms


Unnamed: 0,index,state,year,pop,debt
1,one,Ohio,2000,1.5,0.0
3,four,Nevada,2001,2.4,3.0
5,five,Nevada,2002,2.9,4.0
