# Data Frame Selection

In [8]:
import pandas as pd

# Read the tab-separated iris file into a DataFrame and preview the first rows.
iris_path = "./data/iris-with-header.tsv"
df = pd.read_csv(iris_path, sep='\t')
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Get columns by column name

In [6]:
# Single brackets with one column label return that column as a Series.
species = df['Species']
species.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: Species, dtype: object

In [7]:
# Indexing with a list of column labels returns a DataFrame holding only those columns.
wanted_columns = ['SepalLengthCm', 'Species']
df[wanted_columns].head()

Unnamed: 0,SepalLengthCm,Species
0,5.1,Iris-setosa
1,4.9,Iris-setosa
2,4.7,Iris-setosa
3,4.6,Iris-setosa
4,5.0,Iris-setosa


## Get rows by index

In [17]:
# An integer slice on the frame selects rows; the stop is exclusive,
# so this is a one-row DataFrame containing only the first row.
df[:1]

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa


### Every 5th row

In [85]:
# Extended slice (start:stop:step): keep every 5th row, starting with the first.
every_nth = 5
df[::every_nth]

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
10,5.4,3.7,1.5,0.2,Iris-setosa
15,5.7,4.4,1.5,0.4,Iris-setosa
20,5.4,3.4,1.7,0.2,Iris-setosa
25,5.0,3.0,1.6,0.2,Iris-setosa
30,4.8,3.1,1.6,0.2,Iris-setosa
35,5.0,3.2,1.2,0.2,Iris-setosa
40,5.0,3.5,1.3,0.3,Iris-setosa
45,4.8,3.0,1.4,0.3,Iris-setosa


### Reverse order

In [83]:
# A step of -1 walks the rows from last to first; each row keeps its original index label.
backwards = -1
df[::backwards]

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
149,5.9,3.0,5.1,1.8,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
145,6.7,3.0,5.2,2.3,Iris-virginica
144,6.7,3.3,5.7,2.5,Iris-virginica
143,6.8,3.2,5.9,2.3,Iris-virginica
142,5.8,2.7,5.1,1.9,Iris-virginica
141,6.9,3.1,5.1,2.3,Iris-virginica
140,6.7,3.1,5.6,2.4,Iris-virginica


## Get elements by `iloc`, `iat`, `ix`, `loc`
[Pandas documentation: Indexing and selecting data](https://pandas.pydata.org/docs/user_guide/indexing.html)

`.iloc` is purely **integer-location based** indexing, for selection by position.

In [47]:
# A single position selects a whole row (returned as a Series);
# a (row, column) pair of positions selects one cell.
first_row = df.iloc[0]
species_cell = df.iloc[0, 4]

print(first_row)
print("")
print(species_cell)

SepalLengthCm            5.1
SepalWidthCm             3.5
PetalLengthCm            1.4
PetalWidthCm             0.2
Species          Iris-setosa
Name: 0, dtype: object

Iris-setosa


`.loc` is primarily **label based**, but may also be used with a **boolean array**. `.loc` raises a `KeyError` when a requested label is not found.

In [52]:
# A single index label selects a whole row (returned as a Series);
# a (row label, column label) pair selects one cell.
row_by_label = df.loc[0]
species_by_label = df.loc[0, 'Species']

print(row_by_label)
print('')
print(species_by_label)

SepalLengthCm            5.1
SepalWidthCm             3.5
PetalLengthCm            1.4
PetalWidthCm             0.2
Species          Iris-setosa
Name: 0, dtype: object

Iris-setosa


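`.ix` supports **mixed integer** and **label** based access. **Note:** `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0; use `.loc` (labels) or `.iloc` (positions) instead.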

In [58]:
# `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0, so calling it
# raises AttributeError on current versions. The equivalent lookups below use
# `.loc`, translating any positional part into a label first.

# Row with index label 0 (formerly df.ix[0]).
print(df.loc[0])
print('')
# Mixed access (formerly df.ix[0, 0]): row label 0, first column by position.
# df.columns[0] turns the column position into its label for `.loc`.
print(df.loc[0, df.columns[0]])
print('')
# Pure label access (formerly df.ix[0, 'Species']).
print(df.loc[0, 'Species'])

SepalLengthCm            5.1
SepalWidthCm             3.5
PetalLengthCm            1.4
PetalWidthCm             0.2
Species          Iris-setosa
Name: 0, dtype: object

5.1

Iris-setosa


## Get scalar values with `.iat`, `.at`
Similarly to `loc`, `at` provides **label based scalar** lookups, while `iat` provides **integer based** scalar lookups, analogously to `iloc`.

In [63]:
# TODO Series example using s.at

In [62]:
# Can be used with dataframe too
print(df.iat[0,0])
print('')
print(df.at[0,'Species'])

5.1

Iris-setosa


## Conditional selection with `<`, `>`, `==`, `~`, `.where()`

In [70]:
# Element-wise comparison gives a boolean frame of the same shape; indexing with it
# keeps the cells equal to 5.1 and shows every other cell as NaN.
equals_5_1 = df == 5.1
df[equals_5_1]

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
5,,,,,
6,,,,,
7,,,,,
8,,,,,
9,,,,,


In [73]:
# `.where(cond)` keeps the cells where cond is True and puts NaN elsewhere.
cell_matches = df == 5.1
df.where(cell_matches)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
5,,,,,
6,,,,,
7,,,,,
8,,,,,
9,,,,,


In [77]:
# Build the row mask straight from the column; the former `df[:]` full slice only
# created a redundant intermediate frame before the column was selected.
# A one-dimensional boolean Series passed to `.where()` is applied per row: rows
# where the mask is True are kept whole, all other rows become NaN in every column.
sepal_length_matches = df['SepalLengthCm'] == 5.1
df.where(sepal_length_matches)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,,,,,
2,,,,,
3,,,,,
4,,,,,
5,,,,,
6,,,,,
7,,,,,
8,,,,,
9,,,,,


In [None]:
# More conditional examples