In [1]:
import pandas as pd

### Create an example dataframe

In [2]:
# Column-oriented sample records: one key per field, one list entry per person.
data = dict(
    name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    year=[2012, 2012, 2013, 2014, 2014],
    reports=[4, 24, 31, 2, 3],
)

In [3]:
# Echo the raw dict to check its contents before building a DataFrame from it.
data

{'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
 'year': [2012, 2012, 2013, 2014, 2014],
 'reports': [4, 24, 31, 2, 3]}

In [4]:
# Build a DataFrame from the dict: the keys become the column names and,
# since no index is given, the rows are labelled 0 through 4.
df = pd.DataFrame(data)
df

Unnamed: 0,name,year,reports
0,Jason,2012,4
1,Molly,2012,24
2,Tina,2013,31
3,Jake,2014,2
4,Amy,2014,3


In [5]:
# Rebuild the frame with one string label per row; these labels replace
# the default 0..4 row labels and are what label-based selection uses.
df = pd.DataFrame(
    data,
    index=['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'],
)
df

Unnamed: 0,name,year,reports
Cochice,Jason,2012,4
Pima,Molly,2012,24
Santa Cruz,Tina,2013,31
Maricopa,Jake,2014,2
Yuma,Amy,2014,3


In [6]:
# Row labels: the values passed as index= when the frame was built.
df.index

Index(['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'], dtype='object')

In [7]:
# Column labels: the keys of the dict the frame was built from.
df.columns

Index(['name', 'year', 'reports'], dtype='object')

In [8]:
# The frame's contents as a 2-D array; because the columns mix strings and
# integers, the array comes back with dtype=object.
df.values

array([['Jason', 2012, 4],
       ['Molly', 2012, 24],
       ['Tina', 2013, 31],
       ['Jake', 2014, 2],
       ['Amy', 2014, 3]], dtype=object)

### Selecting a single column as a Series

In [9]:
# A single column label inside [] returns that column as a Series that
# keeps the frame's row labels.
df["name"]

Cochice       Jason
Pima          Molly
Santa Cruz     Tina
Maricopa       Jake
Yuma            Amy
Name: name, dtype: object

### Selecting multiple columns as a DataFrame

In [10]:
# A list of column labels inside [] returns a DataFrame containing just
# those columns.
df[["name","year"]]

Unnamed: 0,name,year
Cochice,Jason,2012
Pima,Molly,2012
Santa Cruz,Tina,2013
Maricopa,Jake,2014
Yuma,Amy,2014


### Select a row as a Series with .loc

In [11]:
# .loc with one row label returns that row as a Series indexed by the
# column names.
df.loc['Pima']

name       Molly
year        2012
reports       24
Name: Pima, dtype: object

In [12]:
# Passing a list of row labels to .loc returns a DataFrame with those rows.
df.loc[['Pima','Santa Cruz']]

Unnamed: 0,name,year,reports
Pima,Molly,2012,24
Santa Cruz,Tina,2013,31


### Use slice notation to select a range of rows with .loc

In [13]:
# Label slices with .loc include BOTH endpoints: the 'Maricopa' row is part
# of the result.
df.loc['Pima':'Maricopa']

Unnamed: 0,name,year,reports
Pima,Molly,2012,24
Santa Cruz,Tina,2013,31
Maricopa,Jake,2014,2


### Select two rows and three columns

In [14]:
# The first indexer picks rows by label, the second picks columns by label.
df.loc[
    ['Pima', 'Santa Cruz'],
    ['name', 'year', 'reports'],
]

Unnamed: 0,name,year,reports
Pima,Molly,2012,24
Santa Cruz,Tina,2013,31


### Selecting all of the rows and some columns

In [15]:
# A bare ':' as the row indexer keeps every row; the list selects the columns.
df.loc[:,['year','reports']]

Unnamed: 0,year,reports
Cochice,2012,4
Pima,2012,24
Santa Cruz,2013,31
Maricopa,2014,2
Yuma,2014,3


### Selecting rows by integer position with .iloc - positions start from 0 here

In [16]:
# .iloc selects by 0-based integer position rather than by label; the rows
# come back in the order requested (positions 3, 2, 4).
df.iloc[[3,2,4]]

Unnamed: 0,name,year,reports
Maricopa,Jake,2014,2
Santa Cruz,Tina,2013,31
Yuma,Amy,2014,3


### Use slice notation to select a range of rows with .iloc

In [17]:
# Position slices with .iloc exclude the stop value: 3:5 returns the rows
# at positions 3 and 4 only.
df.iloc[3:5]

Unnamed: 0,name,year,reports
Maricopa,Jake,2014,2
Yuma,Amy,2014,3


### Use comparison operator with a single column of data

In [18]:
# Boolean mask with one True/False entry per row: True where year is 2013 or later.
criteria = df['year'] >= 2013
# Indexing the frame with the mask keeps only the rows where it is True.
df[criteria]

Unnamed: 0,name,year,reports
Santa Cruz,Tina,2013,31
Maricopa,Jake,2014,2
Yuma,Amy,2014,3


### Boolean selection in one line

In [19]:
# Each condition is parenthesised because & binds more tightly than >= and ==;
# the combined mask keeps rows that satisfy both conditions.
df[
    (df['year'] >= 2013)
    & (df['name'] == 'Tina')
]

Unnamed: 0,name,year,reports
Santa Cruz,Tina,2013,31


### Filtering rows with isin

In [20]:
# isin builds a mask that is True where year equals any value in the list.
df[df['year'].isin([2013, 2014])]

Unnamed: 0,name,year,reports
Santa Cruz,Tina,2013,31
Maricopa,Jake,2014,2
Yuma,Amy,2014,3


In [21]:
# Same pattern on a string column: keep rows whose name is in the list.
df[df['name'].isin(['Tina', 'Amy'])]

Unnamed: 0,name,year,reports
Santa Cruz,Tina,2013,31
Yuma,Amy,2014,3
