# Chapter 4: Selecting Subsets of Data
## Recipes
* [Selecting Series data](#Selecting-Series-data)
* [Selecting DataFrame rows](#Selecting-DataFrame-rows)
* [Selecting DataFrame rows and columns simultaneously](#Selecting-DataFrame-rows-and-columns-simultaneously)
* [Selecting data with both integers and labels](#Selecting-data-with-both-integers-and-labels)
* [Speeding up scalar selection](#Speeding-up-scalar-selection)
* [Slicing rows lazily](#Slicing-rows-lazily)
* [Slicing lexicographically](#Slicing-Lexicographically)

In [None]:
import pandas as pd
import numpy as np

# Selecting Series data

In [None]:
# Load the college data using the institution name (INSTNM) column as the
# index, then pull out the CITY column as a Series to practise on.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
city = college['CITY']
city.head()

In [None]:
# A single integer passed to .iloc selects one value by integer position.
city.iloc[3]

In [None]:
# A list of integers passed to .iloc returns a Series with those positions.
city.iloc[[10,20,30]]

In [None]:
# Slice notation (start:stop:step) with .iloc is positional, like a list slice.
city.iloc[4:50:10]

In [None]:
# .loc selects by index label rather than by position.
city.loc['Heritage Christian University']

In [None]:
# Pick four index labels at random; the seed is fixed so the same labels are
# drawn on every run.
np.random.seed(1)
labels = list(np.random.choice(city.index, 4))
labels

In [None]:
# A list of labels passed to .loc returns a Series.
city.loc[labels]

In [None]:
# Label-based slice taking every 10th row; unlike positional slices, a .loc
# slice includes the stop label.
city.loc['Alabama State University':'Reid State Technical College':10]

In [None]:
# The same label slice written with the plain indexing operator on the Series.
city['Alabama State University':'Reid State Technical College':10]

## There's more...

In [None]:
# Wrapping the position in a one-item list asks for a Series result instead
# of a scalar value.
city.iloc[[3]]

In [None]:
# Same two labels as the earlier slice, but with start and stop swapped while
# the step stays positive.
# NOTE(review): presumably this yields an empty Series because the start label
# comes after the stop label in the index - confirm against the data.
city.loc['Reid State Technical College':'Alabama State University':10]

In [None]:
# A negative step walks the index backwards from the start label.
city.loc['Reid State Technical College':'Alabama State University':-10]

# Selecting DataFrame rows

In [None]:
# Reload the dataset so this recipe starts from a fresh DataFrame indexed by
# institution name.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college.head()

In [None]:
# Keep the displayed output short: show at most 6 rows per DataFrame/Series.
pd.options.display.max_rows = 6

In [None]:
# A single integer with .iloc selects one row by position.
college.iloc[60]

In [None]:
# A single label with .loc selects a row by its index label.
college.loc['University of Alaska Anchorage']

In [None]:
# A list of integers selects several rows by position, in the order given.
college.iloc[[60, 99, 3]]

In [None]:
# The label-based counterpart: a list of index labels passed to .loc.
labels = ['University of Alaska Anchorage',
          'International Academy of Hair Design',
          'University of Alabama in Huntsville']
college.loc[labels]

In [None]:
# Positional slice of rows; the stop position (102) is excluded.
college.iloc[99:102]

In [None]:
# Label slice of rows; with .loc the stop label is included in the result.
start = 'International Academy of Hair Design'
stop = 'Mesa Community College'
college.loc[start:stop]

## There's more...

In [None]:
# Select rows by position, then list their index labels; this is a way to
# find the labels that correspond to known integer positions.
college.iloc[[60, 99, 3]].index.tolist()

# Selecting DataFrame rows and columns simultaneously

In [None]:
# Reload the data, then select rows and columns together: the part before the
# comma selects rows and the part after it selects columns.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college.iloc[:3, :4]

In [None]:
# Label-based version: slice rows and columns up to and including the
# given labels.
college.loc[:'Amridge University', :'MENONLY']

In [None]:
# All rows (bare colon) with two columns chosen by integer position.
college.iloc[:, [4,6]].head()

In [None]:
# All rows with two columns chosen by label.
college.loc[:, ['WOMENONLY', 'SATVRMID']]

In [None]:
# Lists of positions for both rows and columns.
college.iloc[[100, 200], [7, 15]]

In [None]:
# Lists of labels for both rows and columns.
rows = ['GateWay Community College', 'American Baptist Seminary of the West']
columns = ['SATMTMID', 'UGDS_NHPI']
college.loc[rows, columns]

In [None]:
# Two scalar positions select a single cell; negative positions count from
# the end.
college.iloc[5, -4]

In [None]:
# Two scalar labels select a single cell.
college.loc['The University of Alabama', 'PCTFLOAN']

In [None]:
# Slice rows backwards by position (negative step) and take one column.
college.iloc[90:80:-2, 5]

In [None]:
# Slice rows backwards by label (negative step) and take one column by label.
start = 'Empire Beauty School-Flagstaff'
stop = 'Arizona State University-Tempe'
college.loc[start:stop:-2, 'RELAFFIL']

# Selecting data with both integers and labels

In [None]:
# Reload the dataset for this recipe.
college = pd.read_csv('data/college.csv', index_col='INSTNM')

In [None]:
# Translate column labels into integer positions so they can be combined with
# positional row selection in .iloc. One is added to the end position because
# an .iloc slice excludes its stop value.
col_start = college.columns.get_loc('UGDS_WHITE')
col_end = college.columns.get_loc('UGDS_UNKN') + 1
col_start, col_end

In [None]:
# First five rows by position, and the columns from UGDS_WHITE through
# UGDS_UNKN using the positions computed above.
college.iloc[:5, col_start:col_end]

## There's more...

In [None]:
# The reverse approach: turn integer row positions into index labels so the
# whole selection can be made with .loc. Both ends of a .loc slice are
# included in the result.
row_start = college.index[10]
row_end = college.index[15]
college.loc[row_start:row_end, 'UGDS_WHITE':'UGDS_UNKN']

# Speeding up scalar selection

In [None]:
# Reload the dataset and select a single cell by row and column label
# with .loc.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
cn = 'Texas A & M University-College Station'
college.loc[cn, 'UGDS_WHITE']

In [None]:
# .at is the scalar-only, label-based accessor; it selects the same cell.
college.at[cn, 'UGDS_WHITE']

In [None]:
# %timeit is an IPython magic: it runs the statement repeatedly and reports
# timing, here to compare .loc against .at for scalar access.
%timeit college.loc[cn, 'UGDS_WHITE']

In [None]:
%timeit college.at[cn, 'UGDS_WHITE']

In [None]:
# Find the integer positions of the same row and column so the positional
# accessors can be timed on the same cell.
row_num = college.index.get_loc(cn)
col_num = college.columns.get_loc('UGDS_WHITE')

In [None]:
row_num, col_num

In [None]:
# Compare .iloc against .iat (the scalar-only positional accessor).
%timeit college.iloc[row_num, col_num]

In [None]:
%timeit college.iat[row_num, col_num]

In [None]:
# Repeat the positional comparison on a different row (position 5).
%timeit college.iloc[5, col_num]

In [None]:
%timeit college.iat[5, col_num]

## There's more...

In [None]:
# The scalar accessors are also available on a Series; take the STABBR column
# to demonstrate.
state = college['STABBR']

In [None]:
# Scalar selection by integer position.
state.iat[1000]

In [None]:
# Scalar selection by index label.
state.at['Stanford University']

# Slicing rows lazily

In [None]:
# Reload the dataset so this recipe starts from a fresh DataFrame indexed by
# institution name.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
# Later cells in this recipe slice `city`; derive it here from the reloaded
# frame so the recipe runs on its own instead of relying on a variable left
# over from the first recipe.
city = college['CITY']
# A slice passed straight to the indexing operator of a DataFrame selects
# rows (here by integer position, every second row from 10 up to 20).
college[10:20:2]

In [None]:
# The same positional slice applied to a Series with the indexing operator.
city[10:20:2]

In [None]:
# Look up the index label stored at integer position 4001.
college.index[4001]

In [None]:
# The indexing operator also accepts label slices (with an optional step)
# and selects rows of the DataFrame.
start = 'Mesa Community College'
stop = 'Spokane Community College'
college[start:stop:1500]

In [None]:
# The same label slice on the Series.
city[start:stop:1500]

## There's more...

In [None]:
# Demonstration of a limitation: the plain indexing operator does not accept
# a row selection and a column selection together, so this cell is expected
# to raise an exception.
college[:10, ['CITY', 'STABBR']]

In [None]:
# Working alternative: collect the first ten index labels and pass them to
# .loc together with the list of column labels.
first_ten_instnm = college.index[:10]
college.loc[first_ten_instnm, ['CITY', 'STABBR']]

# Slicing Lexicographically

In [None]:
# Reload the dataset and try a label slice whose endpoints are partial
# strings, on the index in its file order.
# NOTE(review): this is presumably expected to raise, because the index has
# not been sorted yet and 'Sp'/'Su' are not exact labels - confirm.
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college.loc['Sp':'Su']

In [None]:
# Sort the index (ascending by default) so label slicing can use
# lexicographic order.
college = college.sort_index()

In [None]:
college.head()

In [None]:
# Keep the displayed output short: show at most 6 rows.
pd.options.display.max_rows = 6

In [None]:
# With a sorted index, the slice selects every row whose label falls
# lexicographically between 'Sp' and 'Su'.
college.loc['Sp':'Su']

In [None]:
# Re-sort the index in descending order and check that it is monotonically
# decreasing.
college = college.sort_index(ascending=False)
college.index.is_monotonic_decreasing

In [None]:
# On a descending index the slice endpoints are given in descending order too.
college.loc['E':'B']

In [None]:
# Repeats the selection from the previous cell unchanged.
college.loc['E':'B']