# Pandas DataFrame.loc()

    df.loc[] is mostly about the df.loc['row'] but
    Colon-comma makes your df.loc[:, 'column'] show.
    
So, the API for .loc() is mostly just this:

    df.loc[row-label, col-label]

So for the very common simple task of selecting columns by label, you need to know some special Pythonic magic: 

    df.loc[:, col-label]

This colon is Python shorthand for a slice from beginning to end... or in other words "all rows". This is the single most confusing part of the Pandas API, which is becoming more and more important because the old ways of selecting rows are deprecated.

In [1]:
import pandas as pd

# Raise the column display limit to 500 so wide frames are rendered without
# their middle columns being elided.
pd.set_option('display.max_columns', 500)

def a1_notation(n):
    """Return the spreadsheet-style column letters for a 1-based number.

    1 -> 'A', 26 -> 'Z', 27 -> 'AA', 30 -> 'AD'. Any n that is not greater
    than zero yields an empty string.
    """
    letters = []
    while n > 0:
        # Shift to 0-based before dividing so 26 maps to 'Z' rather than 'A@'.
        n, offset = divmod(n - 1, 26)
        letters.append(chr(ord("A") + offset))
    # Digits were produced least-significant first; flip them for display order.
    return "".join(reversed(letters))

# Build a 30 x 30 demo DataFrame whose rows and columns both carry labels.
# Every row holds the same values 1..30; columns are 'A'..'AD' and the index
# is the same letters with a '-label' suffix.
alist = list(range(1, 31))
A1_list = list(map(a1_notation, alist))
A1_list_plus = [label + "-label" for label in A1_list]
df = pd.DataFrame(
    [alist] * len(alist),
    columns=A1_list,
    index=A1_list_plus,
)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD
A-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
B-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
C-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
D-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
E-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
F-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
G-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
H-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
I-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
J-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30


In [2]:
# Using a single row-label with .loc[] returns that row as a Pandas Series.
df.loc['A-label']

A      1
B      2
C      3
D      4
E      5
F      6
G      7
H      8
I      9
J     10
K     11
L     12
M     13
N     14
O     15
P     16
Q     17
R     18
S     19
T     20
U     21
V     22
W     23
X     24
Y     25
Z     26
AA    27
AB    28
AC    29
AD    30
Name: A-label, dtype: int64

## Let's Talk about Rows

In [4]:
# Using a row-label and a column-label in .loc[] returns the contents of that one cell.
df.loc['A-label', 'AD']

30

In [5]:
# Using a single row-label INSIDE A LIST in .loc[] returns that row as a one-row DataFrame.
df.loc[['A-label']]

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD
A-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30


In [6]:
# Using a list of row-labels in .loc[] returns those specific rows as a DataFrame.
df.loc[['A-label', 'C-label', 'E-label', 'G-label', 'I-label']]

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD
A-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
C-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
E-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
G-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
I-label,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30


In [None]:
# Using row-labels with slice notation in .loc[] returns that range of rows as a DataFrame.
# Unlike ordinary Python slices, label slices include the end label ('G-label' is returned too).
df.loc['A-label':'G-label']

In [None]:
# Dropping the first row-label in slice notation in .loc[] means "start at the top of the DataFrame".
df.loc[:'G-label']

In [None]:
# Dropping the last row-label in slice notation in .loc[] means "continue to the bottom of the DataFrame".
df.loc['W-label':]

In [None]:
# Dropping both row-labels in slice notation in .loc[] selects the entire DataFrame (top to bottom).
df.loc[:]

In [None]:
# As with iloc, there is a comma in the interface which separates row arguments from column arguments.
# Nothing follows the comma here; the column selectors are introduced in the next section.
df.loc[:, ]

## Let's Talk about Columns

In [7]:
# If we use just a single column label after the comma (like using an integer in iloc) we get a Pandas Series.
df.loc[:, 'A']

A-label     1
B-label     1
C-label     1
D-label     1
E-label     1
F-label     1
G-label     1
H-label     1
I-label     1
J-label     1
K-label     1
L-label     1
M-label     1
N-label     1
O-label     1
P-label     1
Q-label     1
R-label     1
S-label     1
T-label     1
U-label     1
V-label     1
W-label     1
X-label     1
Y-label     1
Z-label     1
AA-label    1
AB-label    1
AC-label    1
AD-label    1
Name: A, dtype: int64

In [None]:
# Putting a single column label in a list after the comma returns a one-column DataFrame.
df.loc[:, ['A']]

In [None]:
# This is how we perform the very common task of selecting specific columns by label.
# A label that is listed twice ('A' here) comes back as two columns.
df.loc[:, ['P', 'A', 'N', 'D', 'A', 'S']]

In [None]:
# Label-based slice definitions can be used in place of lists; both end labels are included.
df.loc[:, 'P':'U']

In [None]:
# After the comma you can leave off the ending slice value to mean "through the last column".
df.loc[:, 'P':]

In [None]:
# After the comma you can leave off the beginning slice value to mean "from the first column".
df.loc[:, :'N']

## Rows & Columns

In [None]:
# This is how we get the intersection of label-based slice definitions for rows and columns:
# every row up to and including 'N-label', every column up to and including 'N'.
df.loc[:'N-label', :'N']

In [None]:
# We can use lists of labels to define intersections. If each list has one item, we get a 1-cell DataFrame.
df.loc[['N-label'], ['N']]

In [None]:
# If you're going for a single value, there's not much sense in asking for a DataFrame to be returned.
# Bare labels (no lists) return the cell's contents directly.
df.loc['N-label', 'N']

In [None]:
# Storing the label lists in variables first makes list-intersection requests more readable.
# Repeated labels ('A-label' and 'A' each appear twice) are returned once per occurrence.
rows = ['P-label', 'A-label', 'N-label', 'D-label', 'A-label', 'S-label']
cols = ['P', 'A', 'N', 'D', 'A', 'S']
df.loc[rows, cols]

In [None]:
# We can use a list comprehension with enumerate() and the modulo operator (%) to list
# every other column label. i % 2 is truthy only at odd 0-based positions, so this keeps
# the 2nd, 4th, 6th, ... labels.
[label for i, label in enumerate(df.columns) if i % 2]

In [None]:
# This is how we select every other column (the 2nd, 4th, 6th, ...) by label name:
# build the list of labels at odd 0-based positions and hand it to .loc[] after the comma.
df.loc[:, [label for i, label in enumerate(df.columns) if i % 2]]

In [None]:
# Asking .loc[] for a label that doesn't exist ('foo' here; 'B', 'A' and 'R' are real
# columns) raises a KeyError in pandas 1.0+ (older releases only warned and returned a
# NaN column). .reindex() is the supported way to request columns that may be missing:
# each missing label comes back as a column full of NaN.
df.reindex(columns=['foo', 'B', 'A', 'R'])

In [15]:
# Define df_blah in this cell so it exists after a fresh Restart & Run All: four real
# columns plus the missing label 'foo', which .reindex() fills with NaN.
df_blah = df.reindex(columns=['D', 'H', 'L', 'N', 'foo'])
df_blah

Unnamed: 0,D,H,L,N,foo
A-label,4,8,12,14,
B-label,4,8,12,14,
C-label,4,8,12,14,
D-label,4,8,12,14,
E-label,4,8,12,14,
F-label,4,8,12,14,
G-label,4,8,12,14,
H-label,4,8,12,14,
I-label,4,8,12,14,
J-label,4,8,12,14,
