# Select from DataFrame in Pandas

Selecting subsets of a pandas DataFrame by index name (label, via `.loc`), by index number (integer position, via `.iloc`), or by a combination of the two.

In [3]:
import pandas as pd
import numpy as np

## Create some data

In [6]:
# Build three columns of 10 standard-normal draws each.
# The columns are generated in the order x, y, z.
dat = {col: np.random.randn(10) for col in ("x", "y", "z")}

# Wrap the dict in a DataFrame; rows get the default integer index.
df = pd.DataFrame(dat)
df

Unnamed: 0,x,y,z
0,-0.970676,0.022743,-0.908223
1,1.430364,1.859994,0.764382
2,1.014552,0.567665,1.722854
3,-0.701633,0.552635,-0.745384
4,0.306508,-0.21052,2.459138
5,-0.129986,-0.915758,0.155168
6,-0.371922,-0.204974,-0.210158
7,2.135655,0.183849,0.4863
8,-0.058303,-0.157851,0.343819
9,-1.445776,0.700987,0.013517


## Select by index number (position, `.iloc`)

### Select single element

In [11]:
# Position-based lookup: row position 0, column position 0 -> a single scalar value.
df.iloc[0, 0]

-0.97067622464362835

### Select entire column

In [13]:
# All rows (:) of the column at position 1 (the second column), returned as a Series.
df.iloc[:, 1]

0    0.022743
1    1.859994
2    0.567665
3    0.552635
4   -0.210520
5   -0.915758
6   -0.204974
7    0.183849
8   -0.157851
9    0.700987
Name: y, dtype: float64

### Select from DataFrame by slicing

In [19]:
# Positional slices exclude the end point: row positions 0-4 and column positions 0-1.
df.iloc[:5, 0:2]

Unnamed: 0,x,y
0,-0.970676,0.022743
1,1.430364,1.859994
2,1.014552,0.567665
3,-0.701633,0.552635
4,0.306508,-0.21052


## Select by name (label, `.loc`)

### Select single column

In [20]:
# Label-based lookup: all rows (:) of the column labelled "x", returned as a Series.
df.loc[:, "x"]

0   -0.970676
1    1.430364
2    1.014552
3   -0.701633
4    0.306508
5   -0.129986
6   -0.371922
7    2.135655
8   -0.058303
9   -1.445776
Name: x, dtype: float64

### Select multiple (list of) columns

In [26]:
# A list of column labels selects just those columns and returns a DataFrame.
df.loc[:, ["x", "z"]]

Unnamed: 0,x,z
0,-0.970676,-0.908223
1,1.430364,0.764382
2,1.014552,1.722854
3,-0.701633,-0.745384
4,0.306508,2.459138
5,-0.129986,0.155168
6,-0.371922,-0.210158
7,2.135655,0.4863
8,-0.058303,0.343819
9,-1.445776,0.013517


### Select multiple (list of) rows

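Note that `.loc` selects by row label, not by position. This works here only because the DataFrame has the default integer index, so the row labels (0–9) happen to coincide with the row numbers.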

In [32]:
# The list holds row labels (not positions); the trailing ":" keeps every column.
df.loc[[0,1,2], :]

Unnamed: 0,x,y,z
0,-0.970676,0.022743,-0.908223
1,1.430364,1.859994,0.764382
2,1.014552,0.567665,1.722854


## Select by row number and column name (i.e., mix selectors)

In [35]:
# df.index[2:6] converts row positions 2-5 into their row labels,
# which .loc can then combine with a list of column names.
df.loc[df.index[2:6], ['x', 'y']]

Unnamed: 0,x,y
2,1.014552,0.567665
3,-0.701633,0.552635
4,0.306508,-0.21052
5,-0.129986,-0.915758


This follows the convention:

`df.loc[df.index[num], [list of column names]]`