In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build the demo frame: the integers 0..59 laid out as 12 rows x 5 columns,
# with rows labelled "a".."l" and columns labelled "A".."E".
df = pd.DataFrame(
    data=np.arange(60).reshape(12, 5),
    index=list("abcdefghijkl"),
    columns=list("ABCDE"),
)


#### Getting


In [3]:
# Passing an integer slice to [] slices the rows by position (stop excluded);
# here it returns the first seven rows, "a" through "g":
df[0:7]

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24
f,25,26,27,28,29
g,30,31,32,33,34


In [4]:
# Passing a label slice to [] (__getitem__) also slices the rows;
# unlike a positional slice, both endpoints ("a" and "f") are included:
df["a":"f"]

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24
f,25,26,27,28,29


#### Selection by label

In [5]:
# See more in Selection by Label using the DataFrame.loc and DataFrame.at indexers.

# For getting a cross section using a label (row "a" comes back as a Series
# indexed by the column names):

df.loc["a"]

A    0
B    1
C    2
D    3
E    4
Name: a, dtype: int32

In [6]:
# Selecting on a multi-axis by label: every row (:), columns "A" and "B" only:
df.loc[:, ["A", "B"]]

Unnamed: 0,A,B
a,0,1
b,5,6
c,10,11
d,15,16
e,20,21
f,25,26
g,30,31
h,35,36
i,40,41
j,45,46


In [7]:
# Showing label slicing, both endpoints are included
# (rows "a" through "f", restricted to columns "C" and "D"):
df.loc["a":"f", ["C", "D"]]

Unnamed: 0,C,D
a,2,3
b,7,8
c,12,13
d,17,18
e,22,23
f,27,28


In [8]:
# Reduction in the dimensions of the returned object: a single row label
# combined with a list of columns yields a Series rather than a DataFrame:
df.loc["a", ["A", "E"]]

A    0
E    4
Name: a, dtype: int32

In [9]:
# For getting a scalar value, pass one row label and one column label:
df.loc["a", "A"]

0

In [10]:
# For getting fast access to a scalar (the same kind of lookup as .loc with
# two scalar labels; shown here for row "f", column "E"):
df.at["f", "E"]

29

#### Selection by position

In [11]:
# See more in Selection by Position using the DataFrame.iloc and DataFrame.iat indexers.

# Select via the position of the passed integers (position 2 is the third row, "c"):
df.iloc[2]

A    10
B    11
C    12
D    13
E    14
Name: c, dtype: int32

In [12]:
# By integer slices, acting similar to NumPy/Python: the first three rows
# and every column except the last (a stop of -1 excludes it):
df.iloc[0:3,0:-1]

Unnamed: 0,A,B,C,D
a,0,1,2,3
b,5,6,7,8
c,10,11,12,13


In [13]:
# By lists of integer position locations, similar to the NumPy/Python style
# (rows at positions 1, 3, 4 and columns at positions 0, 3):
df.iloc[[1,3,4], [0,3]]

Unnamed: 0,A,D
b,5,8
d,15,18
e,20,23


#### Boolean indexing

In [14]:
# Combining conditions on two columns with & to select rows
# (each comparison is parenthesized because & binds tighter than <):
df[(df.D <10) & (df.B < 6)]

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4


In [15]:
# Selecting values from the whole DataFrame where a boolean condition is met;
# entries that fail the condition are returned as missing (NaN):
df[df> 50]

Unnamed: 0,A,B,C,D,E
a,,,,,
b,,,,,
c,,,,,
d,,,,,
e,,,,,
f,,,,,
g,,,,,
h,,,,,
i,,,,,
j,,,,,


In [16]:
# Using the isin() method for filtering: keep the rows whose "A" value
# appears in the given list:
df[df.A.isin([0,1])]

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,4


#### Setting

In [17]:
# Setting a new column automatically aligns the data by the indexes.
# NOTE(review): s1 is indexed by dates while df is indexed by the letters
# "a".."l", so no labels match and every value of the new "F" column is NaN.
# If the goal is to show values lining up, give s1 labels taken from df.index.
s1 = pd.Series(range(1, 13), index=pd.date_range("20130102", periods=12))
df["F"] = s1
df

Unnamed: 0,A,B,C,D,E,F
a,0,1,2,3,4,
b,5,6,7,8,9,
c,10,11,12,13,14,
d,15,16,17,18,19,
e,20,21,22,23,24,
f,25,26,27,28,29,
g,30,31,32,33,34,
h,35,36,37,38,39,
i,40,41,42,43,44,
j,45,46,47,48,49,


In [18]:

# Setting values by label (row "a", column "A"); this modifies df in place,
# so later cells see the new value:
df.at["a", "A"] = 45
df

Unnamed: 0,A,B,C,D,E,F
a,45,1,2,3,4,
b,5,6,7,8,9,
c,10,11,12,13,14,
d,15,16,17,18,19,
e,20,21,22,23,24,
f,25,26,27,28,29,
g,30,31,32,33,34,
h,35,36,37,38,39,
i,40,41,42,43,44,
j,45,46,47,48,49,


In [22]:
# Setting values by position (row 0, column 1 is row "a", column "B"):
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D,E,F
a,45,0,2,3,4,
b,5,6,7,8,9,
c,10,11,12,13,14,
d,15,16,17,18,19,
e,20,21,22,23,24,
f,25,26,27,28,29,
g,30,31,32,33,34,
h,35,36,37,38,39,
i,40,41,42,43,44,
j,45,46,47,48,49,


In [25]:
# Setting by assigning with a NumPy array: the array holds one entry per row
# of df and replaces the existing "F" column:
df["F"] = np.array([3]*len(df))
df

Unnamed: 0,A,B,C,D,E,F
a,45,0,2,3,4,3
b,5,6,7,8,9,3
c,10,11,12,13,14,3
d,15,16,17,18,19,3
e,20,21,22,23,24,3
f,25,26,27,28,29,3
g,30,31,32,33,34,3
h,35,36,37,38,39,3
i,40,41,42,43,44,3
j,45,46,47,48,49,3
