In [2]:
import pandas as pd
import numpy as np

In [49]:
dates = pd.date_range('1/1/2000', periods=8)
dates

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [50]:
# 8x4 frame of random normal draws, one row per day in `dates`,
# with columns labelled A-D. Echoed as the cell's output.
df = pd.DataFrame(
    np.random.randn(8, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2000-01-01,-1.263342,0.666581,0.484328,-0.631289
2000-01-02,-0.210266,0.669452,0.496605,0.284778
2000-01-03,0.27461,-0.428816,-0.294351,0.955873
2000-01-04,-0.911566,1.020426,-2.228317,0.0787
2000-01-05,0.177414,-0.13487,-1.72824,0.551976
2000-01-06,0.035705,-0.560241,0.5264,-0.64275
2000-01-07,-1.600893,1.655818,-0.480091,-0.374639
2000-01-08,-0.498317,-0.543711,-0.279709,0.931254


In [51]:
s = df['A']

In [52]:
s[dates[5]]

0.03570532497066032

In [53]:
# Assigning through [] with a list of columns does not align on column labels,
# so the values of A and B end up exchanged (compare with the frame shown above).
df[['B', 'A']] = df[['A', 'B']]
df

Unnamed: 0,A,B,C,D
2000-01-01,0.666581,-1.263342,0.484328,-0.631289
2000-01-02,0.669452,-0.210266,0.496605,0.284778
2000-01-03,-0.428816,0.27461,-0.294351,0.955873
2000-01-04,1.020426,-0.911566,-2.228317,0.0787
2000-01-05,-0.13487,0.177414,-1.72824,0.551976
2000-01-06,-0.560241,0.035705,0.5264,-0.64275
2000-01-07,1.655818,-1.600893,-0.480091,-0.374639
2000-01-08,-0.543711,-0.498317,-0.279709,0.931254


In [54]:
df[['A', 'B']]

Unnamed: 0,A,B
2000-01-01,0.666581,-1.263342
2000-01-02,0.669452,-0.210266
2000-01-03,-0.428816,0.27461
2000-01-04,1.020426,-0.911566
2000-01-05,-0.13487,0.177414
2000-01-06,-0.560241,0.035705
2000-01-07,1.655818,-1.600893
2000-01-08,-0.543711,-0.498317


In [55]:
# No swap happens here: when setting through .loc, pandas aligns the right-hand
# frame on its column labels first, so each column is assigned its own values.
df.loc[:, ['B', 'A']] = df[['A', 'B']]

In [56]:
df[['A', 'B']]

Unnamed: 0,A,B
2000-01-01,0.666581,-1.263342
2000-01-02,0.669452,-0.210266
2000-01-03,-0.428816,0.27461
2000-01-04,1.020426,-0.911566
2000-01-05,-0.13487,0.177414
2000-01-06,-0.560241,0.035705
2000-01-07,1.655818,-1.600893
2000-01-08,-0.543711,-0.498317


In [57]:
# .to_numpy() strips the labels, so there is nothing to align on and the raw
# values are assigned by position -- this is the form that actually swaps A and B.
df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy()

In [58]:
df[['A', 'B']]

Unnamed: 0,A,B
2000-01-01,-1.263342,0.666581
2000-01-02,-0.210266,0.669452
2000-01-03,0.27461,-0.428816
2000-01-04,-0.911566,1.020426
2000-01-05,0.177414,-0.13487
2000-01-06,0.035705,-0.560241
2000-01-07,-1.600893,1.655818
2000-01-08,-0.498317,-0.543711


### Attribute access

In [59]:
sa = pd.Series([1, 2, 3], index=list('abc'))
sa

a    1
b    2
c    3
dtype: int64

In [60]:
dfa = df.copy()
dfa

Unnamed: 0,A,B,C,D
2000-01-01,-1.263342,0.666581,0.484328,-0.631289
2000-01-02,-0.210266,0.669452,0.496605,0.284778
2000-01-03,0.27461,-0.428816,-0.294351,0.955873
2000-01-04,-0.911566,1.020426,-2.228317,0.0787
2000-01-05,0.177414,-0.13487,-1.72824,0.551976
2000-01-06,0.035705,-0.560241,0.5264,-0.64275
2000-01-07,-1.600893,1.655818,-0.480091,-0.374639
2000-01-08,-0.498317,-0.543711,-0.279709,0.931254


In [61]:
sa['b'] # or sa.b

2

In [62]:
dfa.A # or dfa['A']

2000-01-01   -1.263342
2000-01-02   -0.210266
2000-01-03    0.274610
2000-01-04   -0.911566
2000-01-05    0.177414
2000-01-06    0.035705
2000-01-07   -1.600893
2000-01-08   -0.498317
Freq: D, Name: A, dtype: float64

In [63]:
sa.a = 5
sa

a    5
b    2
c    3
dtype: int64

In [64]:
dfa.A = list(range(len(dfa.index)))
dfa

Unnamed: 0,A,B,C,D
2000-01-01,0,0.666581,0.484328,-0.631289
2000-01-02,1,0.669452,0.496605,0.284778
2000-01-03,2,-0.428816,-0.294351,0.955873
2000-01-04,3,1.020426,-2.228317,0.0787
2000-01-05,4,-0.13487,-1.72824,0.551976
2000-01-06,5,-0.560241,0.5264,-0.64275
2000-01-07,6,1.655818,-0.480091,-0.374639
2000-01-08,7,-0.543711,-0.279709,0.931254


In [65]:
# Bracket assignment: here it overwrites the existing column A with the same
# values; this is also the form to use when a column has to be created.
dfa['A'] = list(range(len(dfa.index)))
dfa

Unnamed: 0,A,B,C,D
2000-01-01,0,0.666581,0.484328,-0.631289
2000-01-02,1,0.669452,0.496605,0.284778
2000-01-03,2,-0.428816,-0.294351,0.955873
2000-01-04,3,1.020426,-2.228317,0.0787
2000-01-05,4,-0.13487,-1.72824,0.551976
2000-01-06,5,-0.560241,0.5264,-0.64275
2000-01-07,6,1.655818,-0.480091,-0.374639
2000-01-08,7,-0.543711,-0.279709,0.931254


In [66]:
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})
x

Unnamed: 0,x,y
0,1,3
1,2,4
2,3,5


In [67]:
x.iloc[1] = {'x': 9, 'y': 99}
x

Unnamed: 0,x,y
0,1,3
1,9,99
2,3,5


In [68]:
dfb = pd.DataFrame({'one': [1., 2., 3.]})
dfb

Unnamed: 0,one
0,1.0
1,2.0
2,3.0


In [69]:
# Caution: `two` is not an existing column, so this attribute assignment sets a
# plain Python attribute on the object instead of adding a column (pandas warns
# about it). Use dfb['two'] = ... to create a column.
dfb.two = [4, 5, 6]

  """Entry point for launching an IPython kernel.


### Slicing ranges

In [70]:
s[:5]

2000-01-01   -1.263342
2000-01-02   -0.210266
2000-01-03    0.274610
2000-01-04   -0.911566
2000-01-05    0.177414
Freq: D, Name: A, dtype: float64

In [71]:
 s[::2]

2000-01-01   -1.263342
2000-01-03    0.274610
2000-01-05    0.177414
2000-01-07   -1.600893
Freq: 2D, Name: A, dtype: float64

In [72]:
s[::-1]

2000-01-08   -0.498317
2000-01-07   -1.600893
2000-01-06    0.035705
2000-01-05    0.177414
2000-01-04   -0.911566
2000-01-03    0.274610
2000-01-02   -0.210266
2000-01-01   -1.263342
Freq: -1D, Name: A, dtype: float64

In [73]:
s2 = s.copy()

In [74]:
s2[:5] = 0
s2

2000-01-01    0.000000
2000-01-02    0.000000
2000-01-03    0.000000
2000-01-04    0.000000
2000-01-05    0.000000
2000-01-06    0.035705
2000-01-07   -1.600893
2000-01-08   -0.498317
Freq: D, Name: A, dtype: float64

In [75]:
df[:3]

Unnamed: 0,A,B,C,D
2000-01-01,-1.263342,0.666581,0.484328,-0.631289
2000-01-02,-0.210266,0.669452,0.496605,0.284778
2000-01-03,0.27461,-0.428816,-0.294351,0.955873


In [76]:
df[::-1]

Unnamed: 0,A,B,C,D
2000-01-08,-0.498317,-0.543711,-0.279709,0.931254
2000-01-07,-1.600893,1.655818,-0.480091,-0.374639
2000-01-06,0.035705,-0.560241,0.5264,-0.64275
2000-01-05,0.177414,-0.13487,-1.72824,0.551976
2000-01-04,-0.911566,1.020426,-2.228317,0.0787
2000-01-03,0.27461,-0.428816,-0.294351,0.955873
2000-01-02,-0.210266,0.669452,0.496605,0.284778
2000-01-01,-1.263342,0.666581,0.484328,-0.631289


### Selection by label

In [77]:
# 5x4 random frame on a daily DatetimeIndex starting 2013-01-01.
# Bound to `dfl`, the name the label-slicing cell that follows reads,
# and echoed so the frame is displayed as this cell's output.
dfl = pd.DataFrame(
    np.random.randn(5, 4),
    columns=list('ABCD'),
    index=pd.date_range('20130101', periods=5),
)
dfl

Unnamed: 0,A,B,C,D
2013-01-01,0.079711,1.168447,-0.358891,-0.148874
2013-01-02,-1.80274,-0.445357,-0.79865,-0.317377
2013-01-03,-0.757718,-0.606927,1.017408,0.200015
2013-01-04,0.28426,-1.126983,-0.361774,-0.002129
2013-01-05,-1.315812,-1.122901,0.200381,0.110441


In [79]:
dfl.loc['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-1.80274,-0.445357,-0.79865,-0.317377
2013-01-03,-0.757718,-0.606927,1.017408,0.200015
2013-01-04,0.28426,-1.126983,-0.361774,-0.002129


In [81]:
s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
s1

a   -0.177791
b   -0.076336
c    0.391827
d    2.848419
e    0.613649
f    0.998582
dtype: float64

In [82]:
s1.loc['c':]

c    0.391827
d    2.848419
e    0.613649
f    0.998582
dtype: float64

In [83]:
s1.loc['b']

-0.07633625323768804

In [85]:
s1.loc['c':] = 0
s1

a   -0.177791
b   -0.076336
c    0.000000
d    0.000000
e    0.000000
f    0.000000
dtype: float64

In [87]:
# 6x4 random frame with string row labels a-f and column labels A-D,
# used for the label-based (.loc) selection examples.
df1 = pd.DataFrame(
    np.random.randn(6, 4),
    index=list('abcdef'),
    columns=list('ABCD'),
)
df1

Unnamed: 0,A,B,C,D
a,-2.759247,1.356175,-1.596958,2.716311
b,-0.84445,-0.760568,-0.008846,-0.978212
c,0.875598,0.692285,-0.766874,0.432695
d,1.938683,-1.614065,-0.404342,1.590187
e,1.099397,-1.049892,1.056034,0.018047
f,0.621995,-0.290165,-1.215836,-0.899609


In [88]:
df1.loc[['a', 'b', 'd'], :]

Unnamed: 0,A,B,C,D
a,-2.759247,1.356175,-1.596958,2.716311
b,-0.84445,-0.760568,-0.008846,-0.978212
d,1.938683,-1.614065,-0.404342,1.590187


In [89]:
df1.loc['d':, 'A':'C']

Unnamed: 0,A,B,C
d,1.938683,-1.614065,-0.404342
e,1.099397,-1.049892,1.056034
f,0.621995,-0.290165,-1.215836


In [90]:
df1.loc['a']

A   -2.759247
B    1.356175
C   -1.596958
D    2.716311
Name: a, dtype: float64

In [91]:
df1.loc['a'] > 0

A    False
B     True
C    False
D     True
Name: a, dtype: bool

In [92]:
df1.loc[:,df1.loc['a'] > 0]

Unnamed: 0,B,D
a,1.356175,2.716311
b,-0.760568,-0.978212
c,0.692285,0.432695
d,-1.614065,1.590187
e,-1.049892,0.018047
f,-0.290165,-0.899609


In [94]:
mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean")
mask

<BooleanArray>
[True, False, True, False, <NA>, False]
Length: 6, dtype: boolean

In [95]:
# The <NA> entry of the nullable boolean mask is treated as False, so only the
# rows whose mask value is True ('a' and 'c') are returned.
df1[mask]

Unnamed: 0,A,B,C,D
a,-2.759247,1.356175,-1.596958,2.716311
c,0.875598,0.692285,-0.766874,0.432695


In [96]:
df1.loc['a', 'A']

-2.759246655320073

In [97]:
df1.at['a','A']

-2.759246655320073

### Slicing with labels 

In [98]:
s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4])
s

0    a
3    b
2    c
5    d
4    e
dtype: object

In [102]:
s.loc[3:5]

3    b
2    c
5    d
dtype: object

In [103]:
s.sort_index()

0    a
2    c
3    b
4    e
5    d
dtype: object

In [104]:
s.sort_index().loc[1:6]

2    c
3    b
4    e
5    d
dtype: object

In [105]:
s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2])
s

0    a
3    b
2    c
5    d
4    e
2    f
dtype: object

In [106]:
s.loc[3:5]

3    b
2    c
5    d
dtype: object

### Selection by position

In [3]:
s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2)))
s1

0   -0.468933
2    0.688373
4   -0.005906
6    1.392219
8   -1.703818
dtype: float64

In [4]:
s1.iloc[:3]

0   -0.468933
2    0.688373
4   -0.005906
dtype: float64

In [5]:
s1.iloc[3]

1.392219240197648

In [7]:
s1.iloc[:3] = 0
s1

0    0.000000
2    0.000000
4    0.000000
6    1.392219
8   -1.703818
dtype: float64

In [9]:
# 6x4 random frame whose row labels (0, 2, ..., 10) and column labels
# (0, 2, 4, 6) are even integers, so labels and positions differ -- this makes
# the positional (.iloc) examples unambiguous.
df1 = pd.DataFrame(
    np.random.randn(6, 4),
    index=list(range(0, 12, 2)),
    columns=list(range(0, 8, 2)),
)

df1

Unnamed: 0,0,2,4,6
0,0.181334,0.543142,0.355106,-0.182668
2,0.174352,-0.523956,-0.262189,-0.620017
4,-0.37163,1.765979,0.228645,2.559595
6,-0.540759,-0.623566,-0.580695,0.33506
8,0.170172,0.323626,-1.3311,-7.6e-05
10,-0.206917,-1.08369,-0.307869,0.522571


In [10]:
df1.iloc[:3]

Unnamed: 0,0,2,4,6
0,0.181334,0.543142,0.355106,-0.182668
2,0.174352,-0.523956,-0.262189,-0.620017
4,-0.37163,1.765979,0.228645,2.559595


In [11]:
df1.iloc[1:5, 2:4]

Unnamed: 0,4,6
2,-0.262189,-0.620017
4,0.228645,2.559595
6,-0.580695,0.33506
8,-1.3311,-7.6e-05


In [15]:
# The lists are integer positions, not labels: row positions 1/3/5 carry the
# labels 2/6/10 and column positions 1/3 carry the labels 2/6.
df1.iloc[[1, 3, 5], [1, 3]]

Unnamed: 0,2,6
2,-0.523956,-0.620017
6,-0.623566,0.33506
10,-1.08369,0.522571


In [16]:
df1.iloc[1:3, :]

Unnamed: 0,0,2,4,6
2,0.174352,-0.523956,-0.262189,-0.620017
4,-0.37163,1.765979,0.228645,2.559595


In [17]:
df1.iloc[:, 1:3]

Unnamed: 0,2,4
0,0.543142,0.355106
2,-0.523956,-0.262189
4,1.765979,0.228645
6,-0.623566,-0.580695
8,0.323626,-1.3311
10,-1.08369,-0.307869


In [18]:
# this is also equivalent to ``df1.iat[1,1]``

df1.iloc[1, 1]

-0.5239561152869976

In [19]:
df1.iloc[1]

0    0.174352
2   -0.523956
4   -0.262189
6   -0.620017
Name: 2, dtype: float64

In [20]:
# Slices whose bounds run past the end of a sequence are allowed in
# Python/NumPy: the result is simply truncated (or empty), as the cells
# below demonstrate.

x = list('abcdef')
x

['a', 'b', 'c', 'd', 'e', 'f']

In [22]:
x[4:10]

['e', 'f']

In [23]:
x[8:10]

[]

In [24]:
s = pd.Series(x)
s

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

In [25]:
s.iloc[4:10]

4    e
5    f
dtype: object

In [26]:
s.iloc[8:10]

Series([], dtype: object)

In [28]:
dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
dfl

Unnamed: 0,A,B
0,0.756465,0.293892
1,0.050647,-1.981363
2,-0.306756,0.157314
3,0.532746,1.680355
4,0.434421,0.078154


In [29]:
dfl.iloc[:, 2:3]

#Empty DataFrame
#Columns: []
#Index: [0, 1, 2, 3, 4]

0
1
2
3
4


In [30]:
dfl.iloc[:, 1:3]

Unnamed: 0,B
0,0.293892
1,-1.981363
2,0.157314
3,1.680355
4,0.078154


In [31]:
dfl.iloc[4:6]

Unnamed: 0,A,B
4,0.434421,0.078154


In [None]:
dfl.iloc[[4, 5, 6]]

# Error - IndexError: positional indexers are out-of-bounds

In [None]:
dfl.iloc[:, 4]

# IndexError: single positional indexer is out-of-bounds

### Selection by callable

In [3]:
# 6x4 random frame with row labels a-f and column labels A-D,
# used for the selection-by-callable examples.
df1 = pd.DataFrame(
    np.random.randn(6, 4),
    index=list('abcdef'),
    columns=list('ABCD'),
)
df1

Unnamed: 0,A,B,C,D
a,-0.563005,0.079383,-1.670475,0.093805
b,1.29788,-0.438511,1.145599,-0.073776
c,-0.696329,0.853656,0.556782,0.555525
d,-0.712985,1.360501,-0.721864,-1.395233
e,-1.045571,1.3342,0.163802,-1.251775
f,1.66809,2.420897,0.358892,0.163338


In [4]:
df1.loc[lambda df: df['A'] > 0, :]

Unnamed: 0,A,B,C,D
b,1.29788,-0.438511,1.145599,-0.073776
f,1.66809,2.420897,0.358892,0.163338


In [5]:
df1.loc[:, lambda df: ['A', 'B']]

Unnamed: 0,A,B
a,-0.563005,0.079383
b,1.29788,-0.438511
c,-0.696329,0.853656
d,-0.712985,1.360501
e,-1.045571,1.3342
f,1.66809,2.420897


In [6]:
df1.iloc[:, lambda df: [0, 1]]

Unnamed: 0,A,B
a,-0.563005,0.079383
b,1.29788,-0.438511
c,-0.696329,0.853656
d,-0.712985,1.360501
e,-1.045571,1.3342
f,1.66809,2.420897


In [7]:
df1[lambda df: df.columns[0]]

a   -0.563005
b    1.297880
c   -0.696329
d   -0.712985
e   -1.045571
f    1.668090
Name: A, dtype: float64

In [8]:
df1['A'].loc[lambda s: s > 0]

b    1.29788
f    1.66809
Name: A, dtype: float64

In [None]:
bb = pd.read_csv('data/baseball.csv', index_col='id')
bb

In [None]:
# Sum the columns within each (year, team) group, then keep only the groups
# whose summed 'r' value exceeds 100. The callable receives the aggregated
# frame, so the filter can be chained without naming an intermediate variable.
(
    bb.groupby(['year', 'team'])
    .sum()
    .loc[lambda df: df['r'] > 100]
)

### Combining positional and label-based indexing

In [11]:
# Small frame with string row labels, used to show how label-based and
# positional indexing can be combined.
dfd = pd.DataFrame(
    {'A': [1, 2, 3], 'B': [4, 5, 6]},
    index=list('abc'),
)

dfd

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [14]:
# To get the 0th and the 2nd elements of the index in the 'A' column, translate
# the positions into labels with dfd.index[...] and pass those labels to .loc:
dfd.loc[dfd.index[[0, 2]], 'A']

a    1
c    3
Name: A, dtype: int64

In [13]:
dfd.iloc[[0, 2], dfd.columns.get_loc('A')]

a    1
c    3
Name: A, dtype: int64

In [15]:
dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])]

Unnamed: 0,A,B
a,1,4
c,3,6


### Indexing with a list containing missing labels is no longer supported

In [19]:
s = pd.Series([1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [20]:
s.loc[[1, 2]]

1    2
2    3
dtype: int64

In [None]:
s.loc[[1, 2, 3]]

# KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Int64Index([3], dtype='int64'). 
# See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"