# loc: label-based selection

In [1]:
# Small integer Series with the default 0..2 index, used by the .loc cells below.
s = pd.Series(data=[1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [2]:
# Every requested label exists, so .loc returns exactly those two rows.
s.loc[[1, 2]]

1    2
2    3
dtype: int64

In [3]:
# Record the pandas version in use: the missing-label .loc behaviour in the next
# cell is version-dependent (its warning says it will raise KeyError in the future).
pd.__version__

'0.23.4'

In [4]:
# Deliberate demonstration: label 3 is not in the index. On the pandas version used
# here this only warns and returns NaN for the missing label (see the output below);
# the warning states it will raise KeyError in the future and recommends .reindex().
s.loc[[1, 2, 3]]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


1    2.0
2    3.0
3    NaN
dtype: float64

# reindex: select labels that may be missing (filled with NaN)

In [7]:
# reindex tolerates the missing label 3 and fills it with NaN.
s.reindex(index=[2, 3])

2    3.0
3    NaN
dtype: float64

# sample: random sampling of rows or columns

In [8]:
# Six-element Series (values 0..5) used by the sampling examples that follow.
s = pd.Series(data=list(range(6)))
s

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

In [12]:
# Draw five of the six rows at random (sampling without replacement is the default).
s.sample(n=5)

1    1
0    0
3    3
5    5
2    2
dtype: int64

In [13]:
# frac requests a fraction of the rows instead of a count; 0.3 of the 6 rows
# produced 2 rows in the run shown below.
s.sample(frac=0.3)

5    5
0    0
dtype: int64

In [14]:
# n equals the Series length and replace=False, so this is a shuffle:
# every label appears exactly once in the output.
s.sample(n=6, replace=False)

3    3
1    1
0    0
4    4
2    2
5    5
dtype: int64

In [15]:
# Sampling with replacement: the same label may be drawn more than once
# (labels 0 and 4 both repeat in the run shown below).
s.sample(n=6, replace=True)

2    2
0    0
1    1
4    4
0    0
4    4
dtype: int64

In [19]:
# One weight per row, in row order; the first two rows are given weight 0.
# NOTE(review): zero-weight rows are presumably never drawn (consistent with the
# run below) — confirm against the pandas `sample` documentation.
example_weights = [0, 0, 0.2, 0.2, 0.2, 0.4]
s.sample(n=2, weights=example_weights)

3    3
4    4
dtype: int64

In [25]:
# The sampling weights are read from a column of the frame, referenced by name.
df2 = pd.DataFrame({
    'col1': [9, 8, 7, 6],
    'weight_column': [0.5, 0.4, 0.1, 0],
})
df2.sample(n=3, weights='weight_column')

Unnamed: 0,col1,weight_column
2,7,0.1
1,8,0.4
0,9,0.5


In [28]:
# Sample along the column axis: one whole column is picked at random.
df3 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]})
df3.sample(n=1, axis='columns')

Unnamed: 0,col2
0,2
1,3
2,4


# boolean: building boolean masks

In [31]:
# Two label columns plus a column of random normal draws (values differ on every run).
df2 = pd.DataFrame({
    'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
    'b': list('xyyxyxx'),
    'c': np.random.randn(7),
})
df2

Unnamed: 0,a,b,c
0,one,x,-0.939774
1,one,y,0.240208
2,two,y,-3.346636
3,three,x,-2.219826
4,two,y,1.286471
5,one,x,0.124517
6,six,x,0.066182


In [35]:
# Two equivalent ways to flag the rows whose 'a' label begins with 't':
# the vectorised .str accessor, and an element-wise map.
df2['a'].str.startswith('t')
df2['a'].map(lambda label: label.startswith('t'))

0    False
1    False
2     True
3     True
4     True
5    False
6    False
Name: a, dtype: bool

0    False
1    False
2     True
3     True
4     True
5    False
6    False
Name: a, dtype: bool

# isin: membership tests for values and index labels

In [36]:
# Values 0..4 stored under a descending index (4, 3, 2, 1, 0).
s = pd.Series(data=np.arange(5), index=np.arange(5)[::-1], dtype='int64')
# A boolean mask keeps the Series' own order and simply skips the missing label 6 ...
s.loc[s.index.isin([2, 4, 6])]
# ... whereas reindex follows the requested order and inserts NaN for label 6.
s.reindex(index=[2, 4, 6])

4    0
2    2
dtype: int64

2    2.0
4    0.0
6    NaN
dtype: float64

In [37]:
# Series over the 2 x 3 product index (0, 1) x ('a', 'b', 'c').
s_mi = pd.Series(
    data=np.arange(6),
    index=pd.MultiIndex.from_product([[0, 1], list('abc')]),
)
# Match complete index tuples; (2, 'b') is not in the index and is ignored.
s_mi.iloc[s_mi.index.isin([(1, 'a'), (2, 'b'), (0, 'c')])]
# Match on the second level only; 'e' is not in that level and is ignored.
s_mi.iloc[s_mi.index.isin(['a', 'c', 'e'], level=1)]

0  c    2
1  a    3
dtype: int32

0  a    0
   c    2
1  a    3
   c    5
dtype: int32

In [38]:
df = pd.DataFrame({
    'vals': [1, 2, 3, 4],
    'ids': ['a', 'b', 'f', 'n'],
    'ids2': ['a', 'n', 'c', 'n'],
})

# A flat list is tested against every column of the frame.
values = ['a', 'b', 1, 3]
df.isin(values)

Unnamed: 0,vals,ids,ids2
0,True,True,True
1,False,True,False
2,True,False,False
3,False,False,False


In [40]:
# With a dict, each column is tested only against its own list;
# a column without a key (ids2 here) comes back all False in the output below.
values = {
    'ids': ['a', 'b'],
    'vals': [1, 3],
}
df.isin(values)

Unnamed: 0,vals,ids,ids2
0,True,True,False
1,False,True,False
2,True,False,False
3,False,False,False


In [47]:
# Keep only the rows in which every column matches its own allowed list.
values = {
    'ids': ['a', 'b'],
    'ids2': ['a', 'c'],
    'vals': [1, 3],
}
row_mask = df.isin(values).all(axis=1)
df.loc[row_mask]

Unnamed: 0,vals,ids,ids2
0,1,a,a


In [53]:
# Show the element-wise membership table, then its row-wise reduction
# (True only where every column matched).
df.isin(values)
df.isin(values).all(axis=1)

Unnamed: 0,vals,ids,ids2
0,True,True,True
1,False,True,False
2,True,False,True
3,False,False,False


0     True
1    False
2    False
3    False
dtype: bool

In [1]:
# Two letter columns plus two columns of random integers (values differ on every run).
df = pd.DataFrame({
    'a': list('aabbccddeeff'),
    'b': list('aaaabbbbcccc'),
    'c': np.random.randint(5, size=12),
    'd': np.random.randint(9, size=12),
})
# Keep the rows whose 'a' value occurs anywhere in column 'b'.
df.query('a in b')

Unnamed: 0,a,b,c,d
0,a,a,3,7
1,a,a,1,1
2,b,a,1,2
3,b,a,2,6
4,c,b,2,2
5,c,b,0,6


In [2]:
# The boolean mask behind the query above: is each 'a' value present somewhere in column 'b'?
df['a'].isin(df['b'])

0      True
1      True
2      True
3      True
4      True
5      True
6     False
7     False
8     False
9     False
10    False
11    False
Name: a, dtype: bool

# lookup: pick one value per (row label, column label) pair

In [8]:
# 9 x 4 frame of uniform random numbers (values differ on every run).
dflookup = pd.DataFrame(np.random.rand(9, 4), columns=['A', 'B', 'C', 'D'])
dflookup
# Row and column labels are paired element-wise: (0, 'B'), (2, 'C'), (4, 'A'), ...
lookup_rows = list(range(0, 10, 2))
lookup_rows
lookup_cols = ['B', 'C', 'A', 'B', 'D']
# DataFrame.lookup was deprecated in pandas 1.2 and removed in 2.0, so the same
# label-based pick is done by translating labels to positions and indexing the
# underlying array. This form also works on the pandas 0.23 release this
# notebook was run with.
# Caveat: get_indexer returns -1 for an unknown label instead of raising; every
# label used here exists in the frame.
lookup_values = dflookup.values[dflookup.index.get_indexer(lookup_rows),
                                dflookup.columns.get_indexer(lookup_cols)]
lookup_values


Unnamed: 0,A,B,C,D
0,0.606326,0.525432,0.348183,0.109167
1,0.377874,0.188566,0.809605,0.615646
2,0.159053,0.932305,0.456713,0.424693
3,0.653597,0.64746,0.167332,0.763162
4,0.824759,0.501047,0.529417,0.079046
5,0.56885,0.237805,0.941439,0.506292
6,0.842279,0.551948,0.154407,0.980388
7,0.480111,0.016512,0.463015,0.39559
8,0.303028,0.741454,0.214862,0.322639


[0, 2, 4, 6, 8]

array([ 0.52543161,  0.45671312,  0.82475873,  0.5519485 ,  0.3226389 ])

[0, 2, 4, 6, 8]