# Indexing and Selection

In this section, we'll look at indexing and selection for both Series and DataFrames.

In [1]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame

## Series

In [2]:
# A small Series holding 5, 6, 7 under the labels 'x', 'y', 'z'.
ser1 = Series(np.arange(5, 8), index=list('xyz'))
ser1

x    5
y    6
z    7
dtype: int32

In [3]:
# The index holds the labels ('x', 'y', 'z') attached to the values.
ser1.index

Index(['x', 'y', 'z'], dtype='object')

In [4]:
# Look up a single value by its index label.
ser1['x']

5

In [5]:
# Select by integer position. Plain ser1[0] on a label-indexed Series relies on a
# deprecated fallback from labels to positions; .iloc states the intent explicitly.
ser1.iloc[0]

5

In [6]:
# Slicing by label includes both endpoints: 'x' through 'z' returns all three entries.
ser1['x':'z']

x    5
y    6
z    7
dtype: int32

In [7]:
# Passing a list of labels returns a Series with just those entries.
ser1[['x','z']]

x    5
z    7
dtype: int32

### Conditional selection (masking)

In [8]:
# Show the Series again before building boolean masks from it.
ser1

x    5
y    6
z    7
dtype: int32

In [9]:
# Element-wise comparison: one True/False per entry, carrying the same index.
ser1 > 6

x    False
y    False
z     True
dtype: bool

In [10]:
# Indexing with a boolean mask keeps only the entries where the mask is True.
above_six = ser1 > 6
ser1[above_six]

z    7
dtype: int32

In [11]:
# Double only the entries greater than 6; all other entries are left untouched.
# NOTE: this modifies ser1 in place, so re-running the cell doubles any entry
# that is still above 6 a second time.
above_six = ser1 > 6
ser1[above_six] = ser1 * 2
ser1

x     5
y     6
z    14
dtype: int32

## DataFrames

In [12]:
# 5x4 frame filled row by row with 0..19; rows are labelled x, y, z, m, n
# and columns are labelled A, B, C, D.
df = DataFrame(
    np.arange(20).reshape(5, 4),
    index=list('xyzmn'),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
x,0,1,2,3
y,4,5,6,7
z,8,9,10,11
m,12,13,14,15
n,16,17,18,19


In [13]:
# A single column label returns that column as a Series named after the column.
df['A']

x     0
y     4
z     8
m    12
n    16
Name: A, dtype: int32

In [14]:
df[['A','D']]   # a list of column labels selects just those columns, in the order given

Unnamed: 0,A,D
x,0,3
y,4,7
z,8,11
m,12,15
n,16,19


### Selecting rows
#### `loc` → select by index label
#### `iloc` → select by integer position

In [15]:
df.loc['x']  # row labelled 'x', returned as a Series indexed by the column names

A    0
B    1
C    2
D    3
Name: x, dtype: int32

In [16]:
df.iloc[0]   # row at integer position 0, i.e. the first row ('x')

A    0
B    1
C    2
D    3
Name: x, dtype: int32

In [17]:
df.loc[['x','n']]  # a list of row labels selects several rows at once

Unnamed: 0,A,B,C,D
x,0,1,2,3
n,16,17,18,19


In [18]:
df.iloc[[0,-1]] # a list of integer positions; -1 counts from the end, so this is the first and last row

Unnamed: 0,A,B,C,D
x,0,1,2,3
n,16,17,18,19


In [19]:
# Show the full frame again for reference.
df

Unnamed: 0,A,B,C,D
x,0,1,2,3
y,4,5,6,7
z,8,9,10,11
m,12,13,14,15
n,16,17,18,19


In [20]:
df.loc['m','B']  # a single value: row label first, then column label

13

### Conditional selection (masking)

In [21]:
# Element-wise comparison: every cell of the frame becomes True or False.
df > 6

Unnamed: 0,A,B,C,D
x,False,False,False,False
y,False,False,False,True
z,True,True,True,True
m,True,True,True,True
n,True,True,True,True


In [22]:
# Keep only the rows whose value in column 'B' is greater than 9.
b_above_nine = df['B'] > 9
df[b_above_nine]

Unnamed: 0,A,B,C,D
m,12,13,14,15
n,16,17,18,19


In [23]:
# Python's `or` needs a single True/False from each operand, but comparing a column
# yields a whole boolean Series, so pandas raises ValueError. Masks must be combined
# with the element-wise operators (`|`, `&`) instead.
# The error is caught and printed so this demonstration does not halt a full run
# of the notebook.
try:
    df[(df['B']>9) or (df['B']==1)]
except ValueError as err:
    print('ValueError:', err)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [24]:
# Combine two row conditions with the element-wise OR operator `|`:
# keep rows where column 'B' is greater than 9 or exactly 1.
b_above_nine = df['B'] > 9
b_equals_one = df['B'] == 1
df[b_above_nine | b_equals_one]

Unnamed: 0,A,B,C,D
x,0,1,2,3
m,12,13,14,15
n,16,17,18,19
