In [1]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame

# Indexing

In [2]:
# Four integers labelled 'A' through 'D'.
ser1 = Series(data=[1, 2, 3, 4], index=list('ABCD'))
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [3]:
# Show the Index object that holds ser1's row labels.
ser1.index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [4]:
# Keep a reference to ser1's Index so it can be inspected on its own.
my_index = ser1.index

In [5]:
# An Index supports positional element access like a sequence.
my_index[3]

'D'

In [6]:
# An Index is immutable, so we can't change its values in place.
# Uncomment the line below to see what happens (pandas raises a TypeError).
# my_index[3] = 'A'

In [7]:
# Reindex onto a longer label set: labels missing from ser1 ('E', 'F')
# come back as NaN, so the result is float64 rather than int64.
ser2 = ser1.reindex(list('ABCDEF'))
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [8]:
# fill_value is used only for labels introduced by this reindex ('G').
# NaNs that were already in ser2 ('E', 'F') are left untouched.
ser2 = ser2.reindex(list('ABCDEFG'), fill_value=0)
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [9]:
# Country names on a sparse integer index (labels 0, 5 and 10).
ser3 = Series({0: 'USA', 5: 'Mexico', 10: 'Canada'})

ser3

0        USA
5     Mexico
10    Canada
dtype: object

In [10]:
# Dense target index 0..14 used to expand ser3.
ranger = range(0, 15)
ranger

range(0, 15)

In [11]:
# Expand ser3 onto every label in `ranger`; with method='ffill' each new label
# takes the value of the nearest preceding label that exists in ser3.
ser3.reindex(ranger,method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [12]:
# 5x5 frame of standard-normal samples. The values come from a seeded
# Generator so that Restart & Run All reproduces the same numbers; an unseeded
# np.random.randn call gives different data on every run.
# NOTE: any stored output produced before seeding will differ until re-run.
dframe = DataFrame(
    np.random.default_rng(42).standard_normal((5, 5)),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,1.05293,-0.347542,-0.443725,-0.34009,-0.708796
B,1.29495,2.696456,-0.72886,0.145108,-0.82267
C,-1.475029,0.258388,0.261894,0.748513,-2.275061
D,0.315519,0.752361,-1.163727,-0.57122,-0.286474
E,0.741795,-0.247512,1.83556,-0.132121,0.880136


In [13]:
# Add a row label 'F' that dframe does not have; its row is filled with NaN.
dframe2 = dframe.reindex(index=list('ABCDEF'))
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,1.05293,-0.347542,-0.443725,-0.34009,-0.708796
B,1.29495,2.696456,-0.72886,0.145108,-0.82267
C,-1.475029,0.258388,0.261894,0.748513,-2.275061
D,0.315519,0.752361,-1.163727,-0.57122,-0.286474
E,0.741795,-0.247512,1.83556,-0.132121,0.880136
F,,,,,


In [14]:
# Column labels col1..col6 (col6 does not exist in the frame yet).
new_columns = [f'col{n}' for n in range(1, 7)]

In [15]:
# Reindex along the column axis; the new 'col6' column is all NaN.
dframe2.reindex(new_columns, axis='columns')

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,1.05293,-0.347542,-0.443725,-0.34009,-0.708796,
B,1.29495,2.696456,-0.72886,0.145108,-0.82267,
C,-1.475029,0.258388,0.261894,0.748513,-2.275061,
D,0.315519,0.752361,-1.163727,-0.57122,-0.286474,
E,0.741795,-0.247512,1.83556,-0.132121,0.880136,
F,,,,,,


# Drop entries

In [16]:
# Rebind ser1 to a fresh three-element Series (0, 1, 2) labelled 'a'..'c'.
ser1 = Series(np.arange(3), index=list('abc'))
ser1

a    0
b    1
c    2
dtype: int64

In [17]:
# drop returns a new Series without label 'b'; ser1 itself is not modified.
ser1.drop(labels='b')

a    0
c    2
dtype: int64

In [18]:
# 3x3 frame holding 0..8, rows labelled by city and columns by attribute.
dframe1 = DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['SF', 'LA', 'NY'],
    columns=['pop', 'size', 'year'],
)
dframe1

Unnamed: 0,pop,size,year
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [19]:
# Remove the 'LA' row; the result is a new frame and dframe1 is unchanged.
dframe2 = dframe1.drop(index='LA')
dframe2

Unnamed: 0,pop,size,year
SF,0,1,2
NY,6,7,8


In [20]:
# Remove the 'year' column (axis=1 is the column axis).
dframe1.drop(columns='year')

Unnamed: 0,pop,size
SF,0,1
LA,3,4
NY,6,7


# Select entry

In [21]:
# Rebind ser1 to the doubled values 0, 2, 4 labelled 'a'..'c'.
ser1 = 2 * Series(np.arange(3), index=list('abc'))
ser1

a    0
b    2
c    4
dtype: int64

In [22]:
# Select a single value by its index label.
ser1['b']

2

In [23]:
# Select by position. Use .iloc explicitly: a bare integer key on a Series
# with string labels (ser1[1]) relies on a positional fallback that pandas
# has deprecated and will treat as a label lookup in a future version.
ser1.iloc[1]

2

In [24]:
# Select several entries by position. .iloc makes the positional intent
# explicit; a list of integer keys on a string-labelled Series otherwise
# depends on the deprecated positional fallback of Series.__getitem__.
ser1.iloc[[1, 2]]

b    2
c    4
dtype: int64

In [25]:
# First two entries by position (the stop position is excluded).
ser1.iloc[:2]

a    0
b    2
dtype: int64

In [26]:
# Select several entries by label.
ser1.loc[['a', 'b']]

a    0
b    2
dtype: int64

In [27]:
# Boolean mask: keep only the entries whose value is greater than 3.
ser1.loc[ser1 > 3]

c    4
dtype: int64

In [28]:
# Overwrite, in place, every entry greater than 3 with 10.
# This mutates ser1, so earlier displays of ser1 no longer reflect its state.
ser1.loc[ser1 > 3] = 10
ser1

a     0
b     2
c    10
dtype: int64

In [29]:
# Rebind dframe to a 5x5 frame holding 0..24, rows labelled by city and
# columns labelled 'A'..'E'.
dframe = DataFrame(
    data=np.arange(25).reshape(5, 5),
    index=['NYC', 'LA', 'SF', 'DC', 'Chi'],
    columns=list('ABCDE'),
)
dframe

Unnamed: 0,A,B,C,D,E
NYC,0,1,2,3,4
LA,5,6,7,8,9
SF,10,11,12,13,14
DC,15,16,17,18,19
Chi,20,21,22,23,24


In [30]:
# A single column label returns that column as a Series named after it.
dframe['B']

NYC     1
LA      6
SF     11
DC     16
Chi    21
Name: B, dtype: int64

In [31]:
# A list of column labels returns a DataFrame with just those columns.
dframe[['B','E']]

Unnamed: 0,B,E
NYC,1,4
LA,6,9
SF,11,14
DC,16,19
Chi,21,24


In [32]:
# Keep only the rows whose value in column 'C' is greater than 8.
dframe.loc[dframe['C'] > 8]

Unnamed: 0,A,B,C,D,E
SF,10,11,12,13,14
DC,15,16,17,18,19
Chi,20,21,22,23,24


In [33]:
# Element-wise comparison: a boolean frame of the same shape as dframe.
dframe.gt(10)

Unnamed: 0,A,B,C,D,E
NYC,False,False,False,False,False
LA,False,False,False,False,False
SF,False,True,True,True,True
DC,True,True,True,True,True
Chi,True,True,True,True,True


In [34]:
# .loc with a row label returns that row as a Series indexed by column name.
dframe.loc['LA']

A    5
B    6
C    7
D    8
E    9
Name: LA, dtype: int64