In [2]:
import pandas as pd
import numpy as np

In [3]:
# Essential Functions

In [4]:
# 1) Reindexing
# Start from a Series whose labels are deliberately not in alphabetical order.
obj = pd.Series(
    data=[4.5, 7.2, -5.3, 3.8],
    index=list('dbac'),
)
obj

d    4.5
b    7.2
a   -5.3
c    3.8
dtype: float64

In [5]:
# Conform obj to a new label order; 'e' has no source value, so it becomes NaN.
obj2 = obj.reindex(index=list('abcde'))
obj2

a   -5.3
b    7.2
c    3.8
d    4.5
e    NaN
dtype: float64

In [6]:
# A Series whose integer labels (0, 2, 4) leave gaps between them.
obj3 = pd.Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)
obj3

0      blue
2    purple
4    yellow
dtype: object

In [7]:
# Forward-fill while reindexing: each missing label takes the value of the
# closest existing label before it.
obj3.reindex(index=range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [8]:
# Reindexing a DataFrame: reindex can change the row index, the columns, or both

In [9]:
# 3x3 frame holding 0..8; the row labels skip 'b'.
frame = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=list('acd'),
    columns=['Ohio', 'Texas', 'California'],
)
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [10]:
# Reindex the rows: the new 'b' row has no source data and is filled with NaN,
# which is why the values are displayed as floats in the result.
frame2 = frame.reindex(index=['a', 'b', 'c', 'd'])
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [11]:
# columns can be reindexed

In [12]:
# Keep Texas and California, leave out Ohio, and add an all-NaN 'Utah' column.
states = ['Texas', 'Utah', 'California']
frame.reindex(states, axis='columns')

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [13]:
# 2) Dropping Entries from an axis

In [14]:
# Five floats 0.0..4.0 labelled 'a'..'e', used for the drop examples.
obj = pd.Series(
    data=np.arange(5.0),
    index=list('abcde'),
)
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [15]:
# drop returns a NEW Series without the given label; obj itself is untouched.
new_obj = obj.drop(labels='c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [16]:
# Several labels can be dropped at once by passing a list.
obj.drop(index=['d', 'c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [17]:
# Dropping from a DataFrame: a 4x4 frame of 0..15 with state names as rows.
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'North Carolina', 'Utah'],
    columns=['one', 'two', 'three', 'four'],
)
df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
North Carolina,8,9,10,11
Utah,12,13,14,15


In [18]:
# With no axis given, drop removes ROWS; index= spells that out.
df.drop(index=['Colorado', 'Ohio'])

Unnamed: 0,one,two,three,four
North Carolina,8,9,10,11
Utah,12,13,14,15


In [19]:
# Remove a single column; columns= is the keyword form of axis='columns'.
df.drop(columns='two')

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
North Carolina,8,10,11
Utah,12,14,15


In [20]:
# Remove several columns at once.
df.drop(columns=['one', 'three'])

Unnamed: 0,two,four
Ohio,1,3
Colorado,5,7
North Carolina,9,11
Utah,13,15


In [21]:
# Indexing, Selection, Filtering

In [22]:
# Four floats 0.0..3.0 labelled 'a'..'d', used for the indexing examples.
obj = pd.Series(
    data=np.arange(4.0),
    index=list('abcd'),
)
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [23]:
# Look up a single value by its label (same result as obj['a']).
obj.loc['a']

0.0

In [24]:
# Look up a single value by POSITION. Use .iloc explicitly: obj[0] on a
# string-labelled Series relies on pandas treating an integer key as a
# position, a fallback that is deprecated (FutureWarning since pandas 2.1)
# and slated for removal.
obj.iloc[0]

0.0

In [25]:
# Integer slices are positional and exclude the end point: positions 1 and 2.
obj.iloc[1:3]

b    1.0
c    2.0
dtype: float64

In [26]:
# A list of labels selects those entries, in the order given.
obj.loc[['a', 'b', 'c']]

a    0.0
b    1.0
c    2.0
dtype: float64

In [27]:
# Boolean filtering: keep only the entries whose value is greater than 2.
obj[obj.gt(2)]

d    3.0
dtype: float64

In [28]:
# Slicing with labels includes BOTH end points, unlike positional slicing.
obj.loc['b':'c']

b    1.0
c    2.0
dtype: float64

In [29]:
# Assigning through a label slice overwrites every selected entry ('a' and 'b')
# and modifies obj in place.
obj.loc['a':'b'] = 6
obj

a    6.0
b    6.0
c    2.0
d    3.0
dtype: float64

In [30]:
# Indexing into a DataFrame: a 4x4 frame of 0..15 with state names as rows.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'NewYork'],
    columns=['One', 'Two', 'Three', 'Four'],
)
data

Unnamed: 0,One,Two,Three,Four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
NewYork,12,13,14,15


In [31]:
# Select one column as a Series (same result as data['Two']).
data.loc[:, 'Two']

Ohio         1
Colorado     5
Utah         9
NewYork     13
Name: Two, dtype: int64

In [32]:
# A list of column labels returns a DataFrame with just those columns.
data.loc[:, ['One', 'Four']]

Unnamed: 0,One,Four
Ohio,0,3
Colorado,4,7
Utah,8,11
NewYork,12,15


In [33]:
# Row slicing by position: the first two rows.
data.iloc[:2]

Unnamed: 0,One,Two,Three,Four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [34]:
# Row slicing by position: everything from position 2 to the end.
data.iloc[2:]

Unnamed: 0,One,Two,Three,Four
Utah,8,9,10,11
NewYork,12,13,14,15


In [35]:
# Keep only the rows whose 'Three' value is greater than 5.
data[data['Three'].gt(5)]

Unnamed: 0,One,Two,Three,Four
Colorado,4,5,6,7
Utah,8,9,10,11
NewYork,12,13,14,15


In [36]:
# Keep only the rows whose 'One' value is greater than 1.
data.loc[data['One'] > 1]

Unnamed: 0,One,Two,Three,Four
Colorado,4,5,6,7
Utah,8,9,10,11
NewYork,12,13,14,15


In [38]:
# Element-wise comparison yields a boolean DataFrame of the same shape.
data.gt(5)

Unnamed: 0,One,Two,Three,Four
Ohio,False,False,False,False
Colorado,False,False,True,True
Utah,True,True,True,True
NewYork,True,True,True,True


In [39]:
# Zero out every element smaller than 5. This modifies data IN PLACE, so all
# later cells see the zeroed values.
data[data.lt(5)] = 0
data

Unnamed: 0,One,Two,Three,Four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
NewYork,12,13,14,15


In [40]:
# Selection with loc and iloc operators

In [44]:
# loc selects by LABEL: the 'Colorado' row, restricted to columns 'One' and 'Two'.
# The result is a Series named after the row label.
data.loc['Colorado', ['One', 'Two']]

One    0
Two    5
Name: Colorado, dtype: int64

In [45]:
# iloc selects by POSITION: row 2 ('Utah'), columns at positions 3, 0 and 1,
# returned in that requested order (Four, One, Two).
data.iloc[2, [3, 0, 1]]

Four    11
One      8
Two      9
Name: Utah, dtype: int64

In [46]:
# A single integer selects the whole row at that position ('Utah') as a Series.
data.iloc[2]

One       8
Two       9
Three    10
Four     11
Name: Utah, dtype: int64

In [48]:
# Row at position 3 ('NewYork'), columns at positions 1 and 2 (Two, Three).
data.iloc[3, [1,2]]

Two      13
Three    14
Name: NewYork, dtype: int64

In [49]:
# loc also accepts label slices: every row up to AND including 'Utah',
# restricted to column 'Two'.
data.loc[:'Utah', 'Two']

Ohio        0
Colorado    5
Utah        9
Name: Two, dtype: int64

In [50]:
# Integer Indexes

In [51]:
# With the default integer index, ser[-1] raises KeyError: the integer is
# treated as a label, and there is no label -1.
ser = pd.Series(data=np.arange(3.0))
ser

0    0.0
1    1.0
2    2.0
dtype: float64

In [55]:
# With a non-integer index an integer key cannot be mistaken for a label.
# Use .iloc for the positional lookup: ser2[-1] relies on pandas treating an
# integer key as a position, a fallback that is deprecated (FutureWarning
# since pandas 2.1). Only the last expression of a cell is displayed, so the
# bare `ser2` that used to precede it had no effect and has been removed.
ser2 = pd.Series(np.arange(3.), index=['a', 'b', 'c'])
ser2.iloc[-1]

2.0

In [56]:
# Arithmetic and Data alignment

In [58]:
# First operand for the alignment demo; labelled a, c, d, e.
s1 = pd.Series(
    data=[7.3, -2.5, 3.4, 1.5],
    index=list('acde'),
)
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [59]:
# Second operand; it shares only the labels a, c and e with s1.
s2 = pd.Series(
    data=[-2.1, 3.6, -1.5, 4.0, 3.1],
    index=list('acefg'),
)
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [60]:
# Arithmetic aligns on labels: the result covers the union of both indexes,
# and any label present in only one operand becomes NaN.
s1.add(s2)

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [61]:
# Alignment with DataFrames: first operand, 3x3 floats with columns b, c, d.
d1 = pd.DataFrame(
    data=np.arange(9.0).reshape(3, 3),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=['b', 'c', 'd'],
)
d1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [62]:
# Second operand, 4x3 floats with columns b, d, e; it overlaps d1 only on
# rows Ohio/Texas and columns b/d.
d2 = pd.DataFrame(
    data=np.arange(12.0).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
d2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [63]:
# Adding frames aligns on BOTH rows and columns; only cells present in both
# operands get a value, everything else is NaN.
d1.add(d2)

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,
