# Chapter 5.2 Essential Functionality

In [1]:
import numpy as np
import pandas as pd

# Four floats with explicit string labels; the labels are deliberately unsorted
# so the reindex example below has something to reorder.
obj = pd.Series(
    data=[4.5, 7.2, -5.3, 3.6],
    index=list('dbac'),
)
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [4]:
# reindex builds a new Series arranged in the order of the requested labels.
# 'e' is not in obj's index, so its value shows up as NaN.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [2]:
obj.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [8]:
# numpy is imported together with pandas in the first cell of the notebook.
# 3x3 frame holding 0..8, with row labels a-c and one column per state code.
frame = pd.DataFrame(
    np.arange(9).reshape((3, 3)),
    index=['a', 'b', 'c'],
    columns=['oh', 'tx', 'ca'],
)
frame

Unnamed: 0,oh,tx,ca
a,0,1,2
b,3,4,5
c,6,7,8


In [9]:
# Reindexing the rows with a label that frame lacks ('d') adds that row filled with NaN.
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2

Unnamed: 0,oh,tx,ca
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,,,


In [10]:
# Passing columns= reindexes the column axis instead of the rows;
# here it only reorders the existing columns to tx, ca, oh.
state = ['tx', 'ca', 'oh']
frame3 = frame2.reindex(columns = state)
frame3

Unnamed: 0,tx,ca,oh
a,1.0,2.0,0.0
b,4.0,5.0,3.0
c,7.0,8.0,6.0
d,,,


In [12]:
frame3.loc[['a', 'c'], state]

Unnamed: 0,tx,ca,oh
a,1.0,2.0,0.0
c,7.0,8.0,6.0


In [15]:
frame3.columns

Index(['tx', 'ca', 'oh'], dtype='object')

In [16]:
# Give the column axis a name; it appears as the header label when the frame is displayed.
frame3.columns.name = 'states'
frame3

states,tx,ca,oh
a,1.0,2.0,0.0
b,4.0,5.0,3.0
c,7.0,8.0,6.0
d,,,


## Drop

In [17]:
obj = pd.Series(np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
obj

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [19]:
new_obj = obj.drop('c')
new_obj

a    0
b    1
d    3
e    4
dtype: int64

In [20]:
obj.drop(['a', 'c'])

b    1
d    3
e    4
dtype: int64

In [22]:
# 4x4 frame of 0..15; rows are labelled a, b, c, e and columns are state codes.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    columns=['oh', 'tx', 'ca', 'ma'],
    index=['a', 'b', 'c', 'e'],
)
data

Unnamed: 0,oh,tx,ca,ma
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
e,12,13,14,15


In [30]:
# Dropping a row label returns a new frame; data itself still contains row 'b'
# (the following cells display it unchanged).
data.drop('b')


Unnamed: 0,oh,tx,ca,ma
a,0,1,2,3
c,8,9,10,11
e,12,13,14,15


In [33]:
data.drop('ca', axis=1)


Unnamed: 0,oh,tx,ma
a,0,1,3
b,4,5,7
c,8,9,11
e,12,13,15


In [34]:
# axis='columns' is the named spelling of axis=1 and gives the same result as the previous cell.
data.drop('ca', axis='columns')

Unnamed: 0,oh,tx,ma
a,0,1,3
b,4,5,7
c,8,9,11
e,12,13,15


In [35]:
# With inplace=True, drop modifies data itself, so 'oh' is gone from data afterwards.
# Without inplace, drop returns a new object and leaves data unchanged.
# NOTE(review): this cell is not idempotent - a second run tries to drop 'oh' again.
data.drop('oh', axis=1, inplace=True)
data

Unnamed: 0,tx,ca,ma
a,1,2,3
b,5,6,7
c,9,10,11
e,13,14,15


## Indexing, selection and filtering

In [36]:
obj = pd.Series(np.arange(4), index=['a', 'b', 'd', 'e'])
obj

a    0
b    1
d    2
e    3
dtype: int64

In [37]:
obj['b']

1

In [39]:
# Position-based access goes through .iloc. Plain [] with an integer on a
# string-labelled index relies on a positional fallback that newer pandas
# versions deprecate.
obj.iloc[2]

2

In [40]:
obj[1:3]

b    1
d    2
dtype: int64

In [41]:
obj[['a', 'd']]

a    0
d    2
dtype: int64

In [42]:
obj[obj<2]

a    0
b    1
dtype: int64

In [43]:
# An integer slice with [] is positional and excludes the stop, so this sets
# the values at positions 1 and 2 (labels 'b' and 'd') in place.
obj[1:3] = 100
obj

a      0
b    100
d    100
e      3
dtype: int64

In [44]:
# Rebuild the full 4x4 frame: the in-place drop in the Drop section removed 'oh' from data.
data = pd.DataFrame(np.arange(16).reshape((4,4)), index = ['a', 'b', 'c', 'e'], columns = ['oh', 'tx', 'ca', 'ma'])
data

Unnamed: 0,oh,tx,ca,ma
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
e,12,13,14,15


In [45]:
data[:2]

Unnamed: 0,oh,tx,ca,ma
a,0,1,2,3
b,4,5,6,7


In [46]:
data['ca']

a     2
b     6
c    10
e    14
Name: ca, dtype: int64

In [47]:
data[['ca', 'oh']]

Unnamed: 0,ca,oh
a,2,0
b,6,4
c,10,8
e,14,12


In [48]:
data[data['ca']>6]

Unnamed: 0,oh,tx,ca,ma
c,8,9,10,11
e,12,13,14,15


In [49]:
data['ca'] >6

a    False
b    False
c     True
e     True
Name: ca, dtype: bool

In [50]:
data.loc['b', ['oh', 'ca']]

oh    4
ca    6
Name: b, dtype: int64

In [52]:
# The "i" in iloc stands for integer: rows and columns are selected by numeric position.
# Here: the row at position 2 and the columns at positions 1 and 3.
data.iloc[2, [1,3]]

tx     9
ma    11
Name: c, dtype: int64

In [53]:
data

Unnamed: 0,oh,tx,ca,ma
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
e,12,13,14,15


In [55]:
data.iloc[2]

oh     8
tx     9
ca    10
ma    11
Name: c, dtype: int64

In [58]:
data.iloc[:2, 2:]

Unnamed: 0,ca,ma
a,2,3
b,6,7


In [59]:
# The boolean mask is computed on the full frame; only the cells that belong to the slice are used.
# Cells where the mask is False are shown as NaN.
data.iloc[:2, 2:][data>2]

Unnamed: 0,ca,ma
a,,3
b,6.0,7


In [63]:
# Build the row mask from the slice that is being filtered. A mask taken from the
# full frame also carries labels 'c' and 'e', which the slice does not have, so
# pandas has to reindex it and emits a warning. The callable passed to .loc
# receives the sliced frame, so mask and frame always share the same index.
data.iloc[:2, 2:].loc[lambda part: part['ma'] > 3]

  """Entry point for launching an IPython kernel.


Unnamed: 0,ca,ma
b,6,7


In [66]:
data.iloc[:2, 2:][data>2].fillna(0)

Unnamed: 0,ca,ma
a,0.0,3
b,6.0,7


In [67]:
frame

Unnamed: 0,oh,tx,ca
a,0,1,2
b,3,4,5
c,6,7,8


In [82]:
frame['oh'].max()

6

In [84]:
# apply calls the function once per column by default.
# Here the lambda returns the difference between each column's largest and smallest value.
frame.apply(lambda x: x.max()-x.min())

oh    6
tx    6
ca    6
dtype: int64

In [86]:
# With axis='columns' the function is called once per row instead, so the result is indexed by row label.
frame.apply(lambda x: x.max()-x.min(), axis='columns')

a    2
b    2
c    2
dtype: int64

In [88]:
def f(x):
    """Return the smallest and largest value of ``x`` as a Series labelled 'min' and 'max'."""
    lowest = x.min()
    highest = x.max()
    return pd.Series([lowest, highest], index=['min', 'max'])
frame.apply(f)

Unnamed: 0,oh,tx,ca
min,0,1,2
max,6,7,8


In [89]:
# applymap applies the function to every element; here each value is formatted as a string with two decimals.
# NOTE(review): recent pandas releases deprecate applymap in favour of DataFrame.map - confirm against the installed version.
frame.applymap(lambda x: '%.2f' %x)


Unnamed: 0,oh,tx,ca
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
