In [16]:
import numpy as np
import pandas as pd
import warnings
# Suppress every warning category for the rest of the session (presumably to keep cell output tidy).
# NOTE(review): this also hides pandas/NumPy deprecation warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')
# Seeded legacy generator so the random example data drawn from `rng` is reproducible.
rng = np.random.RandomState(42)

## Index Alignment in Series

In [2]:
# Two Series keyed by state name; their indices overlap only on Texas and California.
area = pd.Series(
    [1723337, 695662, 423967],
    index=['Alaska', 'Texas', 'California'],
    name='area',
)
population = pd.Series(
    [38332521, 26448193, 19651127],
    index=['California', 'Texas', 'New York'],
    name='population',
)

In [5]:
# Element-wise division aligned on the union of both indices;
# states missing from either Series come out as NaN.
population.div(area)

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [11]:
# Labels that appear in exactly one of the two indices (symmetric difference).
# The `^` operator form of this set operation was deprecated and, as of
# pandas 2.0, no longer performs a set operation on an Index, so the named
# method is used instead. The result is the same sorted Index of labels.
area.index.symmetric_difference(population.index)

Index(['Alaska', 'New York'], dtype='object')

In [12]:
# Two integer Series whose indices overlap only on labels 1 and 2.
A = pd.Series(data=[2, 4, 6], index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5], index=[1, 2, 3])
# Addition aligns on the union of the indices; labels present in only one
# operand yield NaN, which is why the result is float64.
A.add(B)

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [15]:
# Treat a label missing from one operand as 0 so no NaN is produced,
# then cast the float result back to integers.
filled_sum = A.add(B, fill_value=0)
filled_sum.astype(int)

0    2
1    5
2    9
3    5
dtype: int64

## Index Alignment in DataFrame

In [20]:
# 2x2 frame of random integers in [0, 20) with columns A and B.
A = pd.DataFrame(
    rng.randint(0, 20, size=(2, 2)),
    columns=['A', 'B'],
)
A

Unnamed: 0,A,B
0,10,3
1,7,2


In [21]:
# 3x3 frame of random integers in [0, 10); the columns are deliberately
# ordered B, A, C so they do not line up positionally with frame A.
B = pd.DataFrame(
    rng.randint(0, 10, size=(3, 3)),
    columns=['B', 'A', 'C'],
)
B

Unnamed: 0,B,A,C
0,5,4,1
1,7,5,1
2,4,0,9


In [22]:
# Addition aligns on both row and column labels regardless of their order;
# cells that exist in only one frame become NaN.
B + A

Unnamed: 0,A,B,C
0,14.0,8.0,
1,12.0,9.0,
2,,,


In [25]:
# Collapse A into a single Series so the mean is taken over every cell
# rather than per column.
flattened = A.stack()
fill = flattened.mean()
fill

5.5

In [28]:
# Same overall mean, computed on the underlying NumPy array instead of stacking.
A.to_numpy().mean()

5.5

In [29]:
# Cells missing from one frame are replaced by `fill` before adding,
# so the aligned result contains no NaN.
B.add(other=A, fill_value=fill)

Unnamed: 0,A,B,C
0,14.0,8.0,6.5
1,12.0,9.0,6.5
2,5.5,9.5,14.5


Pandas methods and their operator equivalents
add(): +
sub(), subtract(): -
mul(), multiply(): *
div(), divide(), truediv(): /
floordiv(): //
mod(): %
pow(): **

In [30]:
# 3x4 array of random integers in [0, 10).
# Note: this rebinds `A`, which earlier cells used for a Series and a DataFrame.
A = rng.randint(0, 10, size=(3, 4))
A

array([[5, 8, 0, 9],
       [2, 6, 3, 8],
       [2, 4, 2, 6]])

## Ufuncs: Operations Between DataFrame and Series

In [32]:
# Wrap the array in a DataFrame with one labelled column per array column.
df = pd.DataFrame(data=A, columns=['Q', 'R', 'S', 'T'])
df

Unnamed: 0,Q,R,S,T
0,5,8,0,9
1,2,6,3,8
2,2,4,2,6


In [36]:
# First row, then its 1st and 3rd entries, selected purely by position.
first_row = df.iloc[0]
first_row.iloc[[0, 2]]

Q    5
S    0
Name: 0, dtype: int64

In [43]:
# Subtract the first row from every row; the Series is matched to the
# frame's column labels and applied row by row.
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-3,-2,3,-1
2,-3,-4,2,-3


## Handling Missing Data

In [44]:
# A Python None among the numbers forces NumPy to fall back to an object array.
raw_with_none = [1, None, 3, 4]
vals1 = np.asarray(raw_with_none)
vals1

array([1, None, 3, 4], dtype=object)

In [47]:
# Inspect the dtype NumPy chose for the array that contains None.
vals1.dtype

dtype('O')

In [48]:
# NaN is itself a float, so the array keeps a native float64 dtype.
vals2 = np.array([1.0, np.nan, 3.0, 4.0])

In [49]:
# Inspect the dtype NumPy chose for the array that contains NaN.
vals2.dtype

dtype('float64')

In [54]:
# NaN-aware aggregates skip the missing entry instead of propagating NaN.
(np.nansum(vals2), np.nanmin(vals2))

(8.0, 1.0)

### Operating on Null Values

In [55]:
# Object Series holding both kinds of null marker (NaN and None) next to real values.
mixed_values = [1, np.nan, 'hello', None]
data = pd.Series(mixed_values)

In [56]:
# Boolean mask: True wherever the value is NaN or None.
data.isna()

0    False
1     True
2    False
3     True
dtype: bool

In [57]:
# Use the null mask as a row selector to keep only the missing entries.
missing_mask = data.isna()
data.loc[missing_mask]

1     NaN
3    None
dtype: object

In [58]:
# Keep only the entries that are not null; `data` itself is left unchanged.
data[data.notna()]

0        1
2    hello
dtype: object

In [63]:
# Small frame with one missing value in column A and one in column B;
# column C is complete. Note: this rebinds `df` from the earlier QRST frame.
df = pd.DataFrame({
    'A': [1, 2, np.nan],
    'B': [np.nan, 3, 4],
    'C': [2, 5, 6],
})
df

Unnamed: 0,A,B,C
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [64]:
# Drop every row that contains at least one null value (the defaults, spelled out).
df.dropna(axis='index', how='any')

Unnamed: 0,A,B,C
1,2.0,3.0,5


In [65]:
# Drop every column that contains at least one null value.
df.dropna(axis='columns')

Unnamed: 0,C
0,2
1,5
2,6


In [69]:
# Keep only the rows whose value in column B is not null;
# nulls in other columns are left in place.
df.dropna(subset=['B'])

Unnamed: 0,A,B,C
1,2.0,3.0,5
2,,4.0,6


### Filling Null Values

In [70]:
# Numeric Series with two gaps; None is coerced to NaN because the dtype is float64.
# Note: this rebinds `data` from the earlier object Series.
data = pd.Series(
    data=[1, np.nan, 2, None, 3],
    index=['a', 'b', 'c', 'd', 'e'],
)
data

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64

In [71]:
# Forward-fill: each gap takes the last valid value that precedes it.
# `fillna(method=...)` is deprecated in pandas 2.1+; `ffill()` is the
# direct equivalent and returns a new Series without modifying `data`.
data.ffill()

a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64

In [72]:
# Back-fill: each gap takes the next valid value that follows it.
# `fillna(method=...)` is deprecated in pandas 2.1+; `bfill()` is the
# direct equivalent and returns a new Series without modifying `data`.
data.bfill()

a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64