# Operating on Data in Pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Any NumPy ufunc works on Pandas Series and DataFrame objects

rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0, 10, 4))
ser

0    6
1    3
2    7
3    4
dtype: int64

In [3]:
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [4]:
df = pd.DataFrame(rng.randint(0, 10, (3, 4)),
                  columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [5]:
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16


In [6]:
# Pandas will align indices in the process of performing operation
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
                  'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
                        'New York': 19651127}, name='population')

population / area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [7]:
population.divide(area, fill_value=1000)

Alaska            0.000580
California       90.413926
New York      19651.127000
Texas            38.018740
dtype: float64

In [8]:
# Operations between Series and DataFrame

A = rng.randint(10, size=(3, 4))
df = pd.DataFrame(A)
df

Unnamed: 0,0,1,2,3
0,1,7,5,1
1,4,0,9,5
2,8,0,9,2


In [10]:
df - df.iloc[0]

Unnamed: 0,0,1,2,3
0,0,0,0,0
1,3,-7,4,4
2,7,-7,4,1


In [14]:
df.subtract(df.iloc[0], axis=0)

Unnamed: 0,0,1,2,3
0,0.0,6.0,4.0,0.0
1,-3.0,-7.0,2.0,-2.0
2,3.0,-5.0,4.0,-3.0
3,,,,
