# Ch3.2 Operating on Data in Pandas

## Ufuncs: Index Preservation

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Touch both version attributes as a sanity check that the imports resolved.
# The tuple is not displayed because it is not the cell's last expression.
np.__version__, pd.__version__
# `type()` with no argument raises "TypeError: type() takes 1 or 3 arguments",
# which stops a Restart & Run All at this cell, so an argument is required.
# NOTE(review): `pd.Series` was picked because it reproduces the recorded
# output `<class 'type'>`; confirm this is the object that was intended.
print(type(pd.Series))

<class 'type'>


In [None]:
# Seeded legacy generator: every fresh run yields the same pseudo-random draws.
rng = np.random.RandomState(seed=42)
# Four integers in [0, 10); pandas attaches the default integer index.
ser = pd.Series(rng.randint(low=0, high=10, size=4))
ser

In [None]:
# 3x4 table of integers in [0, 10) drawn from the shared generator `rng`,
# with the columns labelled A through D.
df = pd.DataFrame(data=rng.randint(low=0, high=10, size=(3, 4)),
                  columns=list('ABCD'))
df

In [None]:
# Apply the NumPy exponential ufunc to the Series; the result is another
# Series carrying the same index as `ser`.
ser.pipe(np.exp)

In [None]:
# Same ufunc on the whole DataFrame: row and column labels are carried over.
df.pipe(np.exp)

In [None]:
# Ufunc applied to a single column (selected by label) of the DataFrame.
np.exp(df.loc[:, 'A'])

In [None]:
# Ufunc applied to a single row (selected by position) of the DataFrame.
np.exp(df.iloc[1, :])

In [None]:
# A row pulled out of a DataFrame is a Series whose index is the frame's
# column labels.
df.iloc[1, :].index

In [None]:
# Scale every entry by pi and then divide by 4 (same order as `df * np.pi / 4`)
# before taking the sine; the labels of `df` are preserved throughout.
np.sin(df.mul(np.pi).div(4))

## Ufuncs: Index Alignment

### Index alignment in Series

In [None]:
# Two Series keyed by state name. Their indices only partly overlap:
# 'Alaska' appears only in `area`, 'New York' only in `population`.
area = pd.Series([1723337, 695662, 423967],
                 index=['Alaska', 'Texas', 'California'],
                 name='area')
population = pd.Series([38332521, 26448193, 19651127],
                       index=['California', 'Texas', 'New York'],
                       name='population')

In [None]:
# Display the `area` Series (indexed by state name).
area

In [None]:
# Display the `population` Series (indexed by state name).
population

In [None]:
# Element-wise division aligns the two Series on their index labels.
# The result is indexed by the union of both indices; any label present in
# only one operand has no partner value and is filled with NaN by default.
population.div(area)

In [None]:
# Two short Series with offset indices: labels 1 and 2 are shared, while
# label 0 exists only in A and label 3 only in B.
A = pd.Series(data=[2, 4, 6],
              index=[0, 1, 2])
B = pd.Series(data=[1, 3, 5],
              index=[1, 2, 3])

In [None]:
# Display Series A (index labels 0, 1, 2).
A

In [None]:
# Display Series B (index labels 1, 2, 3).
B

In [None]:
# Label-aligned addition: labels found in only one of the two Series
# come out as NaN.
A.add(B)

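If NaN is not the desired behavior, use the ``add()`` method instead of the ``+`` operator: it allows explicit specification of the fill value for any elements in ``A`` or ``B`` that might be missing: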

In [None]:
# Same aligned addition, but a label missing from one operand is treated
# as 0 instead of producing NaN.
A.add(other=B, fill_value=0)

### Index alignment in DataFrame

In [None]:
# 2x2 frame of integers in [0, 20) from the shared generator, columns A and B.
A = pd.DataFrame(data=rng.randint(low=0, high=20, size=(2, 2)),
                 columns=['A', 'B'])
A

In [None]:
# 3x3 frame of integers in [0, 10). The columns are deliberately ordered
# B, A, C so that they do not line up positionally with frame A.
B = pd.DataFrame(data=rng.randint(low=0, high=10, size=(3, 3)),
                 columns=['B', 'A', 'C'])
B

In [None]:
# Addition aligns on both row and column labels; cells that exist in only
# one of the two frames become NaN.
A.add(B)

In [None]:
# Aligned addition where a cell missing from one frame is treated as 0.
# Cells missing from both frames still come out as NaN.
A.add(other=B, fill_value=0)

Alternatively, fill the missing entries with the mean of all values in ``A`` (computed by first stacking the rows of ``A``):

In [None]:
# Collapse the 2-D frame into one long Series (one entry per cell), so that
# a single aggregate can be taken over every value in A.
A.stack()

In [None]:
# Mean over every cell of A: stack() flattens the frame to a Series first,
# so mean() returns one scalar rather than one value per column.
fill = A.stack().mean()
fill

In [None]:
# Overall mean of A, computed in this cell so it does not rely on any other
# cell having defined `fill`.
fill = A.stack().mean()
# Aligned addition, using that mean wherever one frame lacks a cell.
A.add(other=B, fill_value=fill)

## Ufuncs: Operations Between DataFrame and Series

In [None]:
# Small 2x3 NumPy array used to illustrate row-wise broadcasting.
ar = np.array([[1, 2, 3],
               [10, 20, 30]])
ar

In [None]:
# First row of the array.
ar[0, :]

According to NumPy's broadcasting rules, subtraction between a two-dimensional array and one of its rows is applied row-wise:

In [None]:
# The first row is broadcast across the array and subtracted from every row.
ar - ar[0, :]

In Pandas, the convention similarly operates row-wise by default:

In [None]:
# 3x4 array of integers in [0, 10) from the shared generator.
# Passing a single bound to randint means "upper bound, lower bound 0",
# which is spelled out here.
A = rng.randint(low=0, high=10, size=(3, 4))
A

In [None]:
# Wrap the array in a DataFrame with column labels Q, R, S, T.
# The rows keep the default integer index.
df = pd.DataFrame(data=A, columns=['Q', 'R', 'S', 'T'])
df

In [None]:
# First row of the frame, returned as a Series indexed by the column labels.
df.iloc[0, :]

In [None]:
# Subtract the first row from every row: the row's labels (Q, R, S, T) are
# matched against the frame's columns, so the operation runs row-wise.
df.sub(df.iloc[0])

In [None]:
# Method form of the same row-wise subtraction. The axis is named explicitly
# here; 'columns' (i.e. 1) is also what the method uses when axis is omitted.
df.sub(df.iloc[0], axis='columns')

To operate column-wise instead, use the object methods (such as ``subtract``) and specify the ``axis`` keyword:

In [None]:
# Re-display the frame for reference before the column-wise examples.
df

In [None]:
# Column R as a Series; its index is the frame's row labels.
df.loc[:, 'R']

In [None]:
# Column-wise subtraction: axis 0 ('index') matches the Series' labels
# against the row index, so column R is subtracted from every column.
df.sub(df['R'], axis='index')

In [None]:
# Counter-example: with axis 1 ('columns') the Series' labels are matched
# against the column labels. Column R is labelled by row (0, 1, 2), none of
# which match Q/R/S/T, so nothing aligns and every entry of the result is NaN.
# Omitting `axis` altogether, i.e. df.subtract(df['R']), behaves the same way
# because 'columns' is the default.
df.sub(df['R'], axis='columns')