# Chapter 6: Operators (& Dunder Methods)

In [2]:
import pandas as pd
import numpy as np

In [3]:
url = "http://github.com/mattharrison/datasets/raw/master/data/vehicles.csv.zip"
df = pd.read_csv(url)
city_mpg = df.city08
highway_mpg = df.highway08

  df = pd.read_csv(url)


In [7]:
tmp_df = df[['city08','highway08']]

In [8]:
tmp_df

Unnamed: 0,city08,highway08
0,19,25
1,9,14
2,23,33
3,10,12
4,17,23
...,...,...
41139,19,26
41140,20,28
41141,18,24
41142,18,24


## 6.2 Dunder Methods

In [1]:
2+4

6

In [3]:
(2).__add__(4)

6

In [10]:

(city_mpg + highway_mpg) /2


Unnamed: 0,0
0,22.0
1,11.5
2,28.0
3,11.0
4,20.0
...,...
41139,22.5
41140,24.0
41141,21.0
41142,21.0


In [None]:
2 + 4

6

In [None]:
## under the covers, python runs this
(2).__add__(4)

6

## 6.3 Index Alignment

- Can apply most math operations on a series with another series
- Can also use scalar
- When operating with two series, pandas will align the index before performing the operation
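- Aligning takes each index entry in the left series and matches it up with every entry that has the same label in the index of the right series, so duplicated labels produce one result row per matching pair
- Make sure that the indexes are unique (no duplicates) and are common to both series; otherwise duplicates multiply the number of rows and labels found in only one series produce ``NaN``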

In [None]:
# repeated index series
s1 = pd.Series([10, 20, 30], index=[1, 2, 2])
s2 = pd.Series([35, 44, 53], index=[2, 2, 4], name='s2')

In [11]:
s1 = pd.Series([10,20,30], index = [1,2,2])
s1

Unnamed: 0,0
1,10
2,20
2,30


In [13]:
s2 = pd.Series([40,50,60], index = [2,2,42], name = 's2')
s2

Unnamed: 0,s2
2,40
2,50
42,60


In [14]:
s1+s2

Unnamed: 0,0
1,
2,60.0
2,70.0
2,70.0
2,80.0
42,


In [15]:
s1.add(s2, fill_value = 0)

Unnamed: 0,0
1,10.0
2,60.0
2,70.0
2,70.0
2,80.0
42,60.0


In [16]:
s1//s2

Unnamed: 0,0
1,
2,0.0
2,0.0
2,0.0
2,0.0
42,


In [17]:
s1/s2

Unnamed: 0,0
1,
2,0.5
2,0.4
2,0.75
2,0.6
42,


In [21]:
city_mpg.add(highway_mpg).div(2)

Unnamed: 0,0
0,22.0
1,11.5
2,28.0
3,11.0
4,20.0
...,...
41139,22.5
41140,24.0
41141,21.0
41142,21.0


In [4]:
city_mpg.pow(2)

Unnamed: 0,city08
0,361
1,81
2,529
3,100
4,289
...,...
41139,361
41140,400
41141,324
41142,324


In [6]:
city_mpg.eq(highway_mpg).nunique()

2

In [7]:
city_mpg.eq(highway_mpg).sort_values()

Unnamed: 0,0
0,False
27401,False
27402,False
27403,False
27404,False
...,...
17064,True
7600,True
19105,True
20391,True


In [9]:
city_mpg <= highway_mpg

Unnamed: 0,0
0,True
1,True
2,True
3,True
4,True
...,...
41139,True
41140,True
41141,True
41142,True


In [10]:
city_mpg.gt(highway_mpg)

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
41139,False
41140,False
41141,False
41142,False


In [11]:
city_mpg

Unnamed: 0,city08
0,19
1,9
2,23
3,10
4,17
...,...
41139,19
41140,20
41141,18
41142,18


In [22]:
df['new_col2'] = highway_mpg-city_mpg

In [23]:
df

Unnamed: 0,barrels08,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,...,charge240b,c240bDscr,createdOn,modifiedOn,startStop,phevCity,phevHwy,phevComb,new_col,new_col2
0,15.695714,0.0,0.0,0.0,19,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,6.0
1,29.964545,0.0,0.0,0.0,9,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,5.0
2,12.207778,0.0,0.0,0.0,23,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,10.0
3,29.964545,0.0,0.0,0.0,10,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,2.0
4,17.347895,0.0,0.0,0.0,17,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41139,14.982273,0.0,0.0,0.0,19,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,7.0
41140,14.330870,0.0,0.0,0.0,20,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,8.0
41141,15.695714,0.0,0.0,0.0,18,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,6.0
41142,15.695714,0.0,0.0,0.0,18,0.0,0,0.0,0.0,0.0,...,0.0,,Tue Jan 01 00:00:00 EST 2013,Tue Jan 01 00:00:00 EST 2013,,0,0,0,30,6.0


Unnamed: 0,city08
0,19
1,9
2,23
3,10
4,17
...,...
41140,20
41141,18
41142,18
41143,16


In [24]:
city_mpg

Unnamed: 0,city08
0,19
1,9
2,23
3,10
4,17
...,...
41140,20
41141,18
41142,18
41143,16


In [25]:
city_mpg.add(100)

Unnamed: 0,city08
0,119
1,109
2,123
3,110
4,117
...,...
41140,120
41141,118
41142,118
41143,116


In [27]:
city_mpg.add(city_mpg)

Unnamed: 0,city08
0,38
1,18
2,46
3,20
4,34
...,...
41140,40
41141,36
41142,36
41143,32


In [34]:
city_mpg.quantile([.1,.25,.50,.75,.9,.99])

Unnamed: 0,city08
0.1,13.0
0.25,15.0
0.5,17.0
0.75,20.0
0.9,24.0
0.99,40.0


In [29]:
city_mpg.is_unique

False

In [32]:
city_mpg.sort_values().is_monotonic_increasing

True

## 6.4 Broadcasting

- When we perform math operations with a scalar, pandas broadcasts the operation to all values
- An advantage of broadcasting is that the operations are vectorized: they are optimized and run very quickly on the CPU

## 6.5 Iteration

- Avoid using a for loop with a series; prefer the vectorized operators and methods, which are optimized and much faster

## 6.6 Operator Methods

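- Dunder methods (the operators) fill in ``NaN`` when one of the operands is missing following index alignment; the equivalent operator methods (e.g. ``.add``) accept a ``fill_value`` parameter that is used in place of the missing operand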

In [None]:
s1 + s2

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

In [None]:
s1.add(s2)

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

In [None]:
# the operator method lets us specify a fill value to use for missing operands
s1.add(s2, fill_value=0)

1    10.0
2    55.0
2    64.0
2    65.0
2    74.0
4    53.0
dtype: float64

## 6.7 Chaining

- Most pandas methods do not mutate data in place but return a new object. This allows us to chain method calls. Examples:

| Method | Operator | Description |
| --- | --- | --- |
| s.add(s2) | s + s2 | Adds series |
| s.radd(s2) | s2 + s | Adds series |
| s.sub(s2) | s - s2 | Subtracts series |
| s.rsub(s2) | s2 - s | Subtracts series |
| s.mul(s2), s.multiply(s2) | s * s2 | Multiplies series |
| s.rmul(s2) | s2 * s | Multiplies series |
| s.div(s2), s.truediv(s2) | s / s2 | Divides series |

In [None]:
# calculate the average of city and highway mileage using operators
((city_mpg + highway_mpg)/2)

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: float64

In [None]:
# chaining to calculate the average of city and highway mileage
(city_mpg
.add(highway_mpg)
.div(2))

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: float64