# Arithmetic Operations

## Data Cleaning

In [4]:
import pandas as pd

In [6]:
# Load the Titanic passenger data and preview the first five rows.
titanic = pd.read_csv(filepath_or_buffer="titanic.csv")
titanic.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [9]:
# Summarize the frame: row count, per-column non-null counts and dtypes.
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   survived  891 non-null    int64  
 1   pclass    891 non-null    int64  
 2   sex       891 non-null    object 
 3   age       714 non-null    float64
 4   sibsp     891 non-null    int64  
 5   parch     891 non-null    int64  
 6   fare      891 non-null    float64
 7   embarked  889 non-null    object 
 8   deck      203 non-null    object 
dtypes: float64(2), int64(4), object(3)
memory usage: 62.8+ KB


In [15]:
# Impute missing ages with the mean age.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on titanic.age: that is an in-place update through
# chained access, which pandas 2.x flags with a FutureWarning and which no
# longer modifies `titanic` once Copy-on-Write is enabled.
titanic["age"] = titanic["age"].fillna(titanic["age"].mean())

## Add/Sub/Mul/Div of Columns

- **Note:** these methods are **not** in-place. They return a new object, so the result must be assigned (e.g. to a column) to keep it.

In [23]:
# sibsp = number of siblings/spouses aboard, parch = number of parents/children aboard.
# We do not need to distinguish the two here, so combine them into a single
# "number of relatives" column.
titanic["no_relat"] = titanic["sibsp"].add(titanic["parch"])
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,no_relat
0,0,3,male,22.0,1,0,7.25,S,,1
1,1,1,female,38.0,1,0,71.2833,C,C,1
2,1,3,female,26.0,0,0,7.925,S,,0
3,1,1,female,35.0,1,0,53.1,S,C,1
4,0,3,male,35.0,0,0,8.05,S,,0


In [79]:
# Load the weekly sales table; the first CSV column (salesperson name) becomes the index.
sales = pd.read_csv(filepath_or_buffer="sales.csv", index_col=0)
sales.head(5)

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,34,27,15,,33
Mike,45,9,74,87.0,12
Andi,17,33,54,8.0,29
Paul,87,67,27,45.0,7


In [81]:
# The Thu column contains a missing value (NaN), and NaN propagates through
# a plain .add(): that row's sum is NaN as well.
sales["Mon"].add(sales["Thu"])

Steven      NaN
Mike      132.0
Andi       25.0
Paul      132.0
dtype: float64

In [83]:
# .add() accepts a fill_value parameter: a missing entry is replaced by this
# value before the addition, so the row no longer ends up as NaN.
sales["Mon"].add(sales["Thu"], fill_value=0)

Steven     34.0
Mike      132.0
Andi       25.0
Paul      132.0
dtype: float64

In [31]:
# Attach each salesperson's bonus rate (as a fraction of sales), one value per row.
sales["perc_bonus"] = [
    0.12,
    0.15,
    0.10,
    0.20,
]
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,perc_bonus
Steven,34,27,15,,33,0.12
Mike,45,9,74,87.0,12,0.15
Andi,17,33,54,8.0,29,0.1
Paul,87,67,27,45.0,7,0.2


In [33]:
# Thursday's bonus in $: Thursday sales times the bonus rate.
# fill_value=0 treats the missing Thursday entry as zero sales.
sales["Thu"].mul(sales["perc_bonus"], fill_value=0)

Steven     0.00
Mike      13.05
Andi       0.80
Paul       9.00
dtype: float64

In [40]:
# Calculate each salesperson's bonus in $ for the whole week:
# (sum of Mon..Fri sales) * perc_bonus.
# The weekday columns are selected by label rather than by position
# (iloc[:, :-1]): once the "bonus" column exists, "everything except the last
# column" would also include perc_bonus, so re-running the cell would silently
# add the bonus rate to the weekly total.
# .sum(axis=1) adds the values along each row (horizontally); NaN entries are skipped.
sales["bonus"] = sales.loc[:, "Mon":"Fri"].sum(axis=1).mul(sales["perc_bonus"])
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,perc_bonus,bonus
Steven,34,27,15,,33,0.12,13.0944
Mike,45,9,74,87.0,12,0.15,34.0725
Andi,17,33,54,8.0,29,0.1,14.11
Paul,87,67,27,45.0,7,0.2,46.64


## Add/Sub/Mul/Div with Scalar Value

In [55]:
# Re-display the first rows of titanic before applying scalar arithmetic to it.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,no_relat
0,0,3,male,22.0,1,0,7.25,S,,1
1,1,1,female,38.0,1,0,71.2833,C,C,1
2,1,3,female,26.0,0,0,7.925,S,,0
3,1,1,female,35.0,1,0,53.1,S,C,1
4,0,3,male,35.0,0,0,8.05,S,,0


In [57]:
# Year of birth = 1912 - age (1912 being the reference year used for this data set).
# rsub(1912) computes `1912 - age` directly instead of `(age - 1912) * -1`.
# No fill_value is passed on purpose: a missing age must stay NaN rather than be
# treated as 0, which would fabricate a year of birth of 1912.
titanic["YoB"] = titanic["age"].rsub(1912)
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,no_relat,YoB
0,0,3,male,22.0,1,0,7.25,S,,1,1890.0
1,1,1,female,38.0,1,0,71.2833,C,C,1,1874.0
2,1,3,female,26.0,0,0,7.925,S,,0,1886.0
3,1,1,female,35.0,1,0,53.1,S,C,1,1877.0
4,0,3,male,35.0,0,0,8.05,S,,0,1877.0


In [61]:
# Remove the helper/raw columns that are no longer needed.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps the
# cell re-runnable: a second execution no longer raises KeyError because the
# columns are already gone.
titanic = titanic.drop(columns=["sibsp", "parch", "deck", "YoB"], errors="ignore")

In [63]:
# Show the current state of the sales table.
sales

Unnamed: 0,Mon,Tue,Wed,Thu,Fri,perc_bonus,bonus
Steven,34,27,15,,33,0.12,13.0944
Mike,45,9,74,87.0,12,0.15,34.0725
Andi,17,33,54,8.0,29,0.1,14.11
Paul,87,67,27,45.0,7,0.2,46.64


In [65]:
# Assume each salesperson incurs a fixed cost of $5 per day.
fixed_cost = 5

In [93]:
# Net sales per day = daily sales minus the fixed daily cost.
# fill_value=0 counts a missing entry as zero sales, so that day nets to -fixed_cost.
# This only displays the result; `sales` itself is left unchanged.
sales.loc[:, "Mon":"Fri"].sub(other=fixed_cost, fill_value=0)

Unnamed: 0,Mon,Tue,Wed,Thu,Fri
Steven,29,22,10,-5.0,28
Mike,40,4,69,82.0,7
Andi,12,28,49,3.0,24
Paul,82,62,22,40.0,2


In [107]:
# To keep the net values, write them back into the weekday columns of `sales`.
# NOTE: this overwrites the original figures, so every re-run of this cell
# subtracts fixed_cost one more time.
sales.loc[:, "Mon":"Fri"] = sales.loc[:, "Mon":"Fri"].sub(other=fixed_cost, fill_value=0)