## Manipulating values in a DataFrame

## How you should do it: best practice

In [7]:
import pandas as pd

# Load the Titanic passenger data from the CSV file (path is relative to the
# current working directory) and preview the first five rows.
titanic = pd.read_csv(filepath_or_buffer='titanic.csv')
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## We are working with a whole DataFrame

## 1. Changing a single value (option 1 with loc)

In [8]:
# Read one cell by row label (1) and column label ('age').
titanic.loc[1, 'age']

38.0

In [9]:
# Overwrite that single cell in place; the column is float64, so the value
# is stored and displayed as 40.0.
titanic.loc[1, 'age'] = 40

In [10]:
# Verify the assignment: row 1 should now show age 40.0.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## 2. Changing a single value (option 2 with iloc)

In [11]:
# iloc selects by integer position. Look up the position of the 'age' column
# rather than hard-coding 3, which would silently point at a different column
# if the column order ever changed.
titanic.iloc[1, titanic.columns.get_loc('age')]

40.0

In [12]:
# Overwrite one cell by position. The column position is derived from the
# 'age' label so the write cannot land in the wrong column if the column
# order differs from what is expected.
titanic.iloc[1, titanic.columns.get_loc('age')] = 41

In [13]:
# Verify the assignment: row 1 should now show age 41.0.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,41.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## 3. Changing multiple values in a column (option 1 with loc)

In [15]:
# A label slice with loc includes BOTH end points: this returns rows 1, 2 and 3.
titanic.loc[1:3, 'age']

1    41.0
2    26.0
3    35.0
Name: age, dtype: float64

In [16]:
# A scalar on the right-hand side is written to every selected row (1, 2 and 3).
titanic.loc[1:3, 'age'] = 42

In [17]:
# Verify the assignment: rows 1 through 3 should now all show age 42.0.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,42.0,0,0,7.925,S,
3,1,1,female,42.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## 4. Changing multiple values in a column (option 2 with iloc)

In [18]:
# Positional slices with iloc EXCLUDE the end point: 1:4 covers positions
# 1, 2 and 3. The column position is looked up from the 'age' label instead
# of hard-coding 3, so the cell stays correct if the column order changes.
titanic.iloc[1:4, titanic.columns.get_loc('age')]

1    42.0
2    42.0
3    42.0
Name: age, dtype: float64

In [19]:
# Assign a list element-wise to positions 1, 2 and 3 (the list length must
# match the number of selected rows). The column position is derived from the
# 'age' label so the write cannot land in the wrong column.
titanic.iloc[1:4, titanic.columns.get_loc('age')] = [43, 44, 45]

In [20]:
# Verify the assignment: rows 1, 2 and 3 should now show ages 43.0, 44.0 and 45.0.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,43.0,1,0,71.2833,C,C
2,1,3,female,44.0,0,0,7.925,S,
3,1,1,female,45.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## 5. Changing multiple values in a column with boolean indexing

In [21]:
# Remember the row labels of passengers younger than one year BEFORE their
# ages are overwritten, so the same rows can be displayed again afterwards.
index_babies = titanic.loc[titanic['age'] < 1, 'age'].index
index_babies

Int64Index([78, 305, 469, 644, 755, 803, 831], dtype='int64')

In [22]:
# Boolean mask as the row selector: show the ages that are below 1.
titanic.loc[titanic['age'] < 1, 'age']

78     0.83
305    0.92
469    0.75
644    0.75
755    0.67
803    0.42
831    0.83
Name: age, dtype: float64

In [23]:
# Set every age below 1 to 1. Passing the mask and the column to a single
# .loc call writes to the original frame (no chained indexing).
titanic.loc[titanic['age'] < 1, 'age'] = 1

In [25]:
# Re-select the rows remembered in index_babies (the mask `age < 1` no longer
# matches them); their age should now read 1.0.
titanic.loc[index_babies, :]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


## 6. Changing multiple values in a row

In [26]:
# State of the first rows before row 0 is modified.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,43.0,1,0,71.2833,C,C
2,1,3,female,44.0,0,0,7.925,S,
3,1,1,female,45.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [27]:
# The column label slice 'survived':'sex' includes both end points, so it
# covers survived, pclass and sex; the list is assigned to them in that order.
titanic.loc[0, 'survived':'sex'] = [1, 1, 'female']

In [28]:
# Verify the assignment: row 0 should now show survived=1, pclass=1, sex='female'.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,0,7.25,S,
1,1,1,female,43.0,1,0,71.2833,C,C
2,1,3,female,44.0,0,0,7.925,S,
3,1,1,female,45.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## 7. Changing multiple values in multiple rows and columns

In [29]:
# Replace every cell equal to 0, in any column, with the string 'Zero'.
# replace() returns a new DataFrame; the result is not assigned here, so
# `titanic` itself is left unchanged.
# NOTE(review): putting strings into numeric columns presumably changes them
# to object dtype - check with .dtypes before relying on the result.
titanic.replace(to_replace=0, value='Zero')

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,Zero,7.25,S,
1,1,1,female,43.0,1,Zero,71.2833,C,C
2,1,3,female,44.0,Zero,Zero,7.925,S,
3,1,1,female,45.0,1,Zero,53.1,S,C
4,Zero,3,male,35.0,Zero,Zero,8.05,S,
...,...,...,...,...,...,...,...,...,...
886,Zero,2,male,27.0,Zero,Zero,13,S,
887,1,1,female,19.0,Zero,Zero,30,S,B
888,Zero,3,female,,1,2,23.45,S,
889,1,1,male,26.0,Zero,Zero,30,C,C
