# Manipulating values in dataframe

### Best practice

In [23]:
import pandas as pd

In [24]:
titanic = pd.read_csv('titanic.csv')

In [25]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing a single value (option 1: loc)

In [26]:
titanic.loc[1, 'age'] = 40  # change age of row 1 to 40

In [27]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing a single value (option 2: iloc)

In [28]:
titanic.iloc[1, 3] = 41    # iloc is position-based: row position 1, column position 3 (the 'age' column)

In [29]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,41.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


####  Changing multiple values ( option 1: loc)

In [30]:
titanic.loc[1:3, 'age'] = 42  # REMEMBER!!! both ends are included in a loc slice: rows 1, 2 and 3 are changed

In [31]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,42.0,0,0,7.925,S,
3,1,1,female,42.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing multiple values (option 2: iloc)

In [32]:
# Unlike loc, an iloc slice excludes its end: 1:4 selects positions 1, 2 and 3,
# so the list on the right must have exactly three values (one per row).
titanic.iloc[1:4, 3] = [54, 56, 78]

In [33]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,54.0,1,0,71.2833,C,C
2,1,3,female,56.0,0,0,7.925,S,
3,1,1,female,78.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing multiple values in column ( option 3: boolean indexing)

In [34]:
index_babies = titanic.loc[titanic.age < 1, 'age'].index  # save the index labels of all babies (age < 1) before their ages are overwritten

In [35]:
titanic.loc[titanic.age < 1, 'age'] = 1  # rounding all baby ages to 1

In [36]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


### Changing multiple values in a row

In [37]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,54.0,1,0,71.2833,C,C
2,1,3,female,56.0,0,0,7.925,S,
3,1,1,female,78.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [39]:
titanic.loc[0, 'survived':'sex'] = [1, 1, 'female']

In [40]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,0,7.25,S,
1,1,1,female,54.0,1,0,71.2833,C,C
2,1,3,female,56.0,0,0,7.925,S,
3,1,1,female,78.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### Changing multiple values in multiple rows/columns

In [42]:
titanic.replace(0, 'Zero')  # displays a frame with all 0s replaced by the string 'Zero'; the result is not assigned here (replace also has an inplace parameter)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,Zero,7.25,S,
1,1,1,female,54.0,1,Zero,71.2833,C,C
2,1,3,female,56.0,Zero,Zero,7.925,S,
3,1,1,female,78.0,1,Zero,53.1,S,C
4,Zero,3,male,35.0,Zero,Zero,8.05,S,
...,...,...,...,...,...,...,...,...,...
886,Zero,2,male,27.0,Zero,Zero,13.0,S,
887,1,1,female,19.0,Zero,Zero,30.0,S,B
888,Zero,3,female,,1,2,23.45,S,
889,1,1,male,26.0,Zero,Zero,30.0,C,C


## How NOT to do it ( Part 1)

In [43]:
import pandas as pd

In [44]:
titanic = pd.read_csv('titanic.csv')

In [45]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


Example 1:

In [46]:
age = titanic.age

In [47]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [48]:
age[1] = 40 # actually works, but triggers the SettingWithCopyWarning shown below -- not recommended

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 40


In [49]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [54]:
# NOTE: this cell was executed as In [54], i.e. after the chained assignment in
# the next cell (In [53]); that is why row 1 shows 41.0 here rather than 40.0.
titanic.head()  # change reflects here as well

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,41.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [53]:
titanic.age[1] = 41  # Chained indexing

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic.age[1] = 41  # Chained indexing


In [55]:
titanic.loc[1, 'age'] = 42 # a single .loc call is not chained indexing -- this is the recommended way

Chained indexing is bad, and a SettingWithCopyWarning is a sign that something is wrong!

Example 2:

In [57]:
slice1 = titanic[['sex', 'age']]
slice1.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,42.0
2,female,26.0
3,female,35.0
4,male,35.0


In [61]:
slice1.iloc[1,1] = 43  # still chained indexing: slice1 was itself selected from titanic, so pandas warns

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [59]:
slice1  # worked

Unnamed: 0,sex,age
0,male,22.0
1,female,43.0
2,female,26.0
3,female,35.0
4,male,35.0
...,...,...
886,male,27.0
887,female,19.0
888,female,
889,male,26.0


In [60]:
titanic.head()  # original dataframe isn't changed

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


Example 3:

In [68]:
slice2 = titanic.loc[:, ['sex', 'age']]

In [69]:
slice2

Unnamed: 0,sex,age
0,male,22.0
1,female,42.0
2,female,26.0
3,female,35.0
4,male,35.0
...,...,...
886,male,27.0
887,female,19.0
888,female,
889,male,26.0


In [70]:
slice2.iloc[1,1] = 44

In [71]:
slice2.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,44.0
2,female,26.0
3,female,35.0
4,male,35.0


In [73]:
titanic.head()  # no change

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


Example 4: boolean indexing first, then slicing for 'age'

In [74]:
titanic = pd.read_csv('titanic.csv')

In [75]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [76]:
index_babies = titanic[titanic.age < 1].index

In [80]:
# Chained indexing: the boolean .loc selection is evaluated first and ['age'] = 1
# is then applied to that intermediate result, not to titanic (see the unchanged
# ages in the next cell).
titanic.loc[titanic.age < 1]['age'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic.loc[titanic.age < 1]['age'] = 1


In [84]:
titanic.loc[index_babies]  #did not change the values, mission failed

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


Example 5: slicing for 'age' first then boolean indexing, opposite order of example 4

In [87]:
# Chained indexing in the opposite order: the whole 'age' column is selected
# first, and the boolean assignment on it does reach titanic here (see next
# cell) -- but it still raises the warning and should not be relied on.
titanic['age'][titanic.age < 1] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic['age'][titanic.age < 1] = 1


In [89]:
titanic.loc[index_babies]  #worked this time, very confusing

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


Example 6:

In [94]:
titanic[['sex', 'age']][titanic.age == 1]['age'] = 0     # no warning this time, yet titanic is NOT changed (see next cell) -- the failure is silent!!

In [95]:
titanic.loc[index_babies]   # didn't change in original data

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


## View vs copy

### Slicing a Dataframe / creating a view on the original DataFrame 

In [96]:
import pandas as pd

In [97]:
titanic = pd.read_csv('titanic.csv')

In [98]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


REMEMBER!!! Selecting an entire column gives a view on the original DataFrame, so changes made through it also show up in the original.

In [99]:
age = titanic.age  # square-bracket notation, titanic['age'], gives a view as well

In [100]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [101]:
age._is_view  # checking if it's a view or copy (NOTE: underscore-prefixed, i.e. a private pandas attribute -- use for inspection only)

True

In [103]:
age._is_copy is None  # not a copy

True

In [104]:
age[1] = 40  # warning b/c chained indexing

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [105]:
titanic.head()  # this is changed

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### Slicing a dataframe: creating a copy of the original DataFrame

In [106]:
df_baby = titanic[titanic.age < 1]

In [108]:
df_baby._is_view # not a view, but a copy

False

In [111]:
df_baby._is_copy is None  # False -> _is_copy is set, i.e. pandas has flagged df_baby as a copy (hence the warning in the next cell)

False

In [112]:
df_baby.age = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [114]:
df_baby # change happened here

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1,0,2,29.0,S,
305,1,1,male,1,1,2,151.55,S,C
469,1,3,female,1,2,1,19.2583,C,
644,1,3,female,1,2,1,19.2583,C,
755,1,2,male,1,1,1,14.5,S,
803,1,3,male,1,0,1,8.5167,C,
831,1,2,male,1,1,1,18.75,S,


In [117]:
# NOTE: index_babies is not defined in this section; it is reused from
# Example 4 above, so that cell must have been run first.
titanic.loc[index_babies]  # did not happen here because df_baby was a copy

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


---------------------------------------------------------------------------------------------------

# Rules for Manipulating

### If you want to work with the whole Dataframe...

### ... avoid chained Indexing!!

In [119]:
import pandas as pd

In [120]:
titanic = pd.read_csv('titanic.csv')

In [121]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [122]:
# CAUTION: .loc is label-based, so 3 is treated as a column *label*, not a
# position. No column is labelled 3, so this adds a new column named 3 (see the
# output below) instead of changing the age of row 1.
# To change the age, use titanic.loc[1, 'age'] = 40 or titanic.iloc[1, 3] = 40.
titanic.loc[1,3] = 40

In [123]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,3
0,0,3,male,22.0,1,0,7.25,S,,
1,1,1,female,38.0,1,0,71.2833,C,C,40.0
2,1,3,female,26.0,0,0,7.925,S,,
3,1,1,female,35.0,1,0,53.1,S,C,
4,0,3,male,35.0,0,0,8.05,S,,


In [124]:
index_babies = titanic.loc[titanic.age < 1, 'age'].index

In [125]:
titanic.loc[titanic.age < 1, 'age'] = 1

In [127]:
titanic.loc[index_babies]  # worked flawlessly

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,3
78,1,2,male,1.0,0,2,29.0,S,,
305,1,1,male,1.0,1,2,151.55,S,C,
469,1,3,female,1.0,2,1,19.2583,C,,
644,1,3,female,1.0,2,1,19.2583,C,,
755,1,2,male,1.0,1,1,14.5,S,,
803,1,3,male,1.0,0,1,8.5167,C,,
831,1,2,male,1.0,1,1,18.75,S,,


----------------------------------------------------------------------------------------------

### If you want to work with and manipulate a slice of a dataframe...

### ... make copy with .copy( )

In [128]:
import pandas as pd

In [129]:
titanic = pd.read_csv('titanic.csv')

In [130]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [131]:
age = titanic.age.copy()

In [132]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [134]:
age[1] = 40 # no warnings

In [135]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [136]:
titanic.head()     #no changes in original

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [137]:
baby_ages = titanic.loc[titanic.age < 1, ['age', 'sex']].copy()

In [138]:
baby_ages

Unnamed: 0,age,sex
78,0.83,male
305,0.92,male
469,0.75,female
644,0.75,female
755,0.67,male
803,0.42,male
831,0.83,male


In [139]:
baby_ages['age'] = 1

In [141]:
baby_ages  # worked

Unnamed: 0,age,sex
78,1,male
305,1,male
469,1,female
644,1,female
755,1,male
803,1,male
831,1,male


In [145]:
# NOTE: index_babies is reused from the previous section; it is not recomputed
# after titanic was re-read at the top of this section.
titanic.loc[index_babies]  # no effect here: only the independent copy baby_ages was changed

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,
