# Manipulating Values in DataFrame

# Best Practice

In [1]:
import pandas as pd

In [2]:
# Load the dataset used throughout this notebook; the relative path means
# titanic.csv must be in the current working directory.
titanic = pd.read_csv("titanic.csv")

In [3]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


# Changing a single value (Option 1 with loc)

In [8]:
# Label-based assignment: set "age" for the row whose index label is 1.
# NOTE(review): an assignment displays nothing, so the value shown under this
# cell is presumably left over from an earlier read of titanic.loc[1,"age"];
# re-run the notebook to refresh the outputs.
titanic.loc[1,"age"] = 40

38.0

In [5]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


# Changing a single value (Option 2 with iloc)

In [12]:
# Position-based assignment: row position 1, column position 3. The frame
# displayed in the next cell shows that this is the "age" column.
# NOTE(review): an assignment displays nothing, so the string shown under this
# cell is presumably stale output from an earlier read of another position.
titanic.iloc[1,3] = 38

'female'

In [7]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


# Changing multiple values in a column (Option 1 with loc)

In [22]:
# A label slice with .loc includes BOTH endpoints, so rows 1, 2 and 3 all
# receive the scalar 42 (see the frame displayed in the next cell).
# NOTE(review): the Series printed under this cell does not match this
# assignment; it is presumably stale output from a read executed at another
# time. Re-run the notebook to refresh it.
titanic.loc[1:3,'age'] = 42

1    34.0
2    23.0
3    21.0
Name: age, dtype: float64

In [15]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,42.0,0,0,7.925,S,
3,1,1,female,42.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


# Changing multiple values in a column (Option 2 with iloc)

In [23]:
# A positional slice with .iloc excludes the stop, so 1:4 targets row
# positions 1, 2 and 3; the list supplies one value per selected row, in order.
titanic.iloc[1:4,3] = [34,23,21]

In [24]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,34.0,1,0,71.2833,C,C
2,1,3,female,23.0,0,0,7.925,S,
3,1,1,female,21.0,1,0,53.1,S,C
4,0,3,male,42.0,0,0,8.05,S,


# Changing multiple values in a column (Option 3 with boolean indexing)

In [28]:
index_babi = titanic.loc[titanic.age<1,"age"].index

In [29]:
titanic.loc[titanic.age<1,"age"] = 1

In [30]:
titanic.loc[index_babi]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


# Changing multiple values in a row

In [31]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,34.0,1,0,71.2833,C,C
2,1,3,female,23.0,0,0,7.925,S,
3,1,1,female,21.0,1,0,53.1,S,C
4,0,3,male,42.0,0,0,8.05,S,


In [36]:
# Row label 0, column label slice "survived":"sex" (inclusive on both ends),
# i.e. the columns survived, pclass and sex; the list assigns one value per
# selected column, in order.
titanic.loc[0,"survived":"sex"] = [1,1,"female"]

In [37]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,0,7.25,S,
1,1,1,female,34.0,1,0,71.2833,C,C
2,1,3,female,23.0,0,0,7.925,S,
3,1,1,female,21.0,1,0,53.1,S,C
4,0,3,male,42.0,0,0,8.05,S,


# Changing multiple values in multiple rows/columns

In [41]:
# Replace every value equal to 0, in any column, with the string "Zero".
# The result is only displayed, not assigned back, so `titanic` itself is
# not modified by this cell.
titanic.replace(0,"Zero")

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,Zero,7.25,S,
1,1,1,female,34.0,1,Zero,71.2833,C,C
2,1,3,female,23.0,Zero,Zero,7.925,S,
3,1,1,female,21.0,1,Zero,53.1,S,C
4,Zero,3,male,42.0,Zero,Zero,8.05,S,
...,...,...,...,...,...,...,...,...,...
886,Zero,2,male,27.0,Zero,Zero,13.0,S,
887,1,1,female,19.0,Zero,Zero,30.0,S,B
888,Zero,3,female,,1,2,23.45,S,
889,1,1,male,26.0,Zero,Zero,30.0,C,C


In [42]:
# Reverse mapping: "Zero" -> 0, again only displayed and not assigned.
# NOTE(review): the previous cell did not store its result, so `titanic`
# presumably contains no "Zero" strings at this point and this call has
# nothing to replace.
titanic.replace("Zero",0)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,0,7.2500,S,
1,1,1,female,34.0,1,0,71.2833,C,C
2,1,3,female,23.0,0,0,7.9250,S,
3,1,1,female,21.0,1,0,53.1000,S,C
4,0,3,male,42.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


# How you should NOT do it (part 1)

In [44]:
import pandas as pd

In [45]:
# Reload the CSV so this section starts from the original values again
# (earlier cells overwrote entries of `titanic` in place).
titanic = pd.read_csv("titanic.csv")

In [46]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [47]:
# Pull the "age" column out under its own name (note: no explicit .copy()).
age = titanic.age

In [48]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [49]:
# Anti-pattern: writing through a Series that was extracted from the frame.
# pandas emits the warning shown below, and the outputs of the next two cells
# show the new value in both `age` and `titanic`.
age[1] = 40

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 40


In [50]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [51]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [52]:
# Anti-pattern: `titanic.age` is evaluated first, then `[1] = ...` writes into
# that intermediate object: two separate indexing steps instead of one.
# pandas emits the warning shown below.
titanic.age[1] = 41 #this is chained indexing!!!!

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic.age[1] = 41 #this is chained indexing!!!!


In [53]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,41.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [57]:
# Recommended form: one .loc call with row and column label together, i.e. a
# single indexing step on the frame itself. No warning is shown for this cell.
titanic.loc[1,"age"] = 38 #best way

In [56]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


# View vs. Copy

# Slicing a DataFrame / creating a view on the original DataFrame

In [60]:
import pandas as pd

In [61]:
# Fresh read of the CSV so the view/copy experiments below start from
# unmodified data.
titanic = pd.read_csv("titanic.csv")

In [62]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [65]:
# NOTE(review): whether a write to `age` reaches `titanic` depends on the
# pandas version and settings (Copy-on-Write changes this). Confirm it for the
# environment in use; the outputs below were recorded with write-through.
age = titanic.age # creates a view: changing the view also changes the original DataFrame

In [66]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [67]:
# Private pandas attribute (leading underscore, may change between versions);
# it evaluates to True here, i.e. pandas reports `age` as a view.
age._is_view

True

In [68]:
# Private pandas attribute; it is None for `age` (output: True).
# NOTE(review): presumably this means `age` is not flagged as a copy taken
# from another object. Confirm against the installed pandas version.
age._is_copy is None

True

In [69]:
# Write into the extracted column. pandas shows the warning below, and the
# next two cells show 40.0 in both `age` and `titanic`.
age[1] = 40

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 40


In [70]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [71]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


# Slicing a DataFrame / creating a copy of the original DataFrame

In [72]:
# Boolean-mask row selection: only the rows with age < 1 end up in `df_baby`.
# The check in the next cell shows pandas does not report the result as a view.
df_baby = titanic[titanic.age < 1]

In [73]:
df_baby._is_view

False

In [74]:
df_baby._is_copy is None

False

In [76]:
# `_is_copy` is not None for `df_baby`, and calling it returns the frame that
# `df_baby` was taken from: the output below is the full `titanic`, not the
# filtered rows.
# NOTE(review): presumably a weak reference to the parent frame (private
# pandas API). Confirm against the installed pandas version.
df_baby._is_copy() # a boolean-mask selection such as df_baby is a copy of the original data

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


In [77]:
# Anti-pattern: assigning on the filtered copy. pandas warns (below) and, as
# the next cell shows, the ages in `titanic` are still below 1.
df_baby.age = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_baby.age = 1


In [78]:
# Check the original frame: the rows with age < 1 are still found, so the
# assignment on `df_baby` did not reach `titanic`.
titanic.loc[titanic.age < 1]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


# If you want to work with and manipulate the whole DataFrame...

# ... avoid chained indexing!

In [91]:
import pandas as pd

In [92]:
# Read the CSV again so the recommended pattern is demonstrated on
# unmodified data.
titanic = pd.read_csv("titanic.csv")

In [93]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [94]:
# One positional indexing step directly on the frame: row position 1, column
# position 3 (the "age" column, as the next output shows).
titanic.iloc[1,3] = 40

In [95]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [96]:
# Save the index labels of the rows with age < 1 before they are overwritten,
# so the same rows can be inspected afterwards.
index_baby = titanic.loc[titanic.age < 1, "age"].index

In [97]:
# Row mask and column label in a single .loc call: the assignment is applied
# to `titanic` itself, and no warning is shown for this cell.
titanic.loc[titanic.age < 1, "age"] = 1

In [98]:
titanic.loc[index_baby]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


# If you want to work with and manipulate a slice of a DataFrame...

# ... make a copy with .copy()

In [99]:
import pandas as pd

In [100]:
# Start once more from the file contents, discarding the in-place changes
# made in the previous section.
titanic = pd.read_csv("titanic.csv")

In [110]:
age = titanic.age.copy() # create a copy, so changing it affects only the copy

In [103]:
# Write into the explicit copy: no warning is shown for this cell, and the
# outputs below show 40.0 in `age` while `titanic` keeps 38.0 for row 1.
age[1] = 40

In [104]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [105]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [106]:
# One .loc call selects the rows (age < 1) and the columns (age, sex);
# .copy() then makes the result an object of its own that can be modified
# without touching `titanic`.
baby_age = titanic.loc[titanic.age < 1, ["age", "sex"]].copy()

In [107]:
baby_age

Unnamed: 0,age,sex
78,0.83,male
305,0.92,male
469,0.75,female
644,0.75,female
755,0.67,male
803,0.42,male
831,0.83,male


In [108]:
# Overwrite the whole "age" column of the copy; no warning is shown for this
# cell.
baby_age["age"] = 1

In [109]:
baby_age

Unnamed: 0,age,sex
78,1,male
305,1,male
469,1,female
644,1,female
755,1,male
803,1,male
831,1,male
