### Manipulating DataFrames

In [24]:
import pandas as pd
data = {"first": ["Carl", "Francis", "Sam"],
        "last": ["Po", "Nyguen", "Smith"],
        "age": ["32","45","22"],
    }
clients = pd.DataFrame(data)
clients

Unnamed: 0,first,last,age
0,Carl,Po,32
1,Francis,Nyguen,45
2,Sam,Smith,22


### Renaming

In [25]:
clients.rename(columns = {"first": "First Name"},inplace=True)

In [26]:
clients.rename(index = {0:"a", 1:"b",2:"c"},inplace=True)


In [27]:
clients

Unnamed: 0,First Name,last,age
a,Carl,Po,32
b,Francis,Nyguen,45
c,Sam,Smith,22


In [28]:
clients.reset_index(inplace=True)

In [29]:
clients

Unnamed: 0,index,First Name,last,age
0,a,Carl,Po,32
1,b,Francis,Nyguen,45
2,c,Sam,Smith,22


### Dropping

In [30]:
clients.drop(columns = "First Name")

Unnamed: 0,index,last,age
0,a,Po,32
1,b,Nyguen,45
2,c,Smith,22


In [31]:
clients.drop(index = 0)

Unnamed: 0,index,First Name,last,age
1,b,Francis,Nyguen,45
2,c,Sam,Smith,22


### Set Type

In [32]:
clients.age

0    32
1    45
2    22
Name: age, dtype: object

In [33]:
clients.age.astype(int)

0    32
1    45
2    22
Name: age, dtype: int32

### Updating DataFrame Data

In [39]:
data2 = {"first": ["Carl", "Francis", "Sam"],
        "last": ["Po", "Nyguen", "Smith"],
        "age": ["32","45","22"],
        "CH_count": [12,14,39],
    }
clients2 = pd.DataFrame(data2)
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,12
1,Francis,Nyguen,45,14
2,Sam,Smith,22,39


### Adding Rows

In [40]:
new_data2 = {"first": ["Sue", "Boya"],
        "last": ["Rankler", "Maple"],
        "age": [93,12],
        "CH_count": [22,1],
    }
new_clients = pd.DataFrame(new_data2)
new_clients

Unnamed: 0,first,last,age,CH_count
0,Sue,Rankler,93,22
1,Boya,Maple,12,1


In [41]:
clients2 = pd.concat([clients2, new_clients])
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,12
1,Francis,Nyguen,45,14
2,Sam,Smith,22,39
0,Sue,Rankler,93,22
1,Boya,Maple,12,1


### Setting specific value

In [42]:
clients2.loc[1,"first"] = "Frankie"
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,12
1,Frankie,Nyguen,45,14
2,Sam,Smith,22,39
0,Sue,Rankler,93,22
1,Frankie,Maple,12,1


In [47]:
clients2.reset_index(inplace=True)
clients2

Unnamed: 0,index,first,last,age,CH_count
0,0,Carl,Po,32,12
1,1,Frankie,Nyguen,45,14
2,2,Sam,Smith,22,39
3,0,Sue,Rankler,93,22
4,1,Frankie,Maple,12,1


In [48]:
clients2.drop(columns=["index"], inplace=True)
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,12
1,Frankie,Nyguen,45,14
2,Sam,Smith,22,39
3,Sue,Rankler,93,22
4,Frankie,Maple,12,1


In [49]:
clients2.loc[0:1, "CH_count"] = -1
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,-1
1,Frankie,Nyguen,45,-1
2,Sam,Smith,22,39
3,Sue,Rankler,93,22
4,Frankie,Maple,12,1


### Math Operations

In [50]:
clients2["CH_count"] +1

0     0
1     0
2    40
3    23
4     2
Name: CH_count, dtype: int64

In [51]:
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,-1
1,Frankie,Nyguen,45,-1
2,Sam,Smith,22,39
3,Sue,Rankler,93,22
4,Frankie,Maple,12,1


In [52]:
clients2["CH_count"] -= 3
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,-4
1,Frankie,Nyguen,45,-4
2,Sam,Smith,22,36
3,Sue,Rankler,93,19
4,Frankie,Maple,12,-2


### Replace

In [53]:
clients2.replace(-4,0)

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,0
1,Frankie,Nyguen,45,0
2,Sam,Smith,22,36
3,Sue,Rankler,93,19
4,Frankie,Maple,12,-2


In [55]:
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,-4
1,Frankie,Nyguen,45,-4
2,Sam,Smith,22,36
3,Sue,Rankler,93,19
4,Frankie,Maple,12,-2


In [60]:
clients2.replace(32,36,inplace=True)
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,32,-4
1,Frankie,Nyguen,45,-4
2,Sam,Smith,22,36
3,Sue,Rankler,93,19
4,Frankie,Maple,12,-2


In [64]:
clients2["age"] = clients2["age"].astype(int)

In [65]:
clients2["age"]

0    32
1    45
2    22
3    93
4    12
Name: age, dtype: int32

In [66]:
clients2.replace(32,36,inplace=True)
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,36,-4
1,Frankie,Nyguen,45,-4
2,Sam,Smith,22,36
3,Sue,Rankler,93,19
4,Frankie,Maple,12,-2


In [70]:
clients2["CH_count"].replace(36,37,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  clients2["CH_count"].replace(36,37,inplace=True)


In [71]:
clients2

Unnamed: 0,first,last,age,CH_count
0,Carl,Po,36,-4
1,Frankie,Nyguen,45,-4
2,Sam,Smith,22,37
3,Sue,Rankler,93,19
4,Frankie,Maple,12,-2


### DataFrame Apply

In [2]:
import pandas as pd
data = { "even": range(20,0,-2),
        "odd": range(1,21,2),    
}
df = pd.DataFrame(data)
df

Unnamed: 0,even,odd
0,20,1
1,18,3
2,16,5
3,14,7
4,12,9
5,10,11
6,8,13
7,6,15
8,4,17
9,2,19


In [3]:
sum(range(10))

45

### Apply

In [4]:
df.apply(sum)

even    110
odd     100
dtype: int64

In [5]:
df

Unnamed: 0,even,odd
0,20,1
1,18,3
2,16,5
3,14,7
4,12,9
5,10,11
6,8,13
7,6,15
8,4,17
9,2,19


In [8]:
df.apply(sum,axis=1)

0    21
1    21
2    21
3    21
4    21
5    21
6    21
7    21
8    21
9    21
dtype: int64

### Define Column Function

In [9]:
def hundred_plus(col):
    if sum(col) > 100:
        return "Grater than 100"
    return "Not greater than 100"

In [10]:
df.apply(hundred_plus)

even         Grater than 100
odd     Not greater than 100
dtype: object

### Define Row Function

In [13]:
def label_func(row):
    if row["even"] % 3 == 0:
        return True
    elif row["odd"] % 3 == 0:
        return True
    return False

In [14]:
df.apply(label_func, axis=1)

0    False
1     True
2    False
3    False
4     True
5    False
6    False
7     True
8    False
9    False
dtype: bool

### Expanding Results

In [15]:
def ret_list(row):
    ret_value = [False, False]
    if row["even"] > 6:
        ret_value[0] = True
    
    if row["odd"] > 6:
        ret_value[1] = True
    return ret_value

In [17]:
df.apply(ret_list, axis=1)

0    [True, False]
1    [True, False]
2    [True, False]
3     [True, True]
4     [True, True]
5     [True, True]
6     [True, True]
7    [False, True]
8    [False, True]
9    [False, True]
dtype: object

In [18]:
df.apply(ret_list, axis=1, result_type="expand")

Unnamed: 0,0,1
0,True,False
1,True,False
2,True,False
3,True,True
4,True,True
5,True,True
6,True,True
7,False,True
8,False,True
9,False,True


### Apply to Column

In [19]:
def div_three(row):
    if row % 3 == 0:
        return "Divisible by 3"
    return "Not divisible by 3"

In [20]:
df.even.apply(div_three)

0    Not divisible by 3
1        Divisible by 3
2    Not divisible by 3
3    Not divisible by 3
4        Divisible by 3
5    Not divisible by 3
6    Not divisible by 3
7        Divisible by 3
8    Not divisible by 3
9    Not divisible by 3
Name: even, dtype: object