# La méthode `apply()`
Applique une fonction le long d'un axe du DataFrame.

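Les objets passés à la fonction sont des objets Series dont l'index est soit l'index du DataFrame (axis=0), soit les colonnes du DataFrame (axis=1). Autrement dit, avec axis=0 la fonction reçoit chaque colonne, et avec axis=1 elle reçoit chaque ligne.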

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv("titanic.csv")
# "?" is treated as the missing-value marker in these two columns: swap it
# for None so that each column can then be cast to float.
for column in ("age", "fare"):
    titanic[column] = titanic[column].replace(["?"], [None]).astype("float")

## Appliquer une fonction à des éléments de DataFrame avec apply()

In [3]:
def years_to_days(yrs):
    """Convert a duration in years to days, counting 365 days per year."""
    days_per_year = 365
    return yrs * days_per_year

titanic["age"].apply(years_to_days)

0       10585.0000
1         334.5955
2         730.0000
3       10950.0000
4        9125.0000
           ...    
1304     5292.5000
1305           NaN
1306     9672.5000
1307     9855.0000
1308    10585.0000
Name: age, Length: 1309, dtype: float64

In [4]:
def get_age_group(age):
    """Return the age-bracket label for a numeric age.

    Upper bounds are exclusive: below 2 is "infant", below 12 is "child",
    below 18 is "teen", below 50 is "adult"; everything else is "senior".

    NOTE: a NaN age compares False against every bound, so it falls through
    to "senior" as well.
    """
    brackets = (
        (2, "infant"),
        (12, "child"),
        (18, "teen"),
        (50, "adult"),
    )
    # Brackets are listed in ascending order; the first bound above the age wins.
    for upper_bound, label in brackets:
        if age < upper_bound:
            return label
    return "senior"

In [5]:
titanic["age_group"] = titanic["age"].apply(get_age_group)
titanic

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,age_group
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0000,0,0,24160,211.3375,B5,S,2,?,"St Louis, MO",adult
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,11,?,"Montreal, PQ / Chesterville, ON",infant
2,1,0,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON",child
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0000,1,2,113781,151.5500,C22 C26,S,?,135,"Montreal, PQ / Chesterville, ON",adult
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0000,1,2,113781,151.5500,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON",adult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,3,0,"Zabour, Miss. Hileni",female,14.5000,1,0,2665,14.4542,?,C,?,328,?,teen
1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,?,C,?,?,?,senior
1306,3,0,"Zakarian, Mr. Mapriededer",male,26.5000,0,0,2656,7.2250,?,C,?,304,?,adult
1307,3,0,"Zakarian, Mr. Ortin",male,27.0000,0,0,2670,7.2250,?,C,?,?,?,adult


In [6]:
titanic.age_group.value_counts()

adult     782
senior    373
child      69
teen       63
infant     22
Name: age_group, dtype: int64

In [7]:
df = titanic[["pclass", "survived", "age", "fare"]]

In [8]:
def get_range(s):
    """Return the spread of ``s``: its maximum minus its minimum."""
    highest = s.max()
    lowest = s.min()
    return highest - lowest

In [9]:
df.apply(get_range)

pclass        2.0000
survived      1.0000
age          79.8333
fare        512.3292
dtype: float64

## apply() avec lambda

In [10]:
titanic["fare"] * 24

0       5072.1000
1       3637.2000
2       3637.2000
3       3637.2000
4       3637.2000
          ...    
1304     346.9008
1305     346.9008
1306     173.4000
1307     173.4000
1308     189.0000
Name: fare, Length: 1309, dtype: float64

In [11]:
titanic["fare"].apply(lambda x: f"${x*24}")

0                   $5072.1
1       $3637.2000000000003
2       $3637.2000000000003
3       $3637.2000000000003
4       $3637.2000000000003
               ...         
1304              $346.9008
1305              $346.9008
1306    $173.39999999999998
1307    $173.39999999999998
1308                 $189.0
Name: fare, Length: 1309, dtype: object

## apply() le long d'un axe spécifié

In [12]:
def get_fam_size(s):
    """Label a row by the sum of its ``sibsp`` and ``parch`` values.

    Returns "solo" when the sum is 0, "average" when it is below 5,
    and "large" otherwise.
    """
    relatives = s.sibsp + s.parch
    if relatives == 0:
        return "solo"
    if relatives < 5:
        return "average"
    return "large"

In [32]:
titanic.apply(get_fam_size,axis=1)

0          solo
1       average
2       average
3       average
4       average
         ...   
1304    average
1305    average
1306       solo
1307       solo
1308       solo
Length: 1309, dtype: object

In [14]:
titanic["fam_size"] = titanic.apply(get_fam_size, axis=1)

In [15]:
titanic["fam_size"].value_counts()

solo       790
average    459
large       60
Name: fam_size, dtype: int64

In [16]:
df.apply(get_range, axis=1)

0       210.3375
1       150.6333
2       151.5500
3       151.5500
4       151.5500
          ...   
1304     14.5000
1305     14.4542
1306     26.5000
1307     27.0000
1308     29.0000
Length: 1309, dtype: float64

In [17]:
df.apply(get_range, axis=0)

pclass        2.0000
survived      1.0000
age          79.8333
fare        512.3292
dtype: float64

## apply() avec des arguments

In [18]:
def convert_currency(num, multiplier):
    """Return ``num`` scaled by ``multiplier``, rendered as a "$"-prefixed string."""
    converted = num * multiplier
    return f"${converted}"

In [19]:
titanic["fare"].apply(convert_currency, args=(15,))

0       $3170.0625
1         $2273.25
2         $2273.25
3         $2273.25
4         $2273.25
           ...    
1304      $216.813
1305      $216.813
1306      $108.375
1307      $108.375
1308      $118.125
Name: fare, Length: 1309, dtype: object

## apply() avec arguments positionnels et mots-clés

In [20]:
def convert_currency(num, x, y, multiplier):
    """Return ``num * multiplier + x + y`` rendered as a "$"-prefixed string.

    ``x`` and ``y`` are added, in that order, after the multiplication.
    """
    scaled = num * multiplier
    total = scaled + x + y
    return f"${total}"

In [21]:
titanic["fare"].apply(convert_currency, args=(1,0), multiplier=15)

0       $3171.0625
1         $2274.25
2         $2274.25
3         $2274.25
4         $2274.25
           ...    
1304      $217.813
1305      $217.813
1306      $109.375
1307      $109.375
1308      $119.125
Name: fare, Length: 1309, dtype: object

## apply() avec groupby
Applique la fonction `func` à chaque groupe, puis combine les résultats.

La fonction passée à `apply` doit prendre un DataFrame comme premier argument et retourner un DataFrame, une Series ou un scalaire. `apply` se charge ensuite de combiner les résultats en un seul DataFrame ou une seule Series. `apply` est donc une méthode de regroupement très flexible.

In [22]:
def family_size(s):
    """Return ``s.sibsp + s.parch + 1``.

    The extra 1 presumably counts the passenger themself on top of the
    relatives recorded in the two columns.
    """
    companions = s.sibsp + s.parch
    return companions + 1

In [23]:
titanic.groupby("sex").apply(family_size)

sex         
female  0       1
        2       4
        4       4
        6       2
        8       3
               ..
male    1302    1
        1303    1
        1306    1
        1307    1
        1308    1
Length: 1309, dtype: int64

In [24]:
titanic.groupby(["sex", "pclass"]).apply(family_size)

sex     pclass      
female  1       0       1
                2       4
                4       4
                6       2
                8       3
                       ..
male    3       1302    1
                1303    1
                1306    1
                1307    1
                1308    1
Length: 1309, dtype: int64

# La méthode `map()`
Fait correspondre les valeurs d'une Series selon un mappage ou une fonction fournis en entrée.

Utilisée pour remplacer chaque valeur d'une Series par une autre valeur, qui peut être dérivée d'une fonction, d'un dict ou d'une Series.

In [25]:
titanic["pclass"]

0       1
1       1
2       1
3       1
4       1
       ..
1304    3
1305    3
1306    3
1307    3
1308    3
Name: pclass, Length: 1309, dtype: int64

In [26]:
titanic["pclass"].map({1:"1st", 2:"2nd", 3:"3rd"})

0       1st
1       1st
2       1st
3       1st
4       1st
       ... 
1304    3rd
1305    3rd
1306    3rd
1307    3rd
1308    3rd
Name: pclass, Length: 1309, dtype: object

In [27]:
titanic["sex"].map('This person is {}'.format)

0       This person is female
1         This person is male
2       This person is female
3         This person is male
4       This person is female
                ...          
1304    This person is female
1305    This person is female
1306      This person is male
1307      This person is male
1308      This person is male
Name: sex, Length: 1309, dtype: object

# La méthode `applymap()`
Applique une fonction à un DataFrame, élément par élément.

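Cette méthode applique à chaque élément d'un DataFrame une fonction qui accepte et renvoie un scalaire.

Remarque : depuis pandas 2.1, `DataFrame.applymap` est dépréciée au profit de `DataFrame.map`, qui a le même comportement.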

In [28]:
titanic[["name", "sex", "age_group"]].applymap(str.upper)

Unnamed: 0,name,sex,age_group
0,"ALLEN, MISS. ELISABETH WALTON",FEMALE,ADULT
1,"ALLISON, MASTER. HUDSON TREVOR",MALE,INFANT
2,"ALLISON, MISS. HELEN LORAINE",FEMALE,CHILD
3,"ALLISON, MR. HUDSON JOSHUA CREIGHTON",MALE,ADULT
4,"ALLISON, MRS. HUDSON J C (BESSIE WALDO DANIELS)",FEMALE,ADULT
...,...,...,...
1304,"ZABOUR, MISS. HILENI",FEMALE,TEEN
1305,"ZABOUR, MISS. THAMINE",FEMALE,SENIOR
1306,"ZAKARIAN, MR. MAPRIEDEDER",MALE,ADULT
1307,"ZAKARIAN, MR. ORTIN",MALE,ADULT


In [29]:
titanic[["name", "sex", "age_group"]].applymap(len)

Unnamed: 0,name,sex,age_group
0,29,6,5
1,30,4,6
2,28,6,5
3,36,4,5
4,47,6,5
...,...,...,...
1304,20,6,4
1305,21,6,6
1306,25,4,5
1307,19,4,5


In [30]:
def add_year(text):
    """Return ``text`` converted to a string with the suffix "_2022" appended."""
    return "".join((str(text), '_2022'))

titanic.applymap(add_year)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,age_group,fam_size
0,1_2022,1_2022,"Allen, Miss. Elisabeth Walton_2022",female_2022,29.0_2022,0_2022,0_2022,24160_2022,211.3375_2022,B5_2022,S_2022,2_2022,?_2022,"St Louis, MO_2022",adult_2022,solo_2022
1,1_2022,1_2022,"Allison, Master. Hudson Trevor_2022",male_2022,0.9167_2022,1_2022,2_2022,113781_2022,151.55_2022,C22 C26_2022,S_2022,11_2022,?_2022,"Montreal, PQ / Chesterville, ON_2022",infant_2022,average_2022
2,1_2022,0_2022,"Allison, Miss. Helen Loraine_2022",female_2022,2.0_2022,1_2022,2_2022,113781_2022,151.55_2022,C22 C26_2022,S_2022,?_2022,?_2022,"Montreal, PQ / Chesterville, ON_2022",child_2022,average_2022
3,1_2022,0_2022,"Allison, Mr. Hudson Joshua Creighton_2022",male_2022,30.0_2022,1_2022,2_2022,113781_2022,151.55_2022,C22 C26_2022,S_2022,?_2022,135_2022,"Montreal, PQ / Chesterville, ON_2022",adult_2022,average_2022
4,1_2022,0_2022,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels...",female_2022,25.0_2022,1_2022,2_2022,113781_2022,151.55_2022,C22 C26_2022,S_2022,?_2022,?_2022,"Montreal, PQ / Chesterville, ON_2022",adult_2022,average_2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,3_2022,0_2022,"Zabour, Miss. Hileni_2022",female_2022,14.5_2022,1_2022,0_2022,2665_2022,14.4542_2022,?_2022,C_2022,?_2022,328_2022,?_2022,teen_2022,average_2022
1305,3_2022,0_2022,"Zabour, Miss. Thamine_2022",female_2022,nan_2022,1_2022,0_2022,2665_2022,14.4542_2022,?_2022,C_2022,?_2022,?_2022,?_2022,senior_2022,average_2022
1306,3_2022,0_2022,"Zakarian, Mr. Mapriededer_2022",male_2022,26.5_2022,0_2022,0_2022,2656_2022,7.225_2022,?_2022,C_2022,?_2022,304_2022,?_2022,adult_2022,solo_2022
1307,3_2022,0_2022,"Zakarian, Mr. Ortin_2022",male_2022,27.0_2022,0_2022,0_2022,2670_2022,7.225_2022,?_2022,C_2022,?_2022,?_2022,?_2022,adult_2022,solo_2022
