In [1]:
import numpy as np
# Build a 1-D NumPy array from a plain list of four integers (one panda).
un_panda_numpy = np.array([100,5,20,80])
# A bare expression on the last line of a cell is shown as the cell's output.
un_panda_numpy

array([100,   5,  20,  80])

In [2]:
# One inner list per panda; every inner list holds the same four values.
famille_panda = [
    [100, 5  , 20, 80], # mother panda
    [50 , 2.5, 10, 40], # baby panda
    [110, 6  , 22, 80], # father panda
]
# A list of equal-length lists becomes a 2-D array (3 rows x 4 columns).
# The single 2.5 makes NumPy store every value as a float.
famille_panda_numpy = np.array(famille_panda)
famille_panda_numpy

array([[100. ,   5. ,  20. ,  80. ],
       [ 50. ,   2.5,  10. ,  40. ],
       [110. ,   6. ,  22. ,  80. ]])

In [3]:
# All rows (:), column 0 -> the first value of each panda, as a 1-D array.
famille_panda_numpy[:, 0]

array([100.,  50., 110.])

In [10]:
import pandas as pd
# Wrap the 2-D array in a DataFrame: `index` labels the three rows
# (one per panda) and `columns` names the four values stored for each panda.
famille_panda_df = pd.DataFrame(famille_panda_numpy,
                                index = ['maman', 'bebe', 'papa'],
                                columns = ['pattes', 'poil', 'queue', 'ventre'])
famille_panda_df

Unnamed: 0,pattes,poil,queue,ventre
maman,100.0,5.0,20.0,80.0
bebe,50.0,2.5,10.0,40.0
papa,110.0,6.0,22.0,80.0


In [11]:
# Two equivalent ways to select the "ventre" column as a Series:
# attribute access, then bracket access.
# Only the last expression of a cell is displayed, so the output below
# comes from the second line; the first result is computed and discarded.
famille_panda_df.ventre
famille_panda_df["ventre"]

maman    80.0
bebe     40.0
papa     80.0
Name: ventre, dtype: float64

In [12]:
# iterrows() yields one (row label, row contents as a Series) pair per row.
# For each panda, print a header line, the row itself and a separator.
for ind_ligne, contenu_ligne in famille_panda_df.iterrows():
    print(
        f"Voici le panda {ind_ligne} :",
        contenu_ligne,
        "--------------------",
        sep="\n",
    )

Voici le panda maman :
pattes    100.0
poil        5.0
queue      20.0
ventre     80.0
Name: maman, dtype: float64
--------------------
Voici le panda bebe :
pattes    50.0
poil       2.5
queue     10.0
ventre    40.0
Name: bebe, dtype: float64
--------------------
Voici le panda papa :
pattes    110.0
poil        6.0
queue      22.0
ventre     80.0
Name: papa, dtype: float64
--------------------


In [13]:
# Two ways to fetch the same row. Only the last expression of the cell is
# displayed, so the output below comes from the .loc line.
famille_panda_df.iloc[2] # iloc: position-based indexing (row at position 2)
famille_panda_df.loc["papa"] # loc: label-based indexing (row labelled "papa")

pattes    110.0
poil        6.0
queue      22.0
ventre     80.0
Name: papa, dtype: float64

In [14]:
# Element-wise comparison: returns a boolean Series with one True/False per
# row, True where the "ventre" value equals 80.
famille_panda_df["ventre"] == 80

maman     True
bebe     False
papa      True
Name: ventre, dtype: bool

In [15]:
# Boolean mask: True for every panda whose "ventre" value equals 80.
masque = famille_panda_df["ventre"].eq(80)

# Keep only the rows where the mask is True.
# The whole filter is commonly written inline as a one-liner:
#     famille_panda_df[famille_panda_df["ventre"] == 80]
pandas_80 = famille_panda_df.loc[masque]

pandas_80

Unnamed: 0,pattes,poil,queue,ventre
maman,100.0,5.0,20.0,80.0
papa,110.0,6.0,22.0,80.0


In [16]:
# Two extra pandas, described with the same columns as famille_panda_df.
# No index is given, so these rows get the default labels 0 and 1.
quelques_pandas = pd.DataFrame(
    [[105, 4, 19, 80], [100, 5, 20, 80]],
    columns=famille_panda_df.columns,
)
# Stack the two frames vertically. DataFrame.append was deprecated in
# pandas 1.4 and removed in pandas 2.0, so pd.concat is used instead; it
# keeps each frame's own row labels, exactly as append did.
tous_les_pandas = pd.concat([famille_panda_df, quelques_pandas])
tous_les_pandas

Unnamed: 0,pattes,poil,queue,ventre
maman,100.0,5.0,20.0,80.0
bebe,50.0,2.5,10.0,40.0
papa,110.0,6.0,22.0,80.0
0,105.0,4.0,19.0,80.0
1,100.0,5.0,20.0,80.0


In [19]:
# Drop rows whose values repeat an earlier row, keeping the first occurrence.
# Here row 1 carries the same values as "maman", so it is removed.
# drop_duplicates() returns a new frame, hence the re-assignment.
tous_les_pandas = tous_les_pandas.drop_duplicates()
tous_les_pandas

Unnamed: 0,pattes,poil,queue,ventre
maman,100.0,5.0,20.0,80.0
bebe,50.0,2.5,10.0,40.0
papa,110.0,6.0,22.0,80.0
0,105.0,4.0,19.0,80.0


In [22]:
# Access the column names.
# NOTE(review): the saved output of this cell lists a 'sexe' column, which is
# only created in a later cell - the cells appear to have been executed out
# of order; re-run the notebook top to bottom to refresh the output.
famille_panda_df.columns

Index(['pattes', 'poil', 'queue', 'ventre', 'sexe'], dtype='object')

In [25]:
# Create a new column made of strings; the list supplies one value per row,
# in row order. This modifies famille_panda_df itself.
famille_panda_df["sexe"] = ["f", "f", "m"] 
# The mother and the baby are female, the father is male.
famille_panda_df

Unnamed: 0,pattes,poil,queue,ventre,sexe
maman,100.0,5.0,20.0,80.0,f
bebe,50.0,2.5,10.0,40.0,f
papa,110.0,6.0,22.0,80.0,m


In [24]:
# Get the number of rows.
len(famille_panda_df)

3

In [26]:
# Get the distinct values of a column:
# the "ventre" column holds two distinct values, 40 and 80.
famille_panda_df.ventre.unique()

array([80., 40.])

In [28]:
# Example kept for reference (not executed): read a semicolon-separated
# CSV file named "data.csv" into a DataFrame.
#data = pd.read_csv("data.csv", sep=";")

## Manipulation des DataFrames

In [29]:
import seaborn as sns

In [30]:
# Load the example dataset named 'titanic' through seaborn; the result is a
# DataFrame that the following cells explore.
titanic = sns.load_dataset('titanic')

In [31]:
# Preview the first five rows.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [32]:
# Distinct values of the age column; nan appears among them, which means
# some passengers have no recorded age.
titanic.age.unique()

array([22.  , 38.  , 26.  , 35.  ,   nan, 54.  ,  2.  , 27.  , 14.  ,
        4.  , 58.  , 20.  , 39.  , 55.  , 31.  , 34.  , 15.  , 28.  ,
        8.  , 19.  , 40.  , 66.  , 42.  , 21.  , 18.  ,  3.  ,  7.  ,
       49.  , 29.  , 65.  , 28.5 ,  5.  , 11.  , 45.  , 17.  , 32.  ,
       16.  , 25.  ,  0.83, 30.  , 33.  , 23.  , 24.  , 46.  , 59.  ,
       71.  , 37.  , 47.  , 14.5 , 70.5 , 32.5 , 12.  ,  9.  , 36.5 ,
       51.  , 55.5 , 40.5 , 44.  ,  1.  , 61.  , 56.  , 50.  , 36.  ,
       45.5 , 20.5 , 62.  , 41.  , 52.  , 63.  , 23.5 ,  0.92, 43.  ,
       60.  , 10.  , 64.  , 13.  , 48.  ,  0.75, 53.  , 57.  , 80.  ,
       70.  , 24.5 ,  6.  ,  0.67, 30.5 ,  0.42, 34.5 , 74.  ])

In [35]:
# Summary statistics for every column. include='all' adds the non-numeric
# columns (count / unique / top / freq) next to the numeric ones
# (mean / std / min / quartiles); statistics that do not apply show as NaN.
titanic.describe(include='all')

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
count,891.0,891.0,891,714.0,891.0,891.0,891.0,889,891,891,891,203,889,891,891
unique,,,2,,,,,3,3,3,2,7,3,2,2
top,,,male,,,,,S,Third,man,True,C,Southampton,no,True
freq,,,577,,,,,644,491,537,537,59,644,549,537
mean,0.383838,2.308642,,29.699118,0.523008,0.381594,32.204208,,,,,,,,
std,0.486592,0.836071,,14.526497,1.102743,0.806057,49.693429,,,,,,,,
min,0.0,1.0,,0.42,0.0,0.0,0.0,,,,,,,,
25%,0.0,2.0,,20.125,0.0,0.0,7.9104,,,,,,,,
50%,0.0,3.0,,28.0,0.0,0.0,14.4542,,,,,,,,
75%,1.0,3.0,,38.0,1.0,0.0,31.0,,,,,,,,


In [36]:
# First ten ages; the NaN at row 5 is a missing value.
titanic.age.head(10)

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
5     NaN
6    54.0
7     2.0
8    27.0
9    14.0
Name: age, dtype: float64

In [37]:
# Replace missing values in the 'age' column with 0 (other columns are left
# as they are). The result is a new frame that is only displayed: it is not
# assigned, so `titanic` itself still contains the missing ages.
titanic.fillna(value={'age': 0})

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,0.0,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [38]:
# Drop every row that has at least one missing value in any column.
# Returns a new frame; `titanic` is not modified by this call.
titanic.dropna()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
10,1,3,female,4.0,1,1,16.7000,S,Third,child,False,G,Southampton,yes,False
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False
872,0,1,male,33.0,0,0,5.0000,S,First,man,True,B,Southampton,no,True
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [40]:
# Drop every column that contains at least one missing value
# (age, embarked, deck and embark_town disappear from the result).
# Returns a new frame; `titanic` is not modified by this call.
titanic.dropna(axis='columns')

Unnamed: 0,survived,pclass,sex,sibsp,parch,fare,class,who,adult_male,alive,alone
0,0,3,male,1,0,7.2500,Third,man,True,no,False
1,1,1,female,1,0,71.2833,First,woman,False,yes,False
2,1,3,female,0,0,7.9250,Third,woman,False,yes,True
3,1,1,female,1,0,53.1000,First,woman,False,yes,False
4,0,3,male,0,0,8.0500,Third,man,True,no,True
...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,0,0,13.0000,Second,man,True,no,True
887,1,1,female,0,0,30.0000,First,woman,False,yes,True
888,0,3,female,1,2,23.4500,Third,woman,False,no,False
889,1,1,male,0,0,30.0000,First,man,True,yes,True


In [42]:
# Rename the 'sex' column to 'sexe' using an {old name: new name} mapping.
# Returns a new frame; `titanic` keeps its original column names.
titanic.rename(columns={'sex':'sexe'})

Unnamed: 0,survived,pclass,sexe,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [43]:
def f(x):
    """Return the given row label shifted up by one."""
    return x + 1


# rename(index=<callable>) applies the callable to every row label and
# returns a new frame: the labels start at 1 instead of 0.
titanic.rename(index=f)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
2,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
4,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
5,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
887,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
888,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
889,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
890,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [45]:
# Dimensions of the frame as a (number of rows, number of columns) tuple.
titanic.shape

(891, 15)

In [53]:
# Average of `survived` for each sex (rows) and passenger class (columns).
# The mean is pivot_table's default aggregation, and since `survived` holds
# 0/1 values the result reads as a survival rate.
# observed=False is passed explicitly: seaborn presumably loads `class` as a
# categorical column (check titanic.dtypes), and pandas >= 2.2 emits a
# FutureWarning when the soon-to-change default is relied upon while
# grouping on categoricals. The resulting table is the same.
titanic.pivot_table(values='survived', index='sex', columns='class', observed=False)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [58]:
# Same table as a count: summing the 0/1 `survived` values gives the number
# of survivors for each sex (rows) and passenger class (columns).
# observed=False is passed explicitly: seaborn presumably loads `class` as a
# categorical column (check titanic.dtypes), and pandas >= 2.2 emits a
# FutureWarning when the soon-to-change default is relied upon while
# grouping on categoricals. The resulting table is the same.
titanic.pivot_table(values='survived', index='sex', columns='class',
                    aggfunc="sum", observed=False)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,91,70,72
male,45,17,47


In [59]:
# Derive a frame without missing values instead of calling
# dropna(inplace=True): mutating `titanic` in place would silently change
# what every earlier cell computes when it is re-run.
# NOTE(review): dropna() discards a row as soon as ANY column is missing,
# not only `age`, so the table below is computed on a reduced set of
# passengers - confirm this is intended.
titanic_sans_na = titanic.dropna()

# Bin the ages into two intervals: (0, 18] and (18, 80].
age = pd.cut(titanic_sans_na['age'], [0, 18, 80])

# Survival rate by sex and age bracket (rows) and by class (columns).
# observed=False is passed explicitly because the grouping keys are
# categorical: pandas >= 2.2 warns when the soon-to-change default is
# relied upon.
titanic_sans_na.pivot_table(values='survived', index=['sex', age],
                            columns='class', observed=False)

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.5
female,"(18, 80]",0.968254,0.875,0.666667
male,"(0, 18]",0.8,1.0,1.0
male,"(18, 80]",0.397436,0.333333,0.25
