# Tout sur les Series avec Pandas 

### Les structures de données

In [None]:
# Series 
    # utilise les tableaux numpy
    # peuvent être indexés par une str ou un entier
    # peuvent contenir des objets de type différents
    # utilisent la valeur NaN pour les valeurs manquantes
# DataFrames (ensemble de Series)
# Panels (ensemble de dataFrames)

In [None]:
# float
# int
# bool
# datetime64[ns] date sans la timezone
# datetime64[ns, tz]  date avec la timezone
# timedelta[ns] différence entre deux dates/horaires
# category (valeurs de catégories)
# object (valeur de chaîne de caractères)

In [2]:
import pandas as pd
fandango = pd.read_csv("fandango-score-comparison.csv")
fandango.head(2)

Unnamed: 0,FILM,RottenTomatoes,RottenTomatoes_User,Metacritic,Metacritic_User,IMDB,Fandango_Stars,Fandango_Ratingvalue,RT_norm,RT_user_norm,...,IMDB_norm,RT_norm_round,RT_user_norm_round,Metacritic_norm_round,Metacritic_user_norm_round,IMDB_norm_round,Metacritic_user_vote_count,IMDB_user_vote_count,Fandango_votes,Fandango_Difference
0,Avengers: Age of Ultron (2015),74,86,66,7.1,7.8,5.0,4.5,3.7,4.3,...,3.9,3.5,4.5,3.5,3.5,4.0,1330,271107,14846,0.5
1,Cinderella (2015),85,80,67,7.5,7.1,5.0,4.5,4.25,4.0,...,3.55,4.5,4.0,3.5,4.0,3.5,249,65709,12640,0.5


### Indexage avec des entiers

In [3]:
series_film = fandango["FILM"]
series_film.head(5)

0    Avengers: Age of Ultron (2015)
1                 Cinderella (2015)
2                    Ant-Man (2015)
3            Do You Believe? (2015)
4     Hot Tub Time Machine 2 (2015)
Name: FILM, dtype: object

In [4]:
series_rt = fandango["RottenTomatoes"]
series_rt.head()

0    74
1    85
2    80
3    18
4    14
Name: RottenTomatoes, dtype: int64

### Personnaliser son indexage

In [5]:
series_film[5]

'The Water Diviner (2015)'

In [6]:
series_rt[5]

63

In [17]:
from pandas import Series
film_names = series_film.values
rt_scores = series_rt.values
serie_custom = Series(data=rt_scores, index=film_names)
# Series(data, index) ici data = notes, index = nom du film
serie_custom.head()

Avengers: Age of Ultron (2015)    74
Cinderella (2015)                 85
Ant-Man (2015)                    80
Do You Believe? (2015)            18
Hot Tub Time Machine 2 (2015)     14
dtype: int64

In [18]:
serie_custom[['Minions (2015)', 'Leviatan (2014)']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]


Minions (2015)     54.0
Leviatan (2014)     NaN
dtype: float64

In [19]:
serie_custom[5:11]

The Water Diviner (2015)             63
Irrational Man (2015)                42
Top Five (2014)                      86
Shaun the Sheep Movie (2015)         99
Love & Mercy (2015)                  89
Far From The Madding Crowd (2015)    84
dtype: int64

### Reindexer un objet Series

In [38]:
# serie.reindex(index=)
# retourner une liste d'index actuels (avec .tolist() ou .index)
# tri par order alpha avec
# sorted()

original_index = serie_custom.index
sorted_index = sorted(original_index)
sorted_by_index = serie_custom.reindex(index=sorted_index)
sorted_by_index[:15]

'71 (2015)                        97
5 Flights Up (2015)               52
A Little Chaos (2015)             40
A Most Violent Year (2014)        90
About Elly (2015)                 97
Aloha (2015)                      19
American Sniper (2015)            72
American Ultra (2015)             46
Amy (2015)                        97
Annie (2014)                      27
Ant-Man (2015)                    80
Avengers: Age of Ultron (2015)    74
Big Eyes (2014)                   72
Birdman (2014)                    92
Black Sea (2015)                  82
dtype: int64

In [23]:
sorted_by_index.shape

(146,)

### Trier un objet Series

In [24]:
# Series.sort_index()

# Series.sort_values()

sc2 = serie_custom.sort_index()
sc3 = serie_custom.sort_values()
print(sc2[:10], sc3[:10])

'71 (2015)                    97
5 Flights Up (2015)           52
A Little Chaos (2015)         40
A Most Violent Year (2014)    90
About Elly (2015)             97
Aloha (2015)                  19
American Sniper (2015)        72
American Ultra (2015)         46
Amy (2015)                    97
Annie (2014)                  27
dtype: int64 Paul Blart: Mall Cop 2 (2015)     5
Hitman: Agent 47 (2015)           7
Hot Pursuit (2015)                8
Fantastic Four (2015)             9
Taken 3 (2015)                    9
The Boy Next Door (2015)         10
The Loft (2015)                  11
Unfinished Business (2015)       11
Mortdecai (2015)                 12
Seventh Son (2015)               12
dtype: int64


### Transformation de colonnes

In [28]:
(serie_custom / 10)[:10]

Avengers: Age of Ultron (2015)    7.4
Cinderella (2015)                 8.5
Ant-Man (2015)                    8.0
Do You Believe? (2015)            1.8
Hot Tub Time Machine 2 (2015)     1.4
The Water Diviner (2015)          6.3
Irrational Man (2015)             4.2
Top Five (2014)                   8.6
Shaun the Sheep Movie (2015)      9.9
Love & Mercy (2015)               8.9
dtype: float64

In [27]:
import numpy as np
# additionner chaque valeur avec une autre
np.add(serie_custom, serie_custom)[:10]

Avengers: Age of Ultron (2015)    148
Cinderella (2015)                 170
Ant-Man (2015)                    160
Do You Believe? (2015)             36
Hot Tub Time Machine 2 (2015)      28
The Water Diviner (2015)          126
Irrational Man (2015)              84
Top Five (2014)                   172
Shaun the Sheep Movie (2015)      198
Love & Mercy (2015)               178
dtype: int64

In [29]:
# appliquer la fonction sinus
np.sin(serie_custom)[:5]

Avengers: Age of Ultron (2015)   -0.985146
Cinderella (2015)                -0.176076
Ant-Man (2015)                   -0.993889
Do You Believe? (2015)           -0.750987
Hot Tub Time Machine 2 (2015)     0.990607
The Water Diviner (2015)          0.167356
Irrational Man (2015)            -0.916522
Top Five (2014)                  -0.923458
Shaun the Sheep Movie (2015)     -0.999207
Love & Mercy (2015)               0.860069
dtype: float64

In [30]:
# retourner la valeur max (renvoie une valeur unique, NON PAS un objet Series)
np.max(serie_custom)

100

In [32]:
series_normalized = serie_custom / 20
series_normalized[:10]

Avengers: Age of Ultron (2015)    3.70
Cinderella (2015)                 4.25
Ant-Man (2015)                    4.00
Do You Believe? (2015)            0.90
Hot Tub Time Machine 2 (2015)     0.70
The Water Diviner (2015)          3.15
Irrational Man (2015)             2.10
Top Five (2014)                   4.30
Shaun the Sheep Movie (2015)      4.95
Love & Mercy (2015)               4.45
dtype: float64

### Comparer et filtrer

In [35]:
series_greater_than_50 = serie_custom[serie_custom > 50]
series_greater_than_50[:10]

Avengers: Age of Ultron (2015)       74
Cinderella (2015)                    85
Ant-Man (2015)                       80
The Water Diviner (2015)             63
Top Five (2014)                      86
Shaun the Sheep Movie (2015)         99
Love & Mercy (2015)                  89
Far From The Madding Crowd (2015)    84
Black Sea (2015)                     82
Leviathan (2014)                     99
dtype: int64

In [37]:
# & et/ou | et/ou []
criteria_one = serie_custom > 50
criteria_two = serie_custom < 75
both_criteria = serie_custom[criteria_one & criteria_two]
both_criteria[:10]

Avengers: Age of Ultron (2015)    74
The Water Diviner (2015)          63
Unbroken (2014)                   51
Southpaw (2015)                   59
Insidious: Chapter 3 (2015)       59
The Man From U.N.C.L.E. (2015)    68
Run All Night (2015)              60
5 Flights Up (2015)               52
Welcome to Me (2015)              71
Saint Laurent (2015)              51
dtype: int64

### Alignement des données

In [58]:
rt_critics = Series(fandango["RottenTomatoes"].values, index=fandango["FILM"])
rt_users = Series(fandango["RottenTomatoes_User"].values, index=fandango["FILM"])
rt_mean = (rt_critics + rt_users) / 2
rt_mean

FILM
Avengers: Age of Ultron (2015)                    80.0
Cinderella (2015)                                 82.5
Ant-Man (2015)                                    85.0
Do You Believe? (2015)                            51.0
Hot Tub Time Machine 2 (2015)                     21.0
The Water Diviner (2015)                          62.5
Irrational Man (2015)                             47.5
Top Five (2014)                                   75.0
Shaun the Sheep Movie (2015)                      90.5
Love & Mercy (2015)                               88.0
Far From The Madding Crowd (2015)                 80.5
Black Sea (2015)                                  71.0
Leviathan (2014)                                  89.0
Unbroken (2014)                                   60.5
The Imitation Game (2014)                         91.0
Taken 3 (2015)                                    27.5
Ted 2 (2015)                                      52.0
Southpaw (2015)                                   69.5
Night