In [1]:
import pandas as pd
from datetime import datetime, date, time, timedelta

## 1. Lecture des données CSV du Tournoi des 6 Nations

In [2]:
# Load the Six Nations 2023 matches CSV; the two timestamp columns are
# converted to datetimes while reading.
dfMatches = pd.read_csv(
    'data/6nations/2023/matches.csv',
    parse_dates=['datetime', 'datetime_paris'],
)
dfMatches.head(n=3)  # preview the first rows; .tail(n=3) would show the last ones

Unnamed: 0,guid,uuid,datetime,datetime_paris,round,teamA_name,teamA_shortName,teamB_name,teamB_shortName,score_teamA,...,htscore_teamB,status,minutes,referee_firstName,referee_lastName,referee_knownAs,venue_name,venue_location,venue_timezone,attendance
0,22WI1234,8a908178-2222-4952-9759-62f1d7d7ed6b,2023-02-04 14:15:00+00:00,2023-02-04 15:15:00+01:00,1,Pays de Galles,WALE,Irlande,IREL,10,...,27,Res,80,Karl,Dickson,Karl Dickson,Principality Stadium,Cardiff,Europe/London,74500
1,22ES1234,63d81950-41cd-43f4-b5ad-9ef1b68b673c,2023-02-04 16:45:00+00:00,2023-02-04 17:45:00+01:00,1,Angleterre,ENGL,Écosse,SCOT,23,...,12,Res,80,Paul,Williams,Paul Williams,Twickenham,London,Europe/London,0
2,22IF1234,b0354f39-fa83-4fb7-817d-d64d06e956eb,2023-02-05 15:00:00+00:00,2023-02-05 16:00:00+01:00,1,Italie,ITAL,France,FRAN,24,...,19,Res,80,Matthew,Carley,Matthew Carley,Stadio Olimpico,Rome,Europe/Rome,41232


In [3]:
# Load the Six Nations 2023 players CSV and preview its first rows.
dStatPlayers = pd.read_csv(
    'data/6nations/2023/players.csv',
)
dStatPlayers.head(n=3)

Unnamed: 0,game_guid,team,uuid,guid,firstName,lastName,position,starter,MP,T,...,TW,TC,HE,PC,OPC,SPC,LW,LS,YC,RC
0,22WI1234,teamA,0a53a3d1-ed03-4d88-92e2-243e1f205908,LW454197,Liam,Williams,15,True,0,0,...,1,0,0,2,0,0,0,0,0,0
1,22WI1234,teamA,9251b023-6fe1-43e3-8606-a488a6e5269a,JA935900,Josh,Adams,14,True,0,0,...,0,0,0,1,0,0,0,0,0,0
2,22WI1234,teamA,7fd1fa2d-77f1-4427-8000-45ff3ebf24fd,GN107642,George,North,13,True,0,0,...,1,0,1,0,0,0,1,0,0,0


In [4]:
# Print the frame summary: row count, column names, non-null counts and dtypes
# (useful to check that the two timestamp columns were parsed as datetimes).
dfMatches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 22 columns):
 #   Column             Non-Null Count  Dtype                               
---  ------             --------------  -----                               
 0   guid               15 non-null     object                              
 1   uuid               15 non-null     object                              
 2   datetime           15 non-null     datetime64[ns, UTC]                 
 3   datetime_paris     15 non-null     datetime64[ns, pytz.FixedOffset(60)]
 4   round              15 non-null     int64                               
 5   teamA_name         15 non-null     object                              
 6   teamA_shortName    15 non-null     object                              
 7   teamB_name         15 non-null     object                              
 8   teamB_shortName    15 non-null     object                              
 9   score_teamA        15 non-null     int64     

## 2. Données temporelles avec Python

### Construction à partir des composantes

In [5]:
# Build a calendar date from its year, month and day components.
hier = date(year=2023, month=3, day=13)
print(hier)  # str() form: YYYY-MM-DD
hier  # cell result shows the repr() form

2023-03-13


datetime.date(2023, 3, 13)

In [6]:
# Build a datetime from its components; no timezone is attached and the
# seconds are left at their default of 0.
hier_a_17h30 = datetime(year=2023, month=3, day=13, hour=17, minute=30)
# Full value, then its calendar-date and time-of-day parts, one per line.
print(hier_a_17h30, hier_a_17h30.date(), hier_a_17h30.time(), sep='\n')
# Individual fields are plain attributes; minute, second, ... work the same way.
print(
    hier_a_17h30.year,
    hier_a_17h30.month,
    hier_a_17h30.day,
    hier_a_17h30.hour,
    sep='\n',
)
hier_a_17h30

2023-03-13 17:30:00
2023-03-13
17:30:00
2023
3
13
17


datetime.datetime(2023, 3, 13, 17, 30)

In [7]:
# A time of day on its own, with no date attached.
heureLever = time(hour=7, minute=15)
print(heureLever)  # str() form: HH:MM:SS
heureLever  # cell result shows the repr() form

07:15:00


datetime.time(7, 15)

### Conversion texte <-> donnée temporelle avec format
Documentation des codes de format : https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [8]:
# Sample inputs for the parsing examples: an ISO date and an ISO date-time...
dateIso, datetimeIso = '2023-03-13', '2020-02-29 18:05:04'
# ...and the same leap day written day-first (French) and month-first (US).
dateFr, dateUs = '29/02/2000', '2/29/2000'

In [9]:
# ISO 8601 date strings need no explicit format: fromisoformat parses them directly.
date1 = date.fromisoformat(dateIso)
date1

datetime.date(2023, 3, 13)

In [10]:
# Same idea for a full ISO date-time string ('YYYY-MM-DD HH:MM:SS').
datetime1 = datetime.fromisoformat(datetimeIso)
datetime1

datetime.datetime(2020, 2, 29, 18, 5, 4)

In [11]:
# Day-first (French) string: describe the layout with an explicit format.
# strptime returns a datetime, so .date() keeps only the calendar part.
date2 = datetime.strptime(dateFr, '%d/%m/%Y').date()
date2

datetime.date(2000, 2, 29)

In [12]:
# Month-first (US) string: same call, with %m and %d swapped in the format.
datetime.strptime(dateUs, '%m/%d/%Y').date()

datetime.date(2000, 2, 29)

In [13]:
# Two parsing failures, caught so the message is displayed without stopping
# a "Run All" of the notebook.

# Reading the US string with a day-first format would mean month 29: rejected.
try:
    datetime.strptime(dateUs, '%d/%m/%Y').date()
except ValueError as err:
    print(f"ValueError: {err}")

# 29 February does not exist in a non-leap year such as 2023: rejected too.
try:
    datetime.strptime('29/2/2023', '%d/%m/%Y').date()
except ValueError as err:
    print(f"ValueError: {err}")

In [14]:
print(datetime1)  # default str() output: 'YYYY-MM-DD HH:MM:SS'
print(datetime1.strftime('%d/%m/%Y %H:%M')) # French day-first format, down to the minutes
print(f"Formation a lieu le {datetime1:%d/%m/%Y à %H:%M}") # same format codes used as an f-string format spec

2020-02-29 18:05:04
29/02/2020 18:05
Formation a lieu le 29/02/2020 à 18:05


## 3. Sélections temporelles avec Pandas

In [15]:
# Q1. extract the matches played on 11 March 2023
# TODO

In [16]:
# Q2. extract the matches played from 11 to 12 March 2023
# TODO

In [17]:
# Q3. extract the matches played in March
# intermediate step: pull the month number out of the datetime_paris column
dfMatches['datetime_paris'].dt.month

0     2
1     2
2     2
3     2
4     2
5     2
6     2
7     2
8     2
9     3
10    3
11    3
12    3
13    3
14    3
Name: datetime_paris, dtype: int64

In [18]:
# match du mois de mars
dfMatches.loc[
    dfMatches.datetime_paris.dt.month == 3, 
    ['datetime_paris', 'teamA_name', 'teamB_name', 'score_teamA', 'score_teamB']
]

Unnamed: 0,datetime_paris,teamA_name,teamB_name,score_teamA,score_teamB
9,2023-03-11 15:15:00+01:00,Italie,Pays de Galles,17,29
10,2023-03-11 17:45:00+01:00,Angleterre,France,10,53
11,2023-03-12 16:00:00+01:00,Écosse,Irlande,7,22
12,2023-03-18 13:30:00+01:00,Écosse,Italie,0,0
13,2023-03-18 15:45:00+01:00,France,Pays de Galles,0,0
14,2023-03-18 18:00:00+01:00,Irlande,Angleterre,0,0


In [19]:
# Q4. matches finis du mois de mars
dfMatches.loc[
    (dfMatches.datetime_paris.dt.month == 3) & (dfMatches.status == 'Res'), 
    ['datetime_paris', 'teamA_name', 'teamB_name', 'score_teamA', 'score_teamB']
]

Unnamed: 0,datetime_paris,teamA_name,teamB_name,score_teamA,score_teamB
9,2023-03-11 15:15:00+01:00,Italie,Pays de Galles,17,29
10,2023-03-11 17:45:00+01:00,Angleterre,France,10,53
11,2023-03-12 16:00:00+01:00,Écosse,Irlande,7,22


In [20]:
# Q5. matches de l'équipe de France
dfMatchesFrance = dfMatches.loc[
    (dfMatches.teamA_name == 'France') | (dfMatches.teamB_name == 'France'),
    ['datetime_paris', 'teamA_name', 'teamB_name', 'score_teamA', 'score_teamB']
]
dfMatchesFrance

Unnamed: 0,datetime_paris,teamA_name,teamB_name,score_teamA,score_teamB
2,2023-02-05 16:00:00+01:00,Italie,France,24,29
3,2023-02-11 15:15:00+01:00,Irlande,France,32,19
8,2023-02-26 16:00:00+01:00,France,Écosse,32,21
10,2023-03-11 17:45:00+01:00,Angleterre,France,10,53
13,2023-03-18 15:45:00+01:00,France,Pays de Galles,0,0


In [21]:
# Différence de temps entre les 2 premiers matches
dfMatchesFrance.loc[3, 'datetime_paris'] - dfMatchesFrance.loc[2, 'datetime_paris']

Timedelta('5 days 23:15:00')