### Dates avec PANDAS

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from time import time
from datetime import datetime, timedelta
rng = np.random.default_rng(seed=int(time()))
pd.__version__


'2.1.4'

In [3]:
df = pd.DataFrame(
    data={
        "name": [f"user_{i}" for i in range(5)],
        "birth_date": ["1990-02-24", "2000-01-17", "2004-05-05", "2010-03-18", "1984-06-28"]
    }
)
print(df.dtypes)
df

name          object
birth_date    object
dtype: object


Unnamed: 0,name,birth_date
0,user_0,1990-02-24
1,user_1,2000-01-17
2,user_2,2004-05-05
3,user_3,2010-03-18
4,user_4,1984-06-28


In [5]:
# conversion d'une colonne en date
pd.to_datetime(df["birth_date"])
# directement sur le df
df["birth_date"].astype("datetime64[ns]")

0   1990-02-24
1   2000-01-17
2   2004-05-05
3   2010-03-18
4   1984-06-28
Name: birth_date, dtype: datetime64[ns]

In [7]:
# utilisation du sous objet .dt qui permet d'utiliser les attributs et méthode
# du module datetime en version vectorisée
df["birth_date"] = df["birth_date"].astype("datetime64[ns]")
df["birth_date"].dt.day_of_year, df["birth_date"].dt.strftime("%d/%m/%Y")

(0     55
 1     17
 2    126
 3     77
 4    180
 Name: birth_date, dtype: int32,
 0    24/02/1990
 1    17/01/2000
 2    05/05/2004
 3    18/03/2010
 4    28/06/1984
 Name: birth_date, dtype: object)

In [29]:
# arithmétique de dates avec broadcasting
# compatibilité avec datetime
# dt - dt2 => delta
print(df["birth_date"] - datetime.strptime("2000-06-01", "%Y-%m-%d"))
# dt + delta => dt2
df["birth_date"] + timedelta(days=3)

0   -3750 days
1    -136 days
2    1434 days
3    3577 days
4   -5817 days
Name: birth_date, dtype: timedelta64[ns]


TypeError: Cannot cast DatetimeIndex to dtype datetime64[h]

In [21]:
# Range
pd.date_range(start="2024-01-15", end="2024-01-19")
pd.date_range(start="2024-01-15", end="2024-01-19", freq="H")
pd.date_range(start="2024-01-15", end="2024-01-19", freq="12H")

# construciton depuis le début
pd.date_range(start="2024-01-15", periods=5, freq="D")
# jour du mois ici le dernier
pd.date_range(start="2024-01-15", periods=5, freq="M")
pd.date_range(start="2024-01-15", periods=5, freq="MS")

# pour le mois et l'année
pd.period_range("2024-01", periods=6, freq="6M")



PeriodIndex(['2024-01', '2024-07', '2025-01', '2025-07', '2026-01', '2026-07'], dtype='period[6M]')

In [24]:
# composition des index: si on veut un index avec le 1er jour de chaque et le 15 de chaque mois
index = pd.date_range("2024-01-01", periods=6, freq="MS")
ides = index + pd.Timedelta("14 days")
index.union(ides)

DatetimeIndex(['2024-01-01', '2024-01-15', '2024-02-01', '2024-02-15',
               '2024-03-01', '2024-03-15', '2024-04-01', '2024-04-15',
               '2024-05-01', '2024-05-15', '2024-06-01', '2024-06-15'],
              dtype='datetime64[ns]', freq=None)

In [32]:
# durée vers entiers



AttributeError: 'Timedelta' object has no attribute 'astype'

ValueError: Cannot convert from timedelta64[ns] to timedelta64[h]. Supported resolutions are 's', 'ms', 'us', 'ns'