# Exercise
Use one of the methods explained so far (`Cleaner`/`ApplyToCols`) to convert the
`admission_dates` column of the provided dataframe to a datetime dtype, then extract the following features:
- All parts of the datetime 
- The number of seconds from epoch
- The day in the week
- The day of the year

**Hint**: use the format `"%d %B %Y"` for the datetime. 


In [1]:
import pandas as pd

# Small toy patient table. "admission_dates" is stored as plain strings
# (day, full month name, year), not yet as a datetime dtype.
data = dict(
    admission_dates=[
        "03 January 2023",
        "15 February 2023",
        "27 March 2023",
        "10 April 2023",
    ],
    patient_ids=[101, 102, 103, 104],
    age=[25, 34, 45, 52],
    outcome=["Recovered", "Under Treatment", "Recovered", "Deceased"],
)
df = pd.DataFrame(data=data)
print(df)

    admission_dates  patient_ids  age          outcome
0   03 January 2023          101   25        Recovered
1  15 February 2023          102   34  Under Treatment
2     27 March 2023          103   45        Recovered
3     10 April 2023          104   52         Deceased


In [2]:
# Write your solution here
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 

In [3]:
# Solution with ApplyToCols and ToDatetime
from skrub import ApplyToCols, ToDatetime, DatetimeEncoder
from sklearn.pipeline import make_pipeline
import skrub.selectors as s

# Step 1: convert the "admission_dates" strings with the day / month-name / year format.
to_datetime_encoder = ApplyToCols(
    ToDatetime(format="%d %B %Y"),
    cols="admission_dates",
)

# Step 2: apply a DatetimeEncoder to the columns selected by s.any_date(),
# requesting total seconds, weekday and day of year.
datetime_features = DatetimeEncoder(
    add_total_seconds=True,
    add_weekday=True,
    add_day_of_year=True,
)
datetime_encoder = ApplyToCols(datetime_features, cols=s.any_date())

# Chain both steps; the last expression displays the transformed dataframe.
encoder = make_pipeline(to_datetime_encoder, datetime_encoder)
encoder.fit_transform(df)

Unnamed: 0,admission_dates_year,admission_dates_month,admission_dates_day,admission_dates_total_seconds,admission_dates_weekday,admission_dates_day_of_year,patient_ids,age,outcome
0,2023.0,1.0,3.0,1672704000.0,2.0,3.0,101,25,Recovered
1,2023.0,2.0,15.0,1676419000.0,3.0,46.0,102,34,Under Treatment
2,2023.0,3.0,27.0,1679875000.0,1.0,86.0,103,45,Recovered
3,2023.0,4.0,10.0,1681085000.0,1.0,100.0,104,52,Deceased


In [4]:
# Solution with Cleaner
# Import every skrub name used below so this cell does not depend on the
# ApplyToCols solution cell having been run first (it only needs `df`).
from skrub import ApplyToCols, Cleaner, DatetimeEncoder
from sklearn.pipeline import make_pipeline
import skrub.selectors as s

# Apply a DatetimeEncoder to the columns selected by s.any_date(),
# requesting total seconds, weekday and day of year.
datetime_encoder = ApplyToCols(
    DatetimeEncoder(add_total_seconds=True, add_weekday=True, add_day_of_year=True),
    cols=s.any_date(),
)

# The Cleaner is given the date format of the "admission_dates" strings; it
# replaces the explicit ToDatetime step of the other solution.
encoder = make_pipeline(Cleaner(datetime_format="%d %B %Y"), datetime_encoder)
encoder.fit_transform(df)

Unnamed: 0,admission_dates_year,admission_dates_month,admission_dates_day,admission_dates_total_seconds,admission_dates_weekday,admission_dates_day_of_year,patient_ids,age,outcome
0,2023.0,1.0,3.0,1672704000.0,2.0,3.0,101,25,Recovered
1,2023.0,2.0,15.0,1676419000.0,3.0,46.0,102,34,Under Treatment
2,2023.0,3.0,27.0,1679875000.0,1.0,86.0,103,45,Recovered
3,2023.0,4.0,10.0,1681085000.0,1.0,100.0,104,52,Deceased


Modify the script so that the `DatetimeEncoder` adds periodic encoding with sine
and cosine (aka circular encoding, `periodic_encoding="circular"`):

In [5]:
# Write your solution here
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 

Now modify the script above to add spline features (`periodic_encoding="spline"`). 


In [6]:
# Solution with spline periodic encoding
# Import every skrub name used below so this cell does not depend on an
# earlier solution cell having been run first (it only needs `df`).
from skrub import ApplyToCols, Cleaner, DatetimeEncoder
from sklearn.pipeline import make_pipeline
import skrub.selectors as s

# Same encoder as in the Cleaner solution, with periodic_encoding="spline" added.
datetime_encoder = ApplyToCols(
    DatetimeEncoder(
        periodic_encoding="spline",
        add_total_seconds=True,
        add_weekday=True,
        add_day_of_year=True,
    ),
    cols=s.any_date(),
)

# The Cleaner is given the date format of the "admission_dates" strings.
encoder = make_pipeline(Cleaner(datetime_format="%d %B %Y"), datetime_encoder)
encoder.fit_transform(df)

Unnamed: 0,admission_dates_year,admission_dates_total_seconds,admission_dates_day_of_year,admission_dates_month_spline_00,admission_dates_month_spline_01,admission_dates_month_spline_02,admission_dates_month_spline_03,admission_dates_month_spline_04,admission_dates_month_spline_05,admission_dates_month_spline_06,...,admission_dates_weekday_spline_0,admission_dates_weekday_spline_1,admission_dates_weekday_spline_2,admission_dates_weekday_spline_3,admission_dates_weekday_spline_4,admission_dates_weekday_spline_5,admission_dates_weekday_spline_6,patient_ids,age,outcome
0,2023.0,1672704000.0,3.0,0.0,0.166667,0.666667,0.166667,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.666667,0.166667,0.0,0.0,101,25,Recovered
1,2023.0,1676419000.0,46.0,0.0,0.0,0.166667,0.666667,0.166667,0.0,0.0,...,0.0,0.0,0.0,0.166667,0.666667,0.166667,0.0,102,34,Under Treatment
2,2023.0,1679875000.0,86.0,0.0,0.0,0.0,0.166667,0.666667,0.166667,0.0,...,0.0,0.166667,0.666667,0.166667,0.0,0.0,0.0,103,45,Recovered
3,2023.0,1681085000.0,100.0,0.0,0.0,0.0,0.0,0.166667,0.666667,0.166667,...,0.0,0.166667,0.666667,0.166667,0.0,0.0,0.0,104,52,Deceased
