## Imputation

In [1]:
import numpy as np
from sklearn.impute import SimpleImputer

In [2]:
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')

In [3]:
imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])
SimpleImputer()
df = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]
print(imp_mean.transform(df))

[[ 7.   2.   3. ]
 [ 4.   3.5  6. ]
 [10.   3.5  9. ]]


## Holiday Features

In [13]:
from workalendar.europe.united_kingdom import UnitedKingdom
cal = UnitedKingdom()
cal.holidays()

ModuleNotFoundError: No module named 'workalendar.europe.united_kingdom'; 'workalendar.europe' is not a package

In [11]:
from typing import List
from dateutil.relativedelta import relativedelta, TH
import datetime
from workalendar.usa import California

def create_custom_holdays(year: int) -> List:
    custom_holidays = California().holidays()
    custom_holidays.append((
    (datetime.datetime(year, 11, 1) + relativedelta(weekday=TH(+4)) + datetime.timedelta(days=1)).date(),
    "Black Friday"
    ))
    return {k: v for (k, v) in custom_holidays}

custom_holidays = create_custom_holidays(2012)
custom_holidays

ModuleNotFoundError: No module named 'workalendar.usa'

In [16]:
pip install workalendar

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


## Date Annotation

In [17]:
import calendar
calendar.monthrange(2021, 1)

(4, 31)

In [34]:
from datetime import datetime
def year_anchor(current_date: datetime):
    return (
        (current_date - date(current_date.year, 1, 1)).days,
        (date(current_date.year, 12, 31) - current_date).days,
    )
year_anchor(datetime)

TypeError: an integer is required (got type getset_descriptor)

In [33]:
datetime.year()

TypeError: 'getset_descriptor' object is not callable

## Automated Feature Extraction

In [37]:
import featuretools as ft
from featuretools.primitives import Minute, Hour, Day, Month, Year, Weekday
import pandas as pd

In [42]:
data = pd.DataFrame(
    {'Time': ['2014-01-01 01:41:00',
              '2014-01-01 02:21:10',
              '2014-01-01 03:00:20',
              '2014-01-01 04:50:30',
              '2014-01-01 06:11:50'],
     'Target': [0, 0, 0, 0, 1]}
)
data['index'] = data.index
es = ft.EntitySet('My EntitySet')
es.add_dataframe(
    dataframe_name='main_data_table',
    index='index',
    dataframe=data,
    time_index='Time'
)
fm, features = ft.dfs(
    entityset=es,
    target_dataframe_name='main_data_table',
    trans_primitives=[Minute, Hour, Day, Month, Year, Weekday]
)



In [44]:
fm

Unnamed: 0_level_0,Target,DAY(Time),HOUR(Time),MINUTE(Time),MONTH(Time),WEEKDAY(Time),YEAR(Time)
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,1,1,41,1,2,2014
1,0,1,2,21,1,2,2014
2,0,1,3,0,1,2,2014
3,0,1,4,50,1,2,2014
4,1,1,6,11,1,2,2014


In [45]:
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import ComprehensiveFCParameters

settings = ComprehensiveFCParameters()
extract_features(data, column_id='Time', default_fc_parameters=settings)

Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.66it/s]


Unnamed: 0,Target__variance_larger_than_standard_deviation,Target__has_duplicate_max,Target__has_duplicate_min,Target__has_duplicate,Target__sum_values,Target__abs_energy,Target__mean_abs_change,Target__mean_change,Target__mean_second_derivative_central,Target__median,...,index__fourier_entropy__bins_5,index__fourier_entropy__bins_10,index__fourier_entropy__bins_100,index__permutation_entropy__dimension_3__tau_1,index__permutation_entropy__dimension_4__tau_1,index__permutation_entropy__dimension_5__tau_1,index__permutation_entropy__dimension_6__tau_1,index__permutation_entropy__dimension_7__tau_1,index__query_similarity_count__query_None__threshold_0.0,index__mean_n_absolute_max__number_of_maxima_7
2014-01-01 01:41:00,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,...,,,,,,,,,,
2014-01-01 02:21:10,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,...,,,,,,,,,,
2014-01-01 03:00:20,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,...,,,,,,,,,,
2014-01-01 04:50:30,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,...,,,,,,,,,,
2014-01-01 06:11:50,0.0,0.0,0.0,0.0,1.0,1.0,,,,1.0,...,,,,,,,,,,


## ROCKET

In [47]:
from sktime.datasets import load_arrow_head
#from sktime.utils.data_processing import from_nested_to_2d_array

In [49]:
X_train, y_train = load_arrow_head(split="train", return_X_y=True)
X_train.head()

Unnamed: 0,dim_0
0,0 -1.963009 1 -1.957825 2 -1.95614...
1,0 -1.774571 1 -1.774036 2 -1.77658...
2,0 -1.866021 1 -1.841991 2 -1.83502...
3,0 -2.073758 1 -2.073301 2 -2.04460...
4,0 -1.746255 1 -1.741263 2 -1.72274...


In [54]:
from sktime.utils.data_processing # import from_nested_to_2d_array

SyntaxError: invalid syntax (777279288.py, line 1)