In [44]:
import sktime
import pandas as pd
from typing import List, Union


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# 1. Load Time_Series.csv to DataFrame

In [45]:
csv_df = pd.read_csv("../Test_datasets/simple1.csv",header=0, index_col=0).transpose()
csv_df.index.dtype
print(csv_df.to_string())
print(csv_df.columns)


dtype('O')

identifier  first  second  third
1             312       0     45
2             345       0     87
3             355       0     23
4             355       0     67
5             356       1     12
6             354       1      3
7             355       1    654
8             356       2      4
Index(['first', 'second', 'third'], dtype='object', name='identifier')


# 2. Convert Time Series DataFrame to SKTime format

In [46]:
def convert_df_to_sktime(df: pd.DataFrame, columns: List[str]=None, index: Union[str, int]=0) -> pd.DataFrame:
    """
    Convert single pd.DataFrame to sktime format
    Extract each column as a pd.Series and write them into DataFrame cells
        
    :param df: pd.DataFrame with multiple features
    :param columns: provide column names to be extraced
    :param index: provide specific index to access by row
    :return: pd.DataFrame with single row, each cell = pd.Series of feature
    
    ToDo: catch error when column doesn't exist in df (or let it crash?)
    """
    if columns is None:
        columns = df.columns
        
    ts = pd.DataFrame(columns=columns)
    for column in columns:
        
        ts.at[index, column] = [df[column]]
        
    ts.columns = ts.columns.astype("string")
    return ts

csv_ts = convert_df_to_sktime(df=csv_df, index=1)
type(csv_ts)
# csv_ts.columns = csv_ts.columns.astype("string")
csv_ts.to_string()


pandas.core.frame.DataFrame

'identifier                                      first                       second                             third \n1           [[312, 345, 355, 355, 356, 354, 355, 356]]  [[0, 0, 0, 0, 1, 1, 1, 2]]  [[45, 87, 23, 67, 12, 3, 654, 4]]'

# 3. Convert Multiple Time_Series.csv to a SKTime DataFrame
## 3.1 Provide List of Time_Series.csv Filepaths

In [47]:
csv_filepath_list  = ["../Test_datasets/simple1.csv",
                      "../Test_datasets/simple2.csv"]
csv_filepath_list

['../Test_datasets/simple1.csv', '../Test_datasets/simple2.csv']

## 3.2 Read and Convert each Time_Series.csv File Individually
&rarr; append sktime DataFrames to a list

In [69]:
csv_ts_list = []
for csv_index, csv_path_i in enumerate(csv_filepath_list):
    csv_df_i = pd.read_csv(csv_path_i, index_col=0, parse_dates=True).transpose().infer_objects()
    csv_df_i = csv_df_i.astype("float64")
    csv_df_i.columns = csv_df_i.columns.astype(csv_df_i.columns.inferred_type)
    csv_df_i.index = csv_df_i.index.astype(csv_df_i.index.inferred_type)
    csv_ts_i = convert_df_to_sktime(df=csv_df_i, index=csv_index)
    csv_ts_list.append(csv_ts_i)
len(csv_ts_list)

  csv_df_i.values = pd.DataFrame(csv_df_i.values)


AttributeError: can't set attribute

## 3.3 Merge List of DataFrames to the Final SKTime DataFrame

In [49]:
csv_ts = pd.concat(csv_ts_list)
csv_ts
csv_ts.loc[0, "first"]


identifier,first,second,third
0,"[[312, 345, 355, 355, 356, 354, 355, 356]]","[[0, 0, 0, 0, 1, 1, 1, 2]]","[[45, 87, 23, 67, 12, 3, 654, 4]]"
1,"[[331.0, 312.0, 333.0, 334.0, 345.0, 345.0, 35...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]","[[45.0, 87.0, 23.0, 67.0, 12.0, 3.0, 654.0, nan]]"


[1    312
 2    345
 3    355
 4    355
 5    356
 6    354
 7    355
 8    356
 Name: first, dtype: int64]

In [71]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket

print(type(csv_ts))
#csv_ts = csv_ts.to_numesric()
print(csv_ts.loc[0].convert_dtypes())
rocket = Rocket()
#rocket.fit(csv_ts)
sktime.datatypes.check_raise(csv_ts.convert_dtypes(), mtype="pd.DataFrame")


<class 'pandas.core.frame.DataFrame'>
identifier
first     [[312, 345, 355, 355, 356, 354, 355, 356]]
second                    [[0, 0, 0, 0, 1, 1, 1, 2]]
third              [[45, 87, 23, 67, 12, 3, 654, 4]]
Name: 0, dtype: object


TypeError: input should not have column of 'object' dtype