In [74]:
# Dependency: pyts (install once into the kernel environment with `%pip install pyts`)

In [75]:
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, classification_report, confusion_matrix
from pyts.classification import TimeSeriesForest
import joblib

In [76]:
# Read the recorded measurements (one row per timestamp) into a DataFrame
data = pd.read_csv("historical_data.csv")

In [77]:
# data['Timestamp'] = pd.to_datetime(data['Timestamp'])

In [78]:
# Preview the first five rows to sanity-check the columns that were loaded
display(data.head(n=5))

Unnamed: 0,Timestamp,RR,SPO2,MAP,SBP,DBP,HR,PP,CO,Class
0,2020-10-18 15:24,35.0,99.9,0.0,0.0,0.0,106.9,0.0,0.0,Normal
1,2020-10-18 15:25,36.4,100.0,87.0,98.9,63.1,107.3,35.8,3841.34,Normal
2,2020-10-18 15:26,35.2,100.0,75.2,97.9,63.0,107.5,34.9,3751.75,Normal
3,2020-10-18 15:27,34.0,100.0,74.8,97.2,62.5,107.0,34.7,3712.9,Normal
4,2020-10-18 15:28,34.9,100.0,74.0,96.0,62.0,107.0,34.0,3638.0,Normal


In [79]:
# Predictor columns fed to the model; the 'Class' column is the label
# and is deliberately left out of this list.
features = [
    'RR', 'SPO2', 'MAP', 'SBP',
    'DBP', 'HR', 'PP', 'CO',
]

In [80]:
# Pull the predictor matrix and the label vector out of the DataFrame as arrays
X, y = data[features].values, data['Class'].values

In [81]:
# Number of folds handed to TimeSeriesSplit below; every candidate in the
# search is trained and evaluated once per fold, so run time scales with it.
n_splits = 10  # Adjust as needed

In [82]:
# Initialize TimeSeriesSplit: the splitter used by the search loop to produce
# (train_index, test_index) pairs over the rows of X.
tscv = TimeSeriesSplit(n_splits=n_splits)

In [83]:
# Candidate values for the search: the powers of two 2^0 .. 2^10.
# Each one is passed to TimeSeriesForest as `n_windows` in the search loop.
window_sizes = [1 << exponent for exponent in range(11)]

In [None]:
scaler = StandardScaler()

best_recall = 0.0
best_accuracy = 0.0
best_f1 = 0.0
best_precision = 0.0
best_params = None

# Search over the candidate window values.
#
# Each candidate is scored by the MEAN of its metrics over all TimeSeriesSplit
# folds, and candidates are compared on mean recall. Comparing on the best
# single fold (as was done before) lets one lucky fold decide the winner and
# reports scores that are not representative of the candidate as a whole.
for window_size in window_sizes:
    # Per-fold scores for this candidate, averaged once all folds are done
    fold_scores = {'recall': [], 'accuracy': [], 'precision': [], 'f1': []}

    for train_index, test_index in tscv.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Fit the scaler on the training fold only, then apply the same
        # transform to the test fold, so no test statistics leak into training.
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train a fresh forest for this fold; note the candidate value is
        # passed as `n_windows`.
        tsf_classifier = TimeSeriesForest(n_estimators=100, n_jobs=-1, random_state=42, n_windows=window_size)
        tsf_classifier.fit(X_train_scaled, y_train)

        predictions = tsf_classifier.predict(X_test_scaled)

        # NOTE(review): zero_division=1 scores an undefined per-class metric
        # as 1, which can flatter precision/F1 for classes that are never
        # predicted in a fold -- confirm this is intended.
        fold_scores['recall'].append(recall_score(y_test, predictions, average='weighted', zero_division=1))
        fold_scores['accuracy'].append(accuracy_score(y_test, predictions))
        fold_scores['precision'].append(precision_score(y_test, predictions, average='weighted', zero_division=1))
        fold_scores['f1'].append(f1_score(y_test, predictions, average='weighted', zero_division=1))

    mean_scores = {name: sum(values) / len(values) for name, values in fold_scores.items()}

    # Always accept the first candidate so best_params is set even when every
    # recall is 0.0; afterwards require a strict improvement in mean recall.
    if best_params is None or mean_scores['recall'] > best_recall:
        best_recall = mean_scores['recall']
        best_accuracy = mean_scores['accuracy']
        best_f1 = mean_scores['f1']
        best_precision = mean_scores['precision']
        best_params = {'window_size': window_size, 'model_params': tsf_classifier.get_params()}

if best_params is None:
    # Only reachable when window_sizes is empty; fail with a clear message
    # instead of a TypeError from subscripting None below.
    raise ValueError('No window size was evaluated; window_sizes is empty')

# Print the best results (scores are means across the cross-validation folds)
print(f'\nBest Window Size: {best_params["window_size"]}')
print(f'Best Recall Score: {best_recall}')
print(f'Best Precision Score: {best_precision}')
print(f'Best Accuracy Score: {best_accuracy}')
print(f'Best F1 Score: {best_f1}')
print(f'Best Model Parameters: {best_params["model_params"]}')


In [85]:
# Re-fit the shared scaler on every row and standardize the full feature
# matrix in one step; the final model is trained on this scaled copy.
X_scaled = scaler.fit_transform(X)

In [86]:
# Final model: same forest settings as the search loop, with `n_windows` set
# to the value the search selected (original author's note: window_size = 16).
TS_random_forest_model = TimeSeriesForest(
    n_estimators=100,
    n_jobs=-1,
    random_state=42,
    n_windows=best_params["window_size"],
)
# Train on the entire (standardized) dataset
TS_random_forest_model.fit(X_scaled, y)

In [87]:
# The model was trained on standardized features, so whoever loads it must
# apply the same fitted scaler to new data first. Persist the scaler next to
# the model; without it the saved model cannot be used correctly on raw inputs.
joblib.dump(scaler, 'TS_random_forest_scaler.joblib')

# Save the final model to a file (kept as the last expression so the cell
# still displays the model's file name)
joblib.dump(TS_random_forest_model, 'TS_random_forest_model.joblib')

['TS_random_forest_model.joblib']