In [9]:
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score
from skfeature.function.similarity_based import fisher_score

from ruletransform import ContractedRuleTransform
from ruletransform.utils import get_shapelets_lengths_interval
from ruletransform.data_io import load_from_tsfile_to_dataframe

ModuleNotFoundError: No module named 'ruletransform.data_io'

# Load Dataset

In [5]:
# Dataset to load: used both as the folder name under ../data and as the
# prefix of the .ts file names.
name = 'BasicMotions'

# Load the TRAIN split, then the TEST split, each from
# ../data/<name>/<name>_<SPLIT>.ts resolved to an absolute path.
# The loader's two return values are unpacked as (X, y) for each split.
(X_train, y_train), (X_test, y_test) = (
    load_from_tsfile_to_dataframe(
        os.path.abspath(os.path.join('..', 'data', name, name + "_" + split + ".ts"))
    )
    for split in ("TRAIN", "TEST")
)

# Shapelet Length Selection

In [6]:
# Derive the shapelet length bounds from the training data; the pair returned
# by the helper is reused as min/max shapelet length by the transforms below.
# NOTE(review): total_time=2 looks like a time budget -- units are not visible
# here, confirm against the ruletransform documentation.
min_length, max_length = get_shapelets_lengths_interval(
    X_train,
    y_train,
    total_time=2,
)

ValueError: If passed as a pd.DataFrame, X must be a nested pd.DataFrame, with pd.Series or np.arrays inside cells.

# Rule Transform Without Shapelet Clustering

## Fit and Transform Dataset

In [14]:
# Transformer configured without a clustering_ratio; shapelet lengths are
# bounded by the interval selected above.
# NOTE(review): the two *_contract values look like time budgets -- units are
# not visible in this notebook, confirm against the library.
rt = ContractedRuleTransform(
    min_shapelet_length=min_length,
    max_shapelet_length=max_length,
    shapelet_mining_contract=2,
    rule_mining_contract=1,
    verbose=0,
)

# Only the first two columns of the nested frames are used, for fitting as
# well as for both transforms.
rt.fit(X_train.iloc[:, :2], y_train)

# Rule-count features for the training and the test series.
# NOTE(review): `test` presumably switches between train and test mode of the
# transform -- confirm.
all_rules_counts = rt.transform(X_train.iloc[:, :2], test=False)
all_rules_counts_test = rt.transform(X_test.iloc[:, :2], test=True)

## Rule Visualization

## Rule Selection

In [15]:
# Share of the rule columns to keep, expressed in percent
percentage = 20

# Number of columns that share corresponds to; int() truncates any fraction
top_k = int(all_rules_counts.shape[1] * percentage / 100)

In [16]:
# Rule selection: drop placeholder rules, then keep the top_k rules with the
# highest support on the training set.
#
# Rules are indexed along axis 1 (one column per rule); rows are the series,
# which is why the rows must stay aligned with y_train / y_test.

# Column indices of rules between nonexistent shapelets: these are flagged by
# a column that is -1 for every training row. The check therefore has to run
# over the rows (axis=0) so that it yields one boolean per rule column.
to_delete = np.where(np.all(all_rules_counts == -1, axis=0))[0]

# Delete the flagged rule columns. The same column indices are removed from
# the train and the test matrix so that their feature columns stay aligned,
# and no row (sample) is ever dropped.
all_rules_counts = np.delete(all_rules_counts, to_delete, axis=1)
all_rules_counts_test = np.delete(all_rules_counts_test, to_delete, axis=1)

# Support of each rule = sum of its column over the training rows.
# NumPy's default accumulator is used instead of a fixed uint16 buffer, which
# would wrap around on negative sums or on sums above 65535.
supports = all_rules_counts.sum(axis=0)

# Indices of the rules with the highest support, best first.
# NOTE(review): top_k comes from the surrounding notebook; if it was derived
# from the column count before the filtering above, fewer than top_k rules may
# remain -- the slice then simply returns all of them.
best_rules_indices = np.argsort(supports)[::-1][:top_k]

# Keep only the selected rule columns, identically for train and test
best_rules = all_rules_counts[:, best_rules_indices]
best_rules_test = all_rules_counts_test[:, best_rules_indices]

## Classification

In [17]:
# Train a random forest (500 trees, fixed seed) on the selected training
# rules; fit() hands back the fitted estimator, so it is bound directly.
clf = RandomForestClassifier(n_estimators=500, random_state=0).fit(best_rules, y_train)

# Predict the labels of the test series from their selected rules
y_pred = clf.predict(best_rules_test)

In [18]:
# Report the accuracy of the predictions against the true test labels
print('The test accuracy is: ', accuracy_score(y_test, y_pred), sep='')

The test accuracy is: 0.975


# Rule Transform With Shapelet Clustering

## Fit and Transform Dataset

In [19]:
# Transformer configured with clustering_ratio=80; shapelet lengths are
# bounded by the interval selected earlier in the notebook.
# NOTE(review): the meaning/scale of clustering_ratio and the units of the
# *_contract values are not visible here -- confirm against the library.
rt = ContractedRuleTransform(
    clustering_ratio=80,
    min_shapelet_length=min_length,
    max_shapelet_length=max_length,
    shapelet_mining_contract=2,
    rule_mining_contract=1,
    verbose=0,
)

# Only the first two columns of the nested frames are used, for fitting as
# well as for both transforms.
rt.fit(X_train.iloc[:, :2], y_train)

# Rule-count features for the training and the test series.
# NOTE(review): `test` presumably switches between train and test mode of the
# transform -- confirm.
all_rules_counts = rt.transform(X_train.iloc[:, :2], test=False)
all_rules_counts_test = rt.transform(X_test.iloc[:, :2], test=True)

  return array(a, dtype, copy=False, order=order)


## Classification

In [20]:
# Train a random forest (500 trees, fixed seed) on all rule counts of the
# training series -- no rule selection is applied in this cell; fit() hands
# back the fitted estimator, so it is bound directly.
clf = RandomForestClassifier(n_estimators=500, random_state=0).fit(all_rules_counts, y_train)

# Predict the labels of the test series from their rule counts
y_pred = clf.predict(all_rules_counts_test)

# Report the accuracy of the predictions against the true test labels
print('The test accuracy is: ', accuracy_score(y_test, y_pred), sep='')

The test accuracy is: 0.975
