# Capstone project: Classify robot failures
### Optional challenge: read the raw data files and format them correctly to save as CSV files

In [None]:
# Write code to read data files and format as CSV here.

## 1. Initial setup

In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_recall_fscore_support 

from sktime.classification.dictionary_based import WEASEL
from sktime.classification.ensemble import BaggingClassifier
from sktime.classification.feature_based import SummaryClassifier, Catch22Classifier

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

from tsfresh import extract_features, select_features

import warnings
warnings.filterwarnings('ignore')

### Read data (lp1 data)

In [None]:
# Load the lp1 feature table and the label file from the data/ directory.
X = pd.read_csv(filepath_or_buffer='data/lp1_X.csv')
# squeeze() drops any length-1 axis; presumably this turns a one-column
# label file into a Series -- confirm against the CSV.
y = pd.read_csv(filepath_or_buffer='data/lp1_y.csv').squeeze()

# Preview the first rows of the feature table.
X.head()

In [None]:
# Move the 'id' and 'timestep' columns into a two-level row index.
# NOTE: this cell is not re-runnable on its own; once the columns are in the
# index, a second set_index call on the same frame no longer finds them.
X = X.set_index(keys=['id', 'timestep'])
X.head()

In [None]:
# Count how many times each distinct label value occurs in y.
y.value_counts()

### Train/test split

In [None]:
# Get index of each label



In [None]:
# Get train/test split indices for each label and concatenate
# all train and test indices together
# NOTE: train_index and test_index must be assigned in this cell before the
# prints below can run. The TSFresh section later reuses both names with
# .iloc, so they are presumably positional row indices -- confirm.

print(train_index)
print(test_index)

In [None]:
# Train/test split


## 2. Modeling

In [None]:
# Integer code for each class label; a label's code is its position in the
# list below.
label_mapping = {
    label: code
    for code, label in enumerate(
        ['obstruction', 'normal', 'collision', 'fr_collision']
    )
}

# Map the labels to integers


### Bagging

In [None]:
# Use bagging with WEASEL
# Store the precision, recall and F1-Score
# Print out the classification report



### Summary classifier

In [None]:
# Use Summary Classifier with a list of sklearn classifiers
# For all models, print the precision, recall, and F1-Score
# Which model performed best?


### TSFresh

In [None]:
# Reload the raw CSVs. Earlier in the notebook X was re-indexed by
# ['id', 'timestep'], while the feature extraction that follows is given
# 'id' and 'timestep' as column names.
X = pd.read_csv(filepath_or_buffer='data/lp1_X.csv')
y = pd.read_csv(filepath_or_buffer='data/lp1_y.csv').squeeze()

In [None]:
# Extract features
# 'id' identifies each time series and 'timestep' orders the rows within it.
X_feat = extract_features(X, column_id='id', column_sort='timestep')
# Drop every feature column that contains at least one missing value.
X_feat = X_feat.dropna(axis=1)

# Train/test split. Use the same indices as above
# NOTE(review): .iloc is positional, so this assumes the rows of X_feat and y
# are in the same order as when train_index/test_index were built -- confirm.
X_train, y_train = X_feat.iloc[train_index], y.iloc[train_index]
X_test, y_test = X_feat.iloc[test_index], y.iloc[test_index]

# Filter features
# Selection is computed from the training split only; the test split simply
# reuses the columns that were kept.
X_train_filtered = select_features(X_train, y_train, multiclass=True, n_significant=1)
X_test_filtered = X_test[X_train_filtered.columns]

# Define a list of sklearn classifiers
# These four names are expected to have been created in an earlier cell.
classifiers = [knn_clf, ada_clf, rf_clf, svc_clf]

# For each classifier, fit and print out the precision, recall and F1-Score
# Which model performed best?
for classifier in classifiers:

    classifier.fit(X_train_filtered, y_train)

    y_pred = classifier.predict(X_test_filtered)

    precision, recall, fscore, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')

    # Lead with the estimator's class name so each block of metrics can be
    # attributed to the model that produced it.
    print(
        f"{type(classifier).__name__} \n"
        f"Precision: {precision} \nRecall: {recall} \nF1-Score: {fscore} \n==================="
    )


## 3. Evaluation

In [None]:
# Make a bar plot of F1-Scores for each method that we tried

# The scores are hard-coded here, not computed in this cell; update them by
# hand if the models above are re-run and the results change.
# NOTE: assigning to `y` replaces the label data loaded earlier under the same
# name, so re-run the data-loading cell before re-running any modeling cell.
x = ['Bagging (WEASEL)', 'Summary (Random forest)', 'TSFresh (Random forest)']
y = [0.50, 0.73, 0.81]

fig, ax = plt.subplots()
ax.bar(x, y)
ax.set_xlabel('Model')
ax.set_ylabel('F1-Score (weighted)')
ax.set_ylim(0, 1)

# Annotate each bar with its score slightly above the bar top. Format with two
# decimals so 0.50 is shown as "0.50" rather than "0.5".
for bar_pos, score in enumerate(y):
    ax.text(x=bar_pos, y=score + 0.05, s=f"{score:.2f}", ha='center')

plt.tight_layout()