In [None]:
# Import packages
import warnings
warnings.filterwarnings('ignore')

import sktime
from sktime.classification.kernel_based import ROCKETClassifier
from sktime.classification.kernel_based import Arsenal
from sktime.classification.interval_based import DrCIF
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.classification.dictionary_based import TemporalDictionaryEnsemble
from sktime.datatypes._panel._convert import (
    from_2d_array_to_nested,
    from_nested_to_2d_array,
    is_nested_dataframe,
)

import sys
import os
import argparse
import pandas as pd
import numpy as np

# =====

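# Define the classify function, where:

# classifier = classifier class to instantiate (e.g. Arsenal), not its name string
# traindirectory = train data directory, appended to the current working directory (e.g. "/resampled_txt/resampletrain1")
# testdirectory = test data directory, appended to the current working directory
# resultdirectory = directory the result files are written to, appended to the current working directory
# index = position of the dataset of interest in the sorted train/test directory listings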

def _nth_sorted_file(directory, index):
    """Return the name at position ``index`` of the sorted listing of ``directory``."""
    names = sorted(os.listdir(directory))
    try:
        return names[index]
    except IndexError:
        # Re-raise the same exception type with enough context to debug a bad -i value
        raise IndexError(
            f"index {index} is out of range for the {len(names)} "
            f"entries in {directory}"
        ) from None


def _read_labelled_series(filepath):
    """Load a space-delimited file as ``(X, y)`` float64 arrays.

    Column 0 holds the label of each case; the remaining columns hold the
    series values.
    """
    data = np.genfromtxt(filepath, delimiter=' ')
    # genfromtxt squeezes a one-row file down to a 1-D array; restore the row
    # axis so a single-case file still splits into label + series.
    data = np.atleast_2d(np.asarray(data, dtype=np.float64))
    if data.shape[1] < 2:
        raise ValueError(
            f"{filepath} must contain a label column followed by at least "
            f"one value column; parsed shape was {data.shape}"
        )
    return data[:, 1:].copy(), data[:, 0].copy()


def _build_output_frame(labels, predictions, probabilities):
    """Assemble the Label / Prediction / per-class probability table."""
    frame = pd.DataFrame({'Label': labels, 'Prediction': predictions})
    return pd.concat([frame, pd.DataFrame(probabilities)], axis=1)


def classify(classifier, traindirectory, testdirectory, resultdirectory, index):
    """Fit a classifier on one train/test file pair and write both result tables.

    The train and test directories are listed and sorted independently, and
    the file at position ``index`` of each listing is used, so the two
    directories are expected to sort into matching order.

    Parameters
    ----------
    classifier : callable
        Classifier class (e.g. ``Arsenal``). It is instantiated as
        ``classifier(save_transformed_data=True, n_jobs=-1)`` and must provide
        ``fit``, ``predict``, ``predict_proba``, ``classes_`` and
        ``_get_train_probs``.
    traindirectory, testdirectory, resultdirectory : str
        Paths appended verbatim to the current working directory, so they are
        expected to start with a path separator
        (e.g. ``"/resampled_txt/resampletrain1"``).
    index : int
        Position of the dataset in the sorted directory listings.

    Raises
    ------
    IndexError
        If ``index`` is out of range for either directory listing.
    ValueError
        If a data file does not parse to at least two columns.

    Notes
    -----
    Two space-separated files are written into the result directory, named
    ``<train file stem>_RESULTS.txt`` and ``<test file stem>_RESULTS.txt``.
    If the train and test files share the same stem, both tables target the
    same path and the test table overwrites the train table.
    """
    working_dir = os.getcwd()
    # The directory arguments carry their own leading separator, so they are
    # concatenated; os.path.join would discard the cwd for such arguments.
    train_dir = working_dir + traindirectory
    test_dir = working_dir + testdirectory
    result_dir = working_dir + resultdirectory

    train_name = _nth_sorted_file(train_dir, index)
    test_name = _nth_sorted_file(test_dir, index)

    # =====

    # Read train and test files
    trainfilepath = os.path.join(train_dir, train_name)
    print(trainfilepath)
    X_train, Y_train = _read_labelled_series(trainfilepath)
    X_test, Y_test = _read_labelled_series(os.path.join(test_dir, test_name))

    # =====

    # Results file paths: strip whatever extension the input has instead of
    # assuming it is exactly four characters long.
    trainresultpath = os.path.join(
        result_dir, os.path.splitext(train_name)[0] + "_RESULTS.txt"
    )
    testresultpath = os.path.join(
        result_dir, os.path.splitext(test_name)[0] + "_RESULTS.txt"
    )
    # Create the output directory up front so a missing directory cannot
    # discard the results after the classifier has already been trained.
    os.makedirs(result_dir, exist_ok=True)

    # =====

    # Train classifier on the nested-DataFrame form of the data
    X_train = from_2d_array_to_nested(X_train)
    X_test = from_2d_array_to_nested(X_test)
    clf = classifier(save_transformed_data=True, n_jobs=-1)
    clf.fit(X_train, Y_train)

    # =====

    # OUTPUT FOR TRAIN
    # Training probabilities come from the classifier's _get_train_probs;
    # the prediction is the class with the highest probability for each case.
    train_probabilities = clf._get_train_probs(X_train, Y_train)
    train_predictions = clf.classes_[np.argmax(train_probabilities, axis=1)]
    trainoutputdf = _build_output_frame(
        Y_train, train_predictions, train_probabilities
    )

    # OUTPUT FOR TEST
    # Predictions come from predict() and probabilities from predict_proba(),
    # called in that order on the trained model.
    test_predictions = clf.predict(X_test)
    test_probabilities = clf.predict_proba(X_test)
    testoutputdf = _build_output_frame(
        Y_test, test_predictions, test_probabilities
    )

    # Output results
    trainoutputdf.to_csv(trainresultpath, header=True, index=False, sep=' ', mode='w')
    testoutputdf.to_csv(testresultpath, header=True, index=False, sep=' ', mode='w')

    
# Parser inputs

# Classifier names accepted on the command line, mapped to the sktime class
# that classify() instantiates.
CLASSIFIERS = {
    "Arsenal": Arsenal,
    "DrCIF": DrCIF,
    "ShapeletTransformClassifier": ShapeletTransformClassifier,
    "TemporalDictionaryEnsemble": TemporalDictionaryEnsemble,
}

parser = argparse.ArgumentParser()

# Restricting the choices makes an unknown name fail at parse time with a
# usage message, instead of leaving `classifier` unbound further down.
parser.add_argument("-c", "--classifier", required=False, default="Arsenal",
                    choices=sorted(CLASSIFIERS),
                    help="name of the classifier to train")
parser.add_argument("-tr", "--traindirectory", required=False, default="/resampled_txt/resampletrain1",
                    help="train data directory, appended to the current working directory")
parser.add_argument("-te", "--testdirectory", required=False, default="/resampled_txt/resampletest1",
                    help="test data directory, appended to the current working directory")
parser.add_argument("-re", "--resultdirectory", required=False, default="/resampled_txt/arsenal1",
                    help="result directory, appended to the current working directory "
                         "(the default is the same for every classifier)")
parser.add_argument("-i", "--index", type=int, required=False, default=0,
                    help="position of the dataset in the sorted directory listings")

arguments = parser.parse_args()

classifier_name = arguments.classifier
classifier = CLASSIFIERS[classifier_name]

print(classifier_name)
print(classifier)

traindirectory = arguments.traindirectory
testdirectory = arguments.testdirectory
resultdirectory = arguments.resultdirectory
index = arguments.index

# classify(classifier, traindirectory, testdirectory, resultdirectory, index)

# classify(Arsenal, r"/resampled_txt/resampletrain1", r"/resampled_txt/resampletest1", r"/resampled_txt/arsenal1", 9)
