In [None]:
# !pip install scikit-learn
# !pip install numpy==1.16.1

In [None]:
# Colab-only: mount Google Drive at /content/drive, the location under which
# the data-loading cell looks for the Opportunity .npz files.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# Import the metrics from the public package: the private
# sklearn.metrics.classification module was deprecated in scikit-learn 0.22
# and removed in 0.24, whereas sklearn.metrics exposes the same functions in
# both old and new releases.
from sklearn.metrics import accuracy_score, recall_score, f1_score
import scipy.stats as st

In [None]:
def A(sample):
    """Average (A) feature: the mean of every column of ``sample``.

    ``sample`` is indexed as ``sample[:, col]``, so it must expose a second
    dimension. The result is a Python list holding one mean per column, in
    column order.
    """
    return [np.average(sample[:, axis]) for axis in range(sample.shape[1])]

def SD(sample):
    """Standard Deviation (SD) feature: ``np.std`` of every column.

    ``np.std`` is called with its defaults, i.e. the population standard
    deviation. The result is a Python list with one value per column of
    ``sample``, in column order.
    """
    return [np.std(sample[:, axis]) for axis in range(sample.shape[1])]

def AAD(sample):
    """Average Absolute Difference (AAD) feature for every column.

    For each column the mean of ``|value - column mean|`` is computed. The
    result is a Python list with one value per column of ``sample``, in
    column order.
    """
    deviations = []
    for axis in range(sample.shape[1]):
        column = sample[:, axis]
        centred = column - np.mean(column)
        deviations.append(np.mean(np.absolute(centred)))

    return deviations

def ARA(sample):
    """Average Resultant Acceleration (ARA) feature.

    For every row of ``sample`` the values of all columns are squared, summed
    and square-rooted (sqrt(x_i^2 + y_i^2 + z_i^2) when there are three
    columns); the mean of those per-row magnitudes is returned as a
    one-element list. ``sample`` itself is left untouched.
    """
    squared = np.power(sample, 2)

    # Add the squared columns one at a time, starting from the scalar 0, so
    # the summation order is fixed.
    total = 0
    for axis in range(squared.shape[1]):
        total = total + squared[:, axis]

    magnitude = np.sqrt(total)
    return [np.average(magnitude)]

def TBP(sample):
    """Locate peaks in every column of ``sample``.

    Each column is passed to ``scipy.signal.find_peaks_cwt`` with widths
    ``np.arange(1,4)``; the list returned here holds that call's result (the
    peak indices) for each column, in column order.

    NOTE(review): the name suggests "time between peaks", but no differences
    between peak positions are computed -- the raw peak indices are returned.
    """
    from scipy import signal

    return [
        signal.find_peaks_cwt(sample[:, axis], np.arange(1,4))
        for axis in range(sample.shape[1])
    ]

In [None]:
## Feature Extraction

def feature_extraction(X):
    """Turn every sample in ``X`` into one flat feature vector.

    The features follow Catal et al. 2015. For each sample the outputs of the
    helpers below are concatenated in this order:

    * Average (A)
    * Average (A) again
    * Standard Deviation (SD)
    * Average Absolute Difference (AAD)
    * Average Resultant Acceleration (ARA)

    The per-sample vectors are stacked with ``np.array`` and returned.

    NOTE(review): A is concatenated twice, so the average columns appear
    duplicated in the output, and Time Between Peaks (TBP) is not part of the
    vector even though the feature list of the paper includes it. Left as-is
    because removing the duplicate would change the number of returned
    features -- confirm whether this is intended.
    """
    rows = []
    for window in X:
        rows.append(np.hstack((
            A(window),
            A(window),
            SD(window),
            AAD(window),
            ARA(window),
        )))

    return np.array(rows)

In [None]:
# Classical Machine Learning Algos
def train_j48(X, y):
    """Create the decision-tree base learner.

    Returns a ``DecisionTreeClassifier`` built with scikit-learn's defaults.
    Despite the name, nothing is fitted here: ``X`` and ``y`` are accepted but
    not used, and the estimator is returned unfitted.
    """
    from sklearn import tree

    return tree.DecisionTreeClassifier()

def train_mlp(X, y):
    """Create the multi-layer-perceptron base learner (returned unfitted).

    The single hidden layer is sized like Weka's ``a`` setting:
    ``(number of attributes + number of classes) / 2``, truncated to an int.

    Parameters
    ----------
    X : array with the feature matrix; only ``X.shape[1]`` (the number of
        features) is read.
    y : array of class labels; only the number of distinct labels is read.

    Returns
    -------
    An unfitted ``MLPClassifier``.
    """
    from sklearn.neural_network import MLPClassifier

    # Count the distinct labels instead of taking np.amax(y): with zero-based
    # labels (e.g. the output of np.argmax) the maximum is one less than the
    # number of classes, which made the hidden layer smaller than intended.
    n_classes = np.unique(y).size
    hidden_units = int((X.shape[1] + n_classes) / 2)  # Weka's default 'a'

    # NOTE(review): per the scikit-learn documentation, learning_rate_init is
    # only used by the 'sgd'/'adam' solvers and momentum only by 'sgd', so with
    # solver='lbfgs' these two Weka defaults have no effect. Kept unchanged to
    # avoid altering the optimiser -- confirm which solver is intended.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes = (hidden_units,),
                        learning_rate_init=0.3, momentum=0.2, max_iter=500, #Default param of weka
                        )

    return clf

def train_logistic_regression(X, y):
    """Create the logistic-regression base learner.

    Returns a ``LogisticRegression`` configured with ``multi_class='ovr'``.
    Nothing is fitted here: ``X`` and ``y`` are accepted but not used, and the
    estimator is returned unfitted.
    """
    from sklearn.linear_model import LogisticRegression

    return LogisticRegression(multi_class='ovr')

In [None]:
def OpportunityDataSetAnalysis(data_dir='/content/drive/MyDrive/Opportunity'):
  """Load the Opportunity train/test split from ``.npz`` archives.

  Parameters
  ----------
  data_dir : directory holding the four archives
      ``Opportunity_train_X.npz``, ``Opportunity_test_X.npz``,
      ``Opportunity_train_y.npz`` and ``Opportunity_test_Y.npz``. Defaults to
      the Google Drive location the notebook has always used.

  Returns
  -------
  ``(X_train, Y_train, X_test, Y_test)``. The X arrays are returned exactly as
  stored under the ``arr_0`` key; the label arrays are reduced with
  ``np.argmax(..., axis=1)``, i.e. they are presumably stored one-hot encoded.

  NOTE(review): the train label file ends in a lower-case ``_y`` while the
  test label file ends in an upper-case ``_Y``. The names are kept as they
  were; confirm they match the files on disk (this matters on case-sensitive
  file systems).
  """
  import os

  def _load_array(file_name):
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once the array has been read into memory.
    with np.load(os.path.join(data_dir, file_name)) as archive:
      return archive['arr_0']

  X_train = _load_array('Opportunity_train_X.npz')
  X_test = _load_array('Opportunity_test_X.npz')
  Y_train = _load_array('Opportunity_train_y.npz')
  Y_test = _load_array('Opportunity_test_Y.npz')
  Y_train = np.argmax(Y_train, axis=1)
  Y_test = np.argmax(Y_test, axis=1)
  return X_train,Y_train,X_test,Y_test

In [None]:
def TrainAndTestOpportunity(X_train,Y_train,X_test,Y_test):
    """Fit the three-model voting ensemble and print its test metrics.

    Both splits go through ``feature_extraction``. A decision tree, an MLP and
    a logistic regression are combined in a ``VotingClassifier`` with
    ``voting='soft'``, which is fitted on the training features. Accuracy,
    macro-averaged recall and macro-averaged F1 of the predictions on the test
    features are printed; nothing is returned.
    """
    train_features = feature_extraction(X_train)
    test_features = feature_extraction(X_test)

    # The helpers only construct the estimators; the ensemble fits them below.
    base_learners = [
        ('dt', train_j48(train_features, Y_train)),
        ('mlp', train_mlp(train_features, Y_train)),
        ('lr', train_logistic_regression(train_features, Y_train)),
    ]
    ensemble = VotingClassifier(estimators=base_learners, voting='soft')
    ensemble.fit(train_features, Y_train)
    predicted = ensemble.predict(test_features)

    scores = (
        accuracy_score(Y_test, predicted),
        recall_score(Y_test, predicted, average='macro'),
        f1_score(Y_test, predicted, average='macro'),
    )

    print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}]'.format(*scores))
    print('________________________________________________________________')

In [None]:
def ReportAccuracies(avg_acc, avg_recall,avg_f1):
  """Print the mean and a 90% confidence interval of three score series.

  For each sequence (accuracy, recall, F1) a Student-t interval is computed
  with ``len(values) - 1`` degrees of freedom, centred on the mean and scaled
  by the standard error of the mean. One line per metric is printed; nothing
  is returned.
  """
  def _interval(values):
    # 0.9 is the confidence level of the two-sided t interval.
    return st.t.interval(0.9, len(values) - 1, loc=np.mean(values), scale=st.sem(values))

  # All three intervals are computed before anything is printed.
  acc_low, acc_high = _interval(avg_acc)
  recall_low, recall_high = _interval(avg_recall)
  f1_low, f1_high = _interval(avg_f1)

  print('Mean Accuracy[{:.4f}] IC [{:.4f}, {:.4f}]'.format(np.mean(avg_acc), acc_low, acc_high))
  print('Mean Recall[{:.4f}] IC [{:.4f}, {:.4f}]'.format(np.mean(avg_recall), recall_low, recall_high))
  print('Mean F1[{:.4f}] IC [{:.4f}, {:.4f}]'.format(np.mean(avg_f1), f1_low, f1_high))

In [None]:
def RunOpportunity():
  """Load the Opportunity split and run the train/test evaluation on it."""
  # The loader returns (X_train, Y_train, X_test, Y_test), which is exactly
  # the positional order TrainAndTestOpportunity expects.
  dataset = OpportunityDataSetAnalysis()
  TrainAndTestOpportunity(*dataset)

In [None]:
# Entry point of the notebook: load the data, fit the ensemble and print its test metrics.
RunOpportunity()