In [5]:
import numpy as np
import scipy as sp
from scipy.fft import fft
from scipy.signal import butter, lfilter
import scipy.stats as stats
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, recall_score
from operator import itemgetter
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import time
import pickle
import random
import os
from google.colab import drive

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
seed = 57

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)

In [8]:
drive.mount('/content/drive')
x = pickle.load(open('/content/drive/MyDrive/CI_Dataset/x.pkl', 'rb'))
y = pickle.load(open('/content/drive/MyDrive/CI_Dataset/y.pkl', 'rb'))

Mounted at /content/drive


In [9]:
not_seizure_samples = x[0:400]
seizure_samples = x[400:500]

In [10]:
sampling_freq = 173.6

b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

not_seizure_samples_filtered = np.array([lfilter(b, a, not_seizure_samples[ind, :]) for ind in range(not_seizure_samples.shape[0])])
seizure_samples_filtered = np.array([lfilter(b, a, seizure_samples[ind, :]) for ind in range(seizure_samples.shape[0])])

In [11]:
def normalization (data_points):
  normalized_data_points = np.zeros_like(data_points)
  scaler = MinMaxScaler()

  for column_index in range(data_points.shape[1]):
    normalized_data_points[:, column_index] = scaler.fit_transform(data_points[:, column_index].reshape(-1, 1)).flatten()
  
  return normalized_data_points

In [12]:
def get_features_added_data_points(data_points):
  features_added_data_points = []
  index = 0

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.min(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.max(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.mean(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.sqrt(np.min(data_point ** 2))) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.var(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.std(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.mean(data_point ** 2)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.max(np.abs(data_point))) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.ptp(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.max(np.abs(data_point)) / np.sqrt(np.mean(data_point ** 2))) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(stats.skew(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(stats.kurtosis(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.sqrt(np.mean(data_point ** 2)) / np.mean(data_point)) for data_point in data_points]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.max(np.abs(data_point)) / np.mean(data_point)) for data_point in data_points]
  index += 1

  fourier_transformed_data_points = []
  [fourier_transformed_data_points.append(fft(data_point)) for data_point in data_points]

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.max(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.sum(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.mean(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.var(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(np.max(np.abs(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points)))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(stats.skew(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points.append([])
  [features_added_data_points[index].append(stats.kurtosis(np.abs(fourier_transformed_data_points[data_point_index]**2) / len(data_points))) for data_point_index in range(len(data_points))]
  index += 1

  features_added_data_points = np.array(features_added_data_points).T

  return features_added_data_points

In [13]:
not_seizure_samples = not_seizure_samples_filtered
seizure_sample = seizure_samples_filtered

data_points_not_normalized = np.concatenate((not_seizure_samples, seizure_sample))
features_added_data_points_not_normalized = get_features_added_data_points(data_points_not_normalized)
data_points_normalized = normalization(features_added_data_points_not_normalized)

labels = np.concatenate((np.zeros((400, 1)), np.ones((100, 1))))

Part One -> Feature Selection

In [14]:
def implement_classification (measurement_type, algorithm_type, data_points_train, data_points_test, labels_train, labels_test, labels_type=0):  
  if algorithm_type == 'ID3':
    clf = DecisionTreeClassifier(random_state=seed)
  elif algorithm_type == 'Random Forest':
    clf = RandomForestClassifier(n_estimators=90, random_state=seed)

  clf = clf.fit(data_points_train, labels_train)
  labels_prediction = clf.predict(data_points_test)

  measurment = 0

  if measurement_type == 'Accuracy':
    measurment = accuracy_score(labels_test, labels_prediction)
  elif measurement_type == 'Recall':
    if labels_type == 0:
      measurment = recall_score(labels_test, labels_prediction)
    else:
      measurment = recall_score(labels_test, labels_prediction, average='weighted')

  return measurment

In [15]:
def get_sorted_measurements(measurement_type, data_points_train, data_points_test, labels_train, labels_test, labels_type=0):
  measurements = np.zeros((data_points_train.shape[1], 2))
  for feature_index in range(data_points_train.shape[1]):
    measurements[feature_index][0] = implement_classification(measurement_type, 'ID3', data_points_train[:, feature_index].reshape((data_points_train.shape[0], 1)), data_points_test[:, feature_index].reshape((data_points_test.shape[0], 1)), labels_train, labels_test, labels_type=labels_type)
    measurements[feature_index][1] = feature_index

  sorted_measurements = sorted(measurements, reverse=True, key=itemgetter(0))

  return sorted_measurements

In [16]:
def get_correlation(feature_1, feature_2):
  correlation_pearson_based = pearsonr(feature_1, feature_2)[0]

  return correlation_pearson_based

In [17]:
def get_f1(measurement, correlation):
  f1 = (2 * measurement * (1 - correlation)) / (measurement + (1 - correlation))

  return f1

In [18]:
def select_features(data_points, labels, measurement_type, stop_method=None, stop_measurment=0, check_all=False, labels_type=0):
  data_points_train, data_points_test, labels_train, labels_test = train_test_split(data_points, labels, random_state=seed, test_size=0.2)

  sorted_measurements = get_sorted_measurements(measurement_type, data_points_train, data_points_test, labels_train, labels_test, labels_type=labels_type)
  selected_features_indexes = [int(sorted_measurements[0][1])]

  previouse_measurment = 0

  for feature_index in range(1, len(sorted_measurements)):
    current_feature_index = sorted_measurements[feature_index][1]

    if check_all == True:
      selected_and_current_features_indexes = selected_features_indexes.copy()
      selected_and_current_features_indexes.append(int(current_feature_index))

      current_feature_measurement = implement_classification(measurement_type, 'ID3', data_points_train[:, selected_and_current_features_indexes].reshape((data_points_train.shape[0], len(selected_and_current_features_indexes))), data_points_test[:, selected_and_current_features_indexes].reshape((data_points_test.shape[0], len(selected_and_current_features_indexes))), labels_train, labels_test, labels_type=labels_type)
    else:
      current_feature_measurement = sorted_measurements[feature_index][0]

    correlations = []

    for selected_feature_index in range(len(selected_features_indexes)):
      correlation = get_correlation(data_points_train[:, int(current_feature_index)], data_points_train[:, selected_feature_index])
      correlations.append(np.abs(correlation))
    
    current_feature_correlation = np.max(correlations)

    f1 = get_f1(current_feature_measurement, current_feature_correlation)

    if f1 > 0.5:
      selected_features_indexes.append(int(current_feature_index))

      if stop_method == 'Feature Count':
        if len(selected_features_indexes) == stop_measurment:
          return sorted(selected_features_indexes)
      elif stop_method == 'Measurment Reached' or stop_method == 'Stabled':
        classification_for_stop_measurement = implement_classification(measurement_type, 'Random Forest', data_points_train[:, selected_features_indexes].reshape((data_points_train.shape[0], len(selected_features_indexes))), data_points_test[:, selected_features_indexes].reshape((data_points_test.shape[0], len(selected_features_indexes))), labels_train, labels_test, labels_type=labels_type)

        if stop_method == 'Measurment Reached':
          if classification_for_stop_measurement >= stop_measurment:
            return sorted(selected_features_indexes)
        elif stop_method == 'Stabled':
          if classification_for_stop_measurement - previouse_measurment >= stop_measurment:
            return sorted(selected_features_indexes)

  return sorted(selected_features_indexes)

In [19]:
data_points_train_as_input, data_points_test_as_input, labels_train_as_input, labels_test_as_input = train_test_split(data_points_normalized, labels, random_state=seed, test_size=0.2)

In [20]:
# With Accuracy
########################################################################

# Without Feature Selection
start = time.time()
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input, data_points_test_as_input, labels_train_as_input, labels_test_as_input)
end = time.time()
print("Without Feature Selection :: Accuracy")
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

# With Feature Selection
print("With Feature Selection", '\n')

# Without Checking All

#-----------------------------------------------------------------------
# Without Stop

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy')
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: No Stop :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
# With Stop

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Feature Count

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', stop_method='Feature Count', stop_measurment=5)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: Stop :: Feature Count :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Measurment Reached

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', stop_method='Measurment Reached', stop_measurment=0.95)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: Stop :: Measurment Reached :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Stablity

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', stop_method='Stabled', stop_measurment=0.1)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: Stop :: Stablity :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

########################################################################
# With Checking All

#-----------------------------------------------------------------------
# Without Stop

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', check_all=True)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: No Stop :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
# With Stop

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Feature Count

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', stop_method='Feature Count', stop_measurment=5, check_all=True)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: Stop :: Feature Count :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Measurment Reached

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', stop_method='Measurment Reached', stop_measurment=0.95, check_all=True)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: Stop :: Measurment Reached :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Stablity

# With Accuracy as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Accuracy', stop_method='Stabled', stop_measurment=0.1, check_all=True)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: Stop :: Stablity :: Accuracy")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

Without Feature Selection :: Accuracy
Result Accuracy 0.95
Result Time 0.14714717864990234 ms 

With Feature Selection 

No Checking All :: No Stop :: Accuracy
Selected Features Indexes [3, 4, 9, 10, 11, 12, 13, 17, 19, 20]
Result Accuracy 0.94
Result Time 0.203629732131958 ms 

No Checking All :: Stop :: Feature Count :: Accuracy
Selected Features Indexes [3, 4, 10, 11, 17]
Result Accuracy 0.95
Result Time 0.20018243789672852 ms 

No Checking All :: Stop :: Measurment Reached :: Accuracy
Selected Features Indexes [3, 4, 10, 11, 17, 20]
Result Accuracy 0.94
Result Time 0.8785185813903809 ms 

No Checking All :: Stop :: Stablity :: Accuracy
Selected Features Indexes [4, 17]
Result Accuracy 0.94
Result Time 0.2868764400482178 ms 

Checking All :: No Stop :: Accuracy
Selected Features Indexes [3, 4, 9, 10, 11, 12, 13, 17, 19, 20]
Result Accuracy 0.94
Result Time 0.20125746726989746 ms 

Checking All :: Stop :: Feature Count :: Accuracy
Selected Features Indexes [3, 4, 10, 11, 17]
Result A

In [21]:
# With Recall
########################################################################

# Without Feature Selection
start = time.time()
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input, data_points_test_as_input, labels_train_as_input, labels_test_as_input)
end = time.time()
print("Without Feature Selection :: Recall")
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

# With Feature Selection
print("With Feature Selection", '\n')

# Without Checking All

#-----------------------------------------------------------------------
# Without Stop

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall')
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: No Stop :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
# With Stop

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Feature Count

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', stop_method='Feature Count', stop_measurment=5)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: Stop :: Feature Count :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Measurment Reached

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', stop_method='Measurment Reached', stop_measurment=0.92)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: Stop :: Measurment Reached :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Stablity

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', stop_method='Stabled', stop_measurment=0.1)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("No Checking All :: Stop :: Stablity :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

########################################################################
# With Checking All

#-----------------------------------------------------------------------
# Without Stop

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', check_all=True)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: No Stop :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
# With Stop

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Feature Count

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', stop_method='Feature Count', stop_measurment=5, check_all=True)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: Stop :: Feature Count :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Measurment Reached

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', stop_method='Measurment Reached', stop_measurment=0.92, check_all=True)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: Stop :: Measurment Reached :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# With Stablity

# With Recall as Measurment Technique
start = time.time()
selected_features_indexes = select_features(data_points_normalized, labels, 'Recall', stop_method='Stabled', stop_measurment=0.1, check_all=True)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input[:, selected_features_indexes].reshape((data_points_train_as_input.shape[0], len(selected_features_indexes))), data_points_test_as_input[:, selected_features_indexes].reshape((data_points_test_as_input.shape[0], len(selected_features_indexes))), labels_train_as_input, labels_test_as_input)
end = time.time()
print("Checking All :: Stop :: Stablity :: Recall")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

Without Feature Selection :: Recall
Result Recall 0.9230769230769231
Result Time 0.15065598487854004 ms 

With Feature Selection 

No Checking All :: No Stop :: Recall
Selected Features Indexes [8, 10, 11]
Result Recall 0.9230769230769231
Result Time 0.1825268268585205 ms 

No Checking All :: Stop :: Feature Count :: Recall
Selected Features Indexes [8, 10, 11]
Result Recall 0.9230769230769231
Result Time 0.18039727210998535 ms 

No Checking All :: Stop :: Measurment Reached :: Recall
Selected Features Indexes [8, 10]
Result Recall 0.9230769230769231
Result Time 0.36055850982666016 ms 

No Checking All :: Stop :: Stablity :: Recall
Selected Features Indexes [8, 10]
Result Recall 0.9230769230769231
Result Time 0.32265353202819824 ms 

Checking All :: No Stop :: Recall
Selected Features Indexes [3, 8, 9, 10, 11, 12, 13, 17, 19, 20]
Result Recall 0.9230769230769231
Result Time 0.3018765449523926 ms 

Checking All :: Stop :: Feature Count :: Recall
Selected Features Indexes [3, 8, 10, 11, 

Part Two -> CoC

In [22]:
def get_clusters_indexes(labels_array, label_value):
  separated_clustering_labels = [label_index for label_index in range(len(labels_array)) if labels_array[label_index] == label_value]
  return separated_clustering_labels

In [23]:
def get_clusters(clustering_labels, predistion_labels, k):
  separated_clustering_indexes = []
  separated_prediction_indexes = []

  for label_value in range(k):
    separated_clustering_indexes.append([])
    separated_clustering_indexes[label_value] = get_clusters_indexes(clustering_labels, label_value)

    separated_prediction_indexes.append([])
    separated_prediction_indexes[label_value] = get_clusters_indexes(predistion_labels, label_value)

  return separated_clustering_indexes, separated_prediction_indexes

In [24]:
def implement_clustering(data_points_train, data_points_test, k):
  kmeans = KMeans(n_clusters=k, random_state=seed).fit(data_points_train)
  clustering_labels = kmeans.labels_

  predistion_labels = kmeans.predict(data_points_test)

  separated_clustering_indexes, separated_prediction_indexes = get_clusters(clustering_labels, predistion_labels, k)

  return np.array(separated_clustering_indexes), np.array(separated_prediction_indexes)

In [25]:
def implement_clustering_and_classification(data_points, measurement_type, cluster_k_value, labels_type=0):
  data_points_train, data_points_test, labels_train, labels_test = train_test_split(data_points, labels, random_state=seed, test_size=0.2)
  separated_clustering_indexes, separated_prediction_indexes = implement_clustering(data_points_train, data_points_test, cluster_k_value)
  measurement = []

  for label_value in range(cluster_k_value):
    measurement.append(implement_classification(measurement_type, 'Random Forest', data_points_train[separated_clustering_indexes[label_value]].reshape((len(separated_clustering_indexes[label_value]), data_points_train.shape[1])), data_points_test[separated_prediction_indexes[label_value]].reshape((len(separated_prediction_indexes[label_value]), data_points_test.shape[1])), labels_train[separated_clustering_indexes[label_value]], labels_test[separated_prediction_indexes[label_value]], labels_type=labels_type))
  
  return np.array(measurement)


In [26]:
# With Accuracy
########################################################################

# Without Clustering + Classification
start = time.time()
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_as_input, data_points_test_as_input, labels_train_as_input, labels_test_as_input)
end = time.time()
print("Without CoC :: Accuracy")
print("Result Accuracy", measurement)
print("Result Time", end - start, "ms", "\n")

# With Clustering + Classification
print("With CoC", "\n")

#-----------------------------------------------------------------------
k = 1
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Accuracy", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 2
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Accuracy", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 3
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Accuracy", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 4
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Accuracy", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 5
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Accuracy", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

Without CoC :: Accuracy
Result Accuracy 0.95
Result Time 0.16381263732910156 ms 

With CoC 

k =  1  :: Accuracy
Accuracy of Classification on Each Cluster [0.95]
Result Accuracy (Average) 0.95
Result Time 0.34263062477111816 ms 

k =  2  :: Accuracy
Accuracy of Classification on Each Cluster [0.95402299 0.92307692]
Result Accuracy (Average) 0.938549955791335
Result Time 0.3083031177520752 ms 

k =  3  :: Accuracy
Accuracy of Classification on Each Cluster [1.         0.98529412 0.9047619 ]
Result Accuracy (Average) 0.9633520074696545
Result Time 0.43161487579345703 ms 

k =  4  :: Accuracy
Accuracy of Classification on Each Cluster [0.97101449 1.         1.         0.93333333]
Result Accuracy (Average) 0.9760869565217392
Result Time 0.5720024108886719 ms 

k =  5  :: Accuracy
Accuracy of Classification on Each Cluster [0.98461538 1.         1.         0.82352941 0.9       ]
Result Accuracy (Average) 0.9416289592760181
Result Time 0.6922485828399658 ms 



In [27]:
# With Recall
########################################################################

# Without Clustering + Classification
start = time.time()
measurement = implement_classification('Recall', 'Random Forest', data_points_train_as_input, data_points_test_as_input, labels_train_as_input, labels_test_as_input)
end = time.time()
print("Without CoC :: Recall")
print("Result Recall", measurement)
print("Result Time", end - start, "ms", "\n")

# With Clustering + Classification
print("With CoC", "\n")

#-----------------------------------------------------------------------
k = 1
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Recall", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 2
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Recall", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 3
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Recall", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 4
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Recall", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

#-----------------------------------------------------------------------
k = 5
start = time.time()
measurement = implement_clustering_and_classification(data_points_normalized, "Recall", cluster_k_value=k)
end = time.time()
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement))
print("Result Time", end - start, "ms", "\n")

Without CoC :: Recall
Result Recall 0.9230769230769231
Result Time 0.15340209007263184 ms 

With CoC 

k =  1  :: Recall
Recall of Classification on Each Cluster [0.92307692]
Result Recall (Average) 0.9230769230769231
Result Time 0.17303681373596191 ms 

k =  2  :: Recall
Recall of Classification on Each Cluster [0.85714286 1.        ]
Result Recall (Average) 0.9285714285714286
Result Time 0.3184657096862793 ms 

k =  3  :: Recall
Recall of Classification on Each Cluster [1. 1. 1.]
Result Recall (Average) 1.0
Result Time 0.5183489322662354 ms 

k =  4  :: Recall
Recall of Classification on Each Cluster [1. 1. 0. 1.]
Result Recall (Average) 0.75
Result Time 2.677194833755493 ms 

k =  5  :: Recall
Recall of Classification on Each Cluster [1.         1.         1.         0.85714286 0.        ]
Result Recall (Average) 0.7714285714285715
Result Time 1.163459300994873 ms 



Part Three -> Change Number of Classes

In [28]:
A_samples = x[0:100]
B_samples = x[100:200]
C_samples = x[200:300]
D_samples = x[300:400]
E_samples = x[400:500]

In [29]:
sampling_freq = 173.6

b, a = butter(3, [0.5,40], btype='bandpass',fs=sampling_freq)

A_samples_filtered = np.array([lfilter(b, a, A_samples[ind, :]) for ind in range(A_samples.shape[0])])
B_samples_filtered = np.array([lfilter(b, a, B_samples[ind, :]) for ind in range(B_samples.shape[0])])
C_samples_filtered = np.array([lfilter(b, a, C_samples[ind, :]) for ind in range(C_samples.shape[0])])
D_samples_filtered = np.array([lfilter(b, a, D_samples[ind, :]) for ind in range(D_samples.shape[0])])
E_samples_filtered = np.array([lfilter(b, a, E_samples[ind, :]) for ind in range(E_samples.shape[0])])

In [30]:
A_samples = A_samples_filtered
B_samples = B_samples_filtered
C_samples = C_samples_filtered
D_samples = D_samples_filtered
E_samples = E_samples_filtered

In [31]:
# Case 1 -> Classes: AB CDE
samples_with_label_0 = np.concatenate((A_samples, B_samples))
samples_with_label_1 = np.concatenate((C_samples, D_samples, E_samples))

data_points_not_normalized_case_1 = np.concatenate((samples_with_label_0, samples_with_label_1))
features_added_data_points_not_normalized_case_1 = get_features_added_data_points(data_points_not_normalized_case_1)
data_points_normalized_case_1 = normalization(features_added_data_points_not_normalized_case_1)

labels_case_1 = np.concatenate((np.zeros((len(samples_with_label_0), 1)), np.ones((len(samples_with_label_1), 1))))

In [32]:
data_points_train_case_1, data_points_test_case_1, labels_train_case_1, labels_test_case_1 = train_test_split(data_points_normalized_case_1, labels_case_1, random_state=seed, test_size=0.2)

In [33]:
# With Accuracy
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_1, data_points_test_case_1, labels_train_case_1, labels_test_case_1)
print("Without Feature Selection :: Without CoC :: Accuracy")
print("Result Accuracy", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_1, labels_case_1, 'Accuracy', stop_method='Feature Count', stop_measurment=5, check_all=False)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_1[:, selected_features_indexes].reshape((data_points_train_case_1.shape[0], len(selected_features_indexes))), data_points_test_case_1[:, selected_features_indexes].reshape((data_points_test_case_1.shape[0], len(selected_features_indexes))), labels_train_case_1, labels_test_case_1)
print("With Feature Selection :: Without CoC :: Accuracy")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement, "\n")

# With CoC
k = 4
measurement = implement_clustering_and_classification(data_points_normalized_case_1, "Accuracy", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Accuracy")
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Accuracy
Result Accuracy 0.85 

With Feature Selection :: Without CoC :: Accuracy
Checking All :: Stop :: Feature Count
Selected Features Indexes [0, 3, 9, 10, 11]
Result Accuracy 0.77 

Without Feature Selection :: With CoC :: Accuracy
k =  4  :: Accuracy
Accuracy of Classification on Each Cluster [0.97101449 1.         1.         0.93333333]
Result Accuracy (Average) 0.9760869565217392 



In [34]:
# With Recall
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_1, data_points_test_case_1, labels_train_case_1, labels_test_case_1)
print("Without Feature Selection :: Without CoC :: Recall")
print("Result Recall", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_1, labels_case_1, 'Recall')
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_1[:, selected_features_indexes].reshape((data_points_train_case_1.shape[0], len(selected_features_indexes))), data_points_test_case_1[:, selected_features_indexes].reshape((data_points_test_case_1.shape[0], len(selected_features_indexes))), labels_train_case_1, labels_test_case_1)
print("With Feature Selection :: Without CoC :: Recall")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement, "\n")

# With CoC
k = 3
measurement = implement_clustering_and_classification(data_points_normalized_case_1, "Recall", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Recall")
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Recall
Result Recall 0.9523809523809523 

With Feature Selection :: Without CoC :: Recall
Checking All :: Stop :: Feature Count
Selected Features Indexes [0, 3, 9, 10, 11, 12, 13, 19, 20]
Result Recall 0.9682539682539683 

Without Feature Selection :: With CoC :: Recall
k =  3  :: Recall
Recall of Classification on Each Cluster [1. 1. 1.]
Result Recall (Average) 1.0 



In [35]:
# Case 2 -> Classes: BD ACE
samples_with_label_0 = np.concatenate((B_samples, D_samples))
samples_with_label_1 = np.concatenate((A_samples, C_samples, E_samples))

data_points_not_normalized_case_2 = np.concatenate((samples_with_label_0, samples_with_label_1))
features_added_data_points_not_normalized_case_2 = get_features_added_data_points(data_points_not_normalized_case_2)
data_points_normalized_case_2 = normalization(features_added_data_points_not_normalized_case_2)

labels_case_2 = np.concatenate((np.zeros((len(samples_with_label_0), 1)), np.ones((len(samples_with_label_1), 1))))

In [36]:
data_points_train_case_2, data_points_test_case_2, labels_train_case_2, labels_test_case_2 = train_test_split(data_points_normalized_case_2, labels_case_2, random_state=seed, test_size=0.2)

In [37]:
# With Accuracy
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_2, data_points_test_case_2, labels_train_case_2, labels_test_case_2)
print("Without Feature Selection :: Without CoC :: Accuracy")
print("Result Accuracy", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_2, labels_case_2, 'Accuracy', stop_method='Feature Count', stop_measurment=5, check_all=False)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_2[:, selected_features_indexes].reshape((data_points_train_case_2.shape[0], len(selected_features_indexes))), data_points_test_case_2[:, selected_features_indexes].reshape((data_points_test_case_2.shape[0], len(selected_features_indexes))), labels_train_case_2, labels_test_case_2)
print("With Feature Selection :: Without CoC :: Accuracy")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement, "\n")

# With CoC
k = 4
measurement = implement_clustering_and_classification(data_points_normalized_case_2, "Accuracy", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Accuracy")
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Accuracy
Result Accuracy 0.7 

With Feature Selection :: Without CoC :: Accuracy
Checking All :: Stop :: Feature Count
Selected Features Indexes [1, 2, 9, 12, 19]
Result Accuracy 0.69 

Without Feature Selection :: With CoC :: Accuracy
k =  4  :: Accuracy
Accuracy of Classification on Each Cluster [0.98550725 1.         1.         1.        ]
Result Accuracy (Average) 0.9963768115942029 



In [38]:
# With Recall
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_2, data_points_test_case_2, labels_train_case_2, labels_test_case_2)
print("Without Feature Selection :: Without CoC :: Recall")
print("Result Recall", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_2, labels_case_2, 'Recall')
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_2[:, selected_features_indexes].reshape((data_points_train_case_2.shape[0], len(selected_features_indexes))), data_points_test_case_2[:, selected_features_indexes].reshape((data_points_test_case_2.shape[0], len(selected_features_indexes))), labels_train_case_2, labels_test_case_2)
print("With Feature Selection :: Without CoC :: Recall")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement, "\n")

# With CoC
k = 3
measurement = implement_clustering_and_classification(data_points_normalized_case_2, "Recall", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Recall")
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Recall
Result Recall 0.746031746031746 

With Feature Selection :: Without CoC :: Recall
Checking All :: Stop :: Feature Count
Selected Features Indexes [2, 3, 8, 9, 10, 11, 12, 13, 19, 20]
Result Recall 0.7936507936507936 

Without Feature Selection :: With CoC :: Recall
k =  3  :: Recall
Recall of Classification on Each Cluster [0.8  0.75 1.  ]
Result Recall (Average) 0.85 



In [39]:
# Case 3 -> Classes: AC BDE
samples_with_label_0 = np.concatenate((A_samples, C_samples))
samples_with_label_1 = np.concatenate((B_samples, D_samples, E_samples))

data_points_not_normalized_case_3 = np.concatenate((samples_with_label_0, samples_with_label_1))
features_added_data_points_not_normalized_case_3 = get_features_added_data_points(data_points_not_normalized_case_3)
data_points_normalized_case_3 = normalization(features_added_data_points_not_normalized_case_3)

labels_case_3 = np.concatenate((np.zeros((len(samples_with_label_0), 1)), np.ones((len(samples_with_label_1), 1))))

In [40]:
data_points_train_case_3, data_points_test_case_3, labels_train_case_3, labels_test_case_3 = train_test_split(data_points_normalized_case_3, labels_case_3, random_state=seed, test_size=0.2)

In [41]:
# With Accuracy
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_3, data_points_test_case_3, labels_train_case_3, labels_test_case_3)
print("Without Feature Selection :: Without CoC :: Accuracy")
print("Result Accuracy", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_3, labels_case_3, 'Accuracy', stop_method='Feature Count', stop_measurment=5, check_all=False)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_3[:, selected_features_indexes].reshape((data_points_train_case_3.shape[0], len(selected_features_indexes))), data_points_test_case_3[:, selected_features_indexes].reshape((data_points_test_case_3.shape[0], len(selected_features_indexes))), labels_train_case_3, labels_test_case_3)
print("With Feature Selection :: Without CoC :: Accuracy")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement, "\n")

# With CoC
k = 4
measurement = implement_clustering_and_classification(data_points_normalized_case_3, "Accuracy", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Accuracy")
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Accuracy
Result Accuracy 0.71 

With Feature Selection :: Without CoC :: Accuracy
Checking All :: Stop :: Feature Count
Selected Features Indexes [7, 10, 11, 12, 13]
Result Accuracy 0.7 

Without Feature Selection :: With CoC :: Accuracy
k =  4  :: Accuracy
Accuracy of Classification on Each Cluster [0.98550725 1.         1.         0.88888889]
Result Accuracy (Average) 0.9685990338164251 



In [42]:
# With Recall
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_3, data_points_test_case_3, labels_train_case_3, labels_test_case_3)
print("Without Feature Selection :: Without CoC :: Recall")
print("Result Recall", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_3, labels_case_3, 'Recall')
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_3[:, selected_features_indexes].reshape((data_points_train_case_3.shape[0], len(selected_features_indexes))), data_points_test_case_3[:, selected_features_indexes].reshape((data_points_test_case_3.shape[0], len(selected_features_indexes))), labels_train_case_3, labels_test_case_3)
print("With Feature Selection :: Without CoC :: Recall")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement, "\n")

# With CoC
k = 3
measurement = implement_clustering_and_classification(data_points_normalized_case_3, "Recall", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Recall")
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Recall
Result Recall 0.7619047619047619 

With Feature Selection :: Without CoC :: Recall
Checking All :: Stop :: Feature Count
Selected Features Indexes [3, 7, 9, 10, 11, 12, 13, 19, 20]
Result Recall 0.7936507936507936 

Without Feature Selection :: With CoC :: Recall
k =  3  :: Recall
Recall of Classification on Each Cluster [1. 1. 1.]
Result Recall (Average) 1.0 



In [43]:
# Case 4 -> Classes: AB CD E
samples_with_label_0 = np.concatenate((A_samples, B_samples))
samples_with_label_1 = np.concatenate((C_samples, D_samples))
samples_with_label_2 = E_samples

data_points_not_normalized_case_4 = np.concatenate((samples_with_label_0, samples_with_label_1, samples_with_label_2))
features_added_data_points_not_normalized_case_4 = get_features_added_data_points(data_points_not_normalized_case_4)
data_points_normalized_case_4 = normalization(features_added_data_points_not_normalized_case_4)

labels_case_4 = np.concatenate((np.zeros((len(samples_with_label_0), 1)), np.ones((len(samples_with_label_1), 1)), np.full((len(samples_with_label_2), 1), 2)))

In [44]:
data_points_train_case_4, data_points_test_case_4, labels_train_case_4, labels_test_case_4 = train_test_split(data_points_normalized_case_4, labels_case_4, random_state=seed, test_size=0.2)

In [45]:
# With Accuracy
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_4, data_points_test_case_4, labels_train_case_4, labels_test_case_4)
print("Without Feature Selection :: Without CoC :: Accuracy")
print("Result Accuracy", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_4, labels_case_4, 'Accuracy', stop_method='Feature Count', stop_measurment=5, check_all=False)
measurement = implement_classification('Accuracy', 'Random Forest', data_points_train_case_4[:, selected_features_indexes].reshape((data_points_train_case_4.shape[0], len(selected_features_indexes))), data_points_test_case_4[:, selected_features_indexes].reshape((data_points_test_case_4.shape[0], len(selected_features_indexes))), labels_train_case_4, labels_test_case_4)
print("With Feature Selection :: Without CoC :: Accuracy")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Accuracy", measurement, "\n")

# With CoC
k = 4
measurement = implement_clustering_and_classification(data_points_normalized_case_4, "Accuracy", cluster_k_value=k)
print("Without Feature Selection :: With CoC :: Accuracy")
print("k = ", k, " :: Accuracy")
print("Accuracy of Classification on Each Cluster", measurement)
print("Result Accuracy (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Accuracy
Result Accuracy 0.81 

With Feature Selection :: Without CoC :: Accuracy
Checking All :: Stop :: Feature Count
Selected Features Indexes [4, 9, 10, 11, 20]
Result Accuracy 0.78 

Without Feature Selection :: With CoC :: Accuracy
k =  4  :: Accuracy
Accuracy of Classification on Each Cluster [0.97101449 1.         1.         0.93333333]
Result Accuracy (Average) 0.9760869565217392 



In [48]:
# With Recall
########################################################################

# Without Feature Selection - Without CoC
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_4, data_points_test_case_4, labels_train_case_4, labels_test_case_4, labels_type=1)
print("Without Feature Selection :: Without CoC :: Recall")
print("Result Recall", measurement, "\n")

# With Feature Selection
selected_features_indexes = select_features(data_points_normalized_case_4, labels_case_4, 'Recall', labels_type=1)
measurement = implement_classification('Recall', 'Random Forest', data_points_train_case_4[:, selected_features_indexes].reshape((data_points_train_case_4.shape[0], len(selected_features_indexes))), data_points_test_case_4[:, selected_features_indexes].reshape((data_points_test_case_4.shape[0], len(selected_features_indexes))), labels_train_case_4, labels_test_case_4, labels_type=1)
print("With Feature Selection :: Without CoC :: Recall")
print("Checking All :: Stop :: Feature Count")
print("Selected Features Indexes", selected_features_indexes)
print("Result Recall", measurement, "\n")

# With CoC
k = 3
measurement = implement_clustering_and_classification(data_points_normalized_case_4, "Recall", cluster_k_value=k, labels_type=1)
print("Without Feature Selection :: With CoC :: Recall")
print("k = ", k, " :: Recall")
print("Recall of Classification on Each Cluster", measurement)
print("Result Recall (Average)", np.average(measurement), "\n")

Without Feature Selection :: Without CoC :: Recall
Result Recall 0.81 

With Feature Selection :: Without CoC :: Recall
Checking All :: Stop :: Feature Count
Selected Features Indexes [4, 9, 10, 11, 12, 13, 19, 20]
Result Recall 0.8 

Without Feature Selection :: With CoC :: Recall
k =  3  :: Recall
Recall of Classification on Each Cluster [1.         0.98529412 0.9047619 ]
Result Recall (Average) 0.9633520074696545 

