In [1]:
import numpy as np
import pandas as pd

In [7]:
data = pd.read_csv('EEG_Eye_State.csv')
X = data.loc[:,data.columns !='Eye_detection']
y = data['Eye_detection']

In [10]:
X = X.to_numpy()
y = y.to_numpy()

In [11]:
# Set our RNG seed for reproducibility.
RANDOM_STATE_SEED = 123
np.random.seed(RANDOM_STATE_SEED)

In [13]:
# Isolate our examples for our labeled dataset.
n_labeled_examples = X.shape[0]
training_indices = np.random.randint(low=0, high=n_labeled_examples + 1, size=3000)

X_train = X[training_indices]
y_train = y[training_indices]

# Isolate the non-training examples we'll be querying.
X_pool = np.delete(X, (training_indices), axis=0)
y_pool = np.delete(y, (training_indices), axis=0)

In [14]:
X_train.shape

(3000, 14)

In [15]:
X_pool.shape

(12262, 14)

In [23]:
from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling,entropy_sampling
from sklearn.ensemble import RandomForestClassifier

# Specify our core estimator along with it's active learning model.

learner_us = ActiveLearner(estimator=RandomForestClassifier(), query_strategy=uncertainty_sampling, X_training=X_train, y_training=y_train)
learner_es = ActiveLearner(estimator=RandomForestClassifier(), query_strategy=entropy_sampling, X_training=X_train, y_training=y_train)

In [24]:
# Isolate the data we'll need for plotting.
predictions_us = learner_us.predict(X)
is_correct = (predictions_us == y)

In [25]:
predictions_es = learner_es.predict(X)
is_correct = (predictions_es == y)

In [30]:
# Record our learner's score on the raw data.
unqueried_score_us = learner_us.score(X, y)
unqueried_score_es = learner_es.score(X, y)

In [33]:
print("Active Learner Uncertainity Sampling class predictions (Accuracy: {score:.3f})".format(score=unqueried_score_us))
print("Active Learner Entropy Sampling class predictions (Accuracy: {score:.3f})".format(score=unqueried_score_es))

Active Learner Uncertainity Sampling class predictions (Accuracy: 0.897)
Active Learner Entropy Sampling class predictions (Accuracy: 0.894)


In [38]:
N_QUERIES = 20
performance_history = [unqueried_score_us]
print("Uncertainity Sampling")
# Allow our model to query our unlabeled dataset for the most
# informative points according to our query strategy (uncertainty sampling).
for index in range(N_QUERIES):
  query_index, query_instance = learner_us.query(X_pool)

  # Teach our ActiveLearner model the record it has requested.
  X1, y1 = X_pool[query_index].reshape(1, -1), y_pool[query_index].reshape(1, )
  learner_us.teach(X=X1, y=y1)

  # Remove the queried instance from the unlabeled pool.
  X_pool, y_pool = np.delete(X_pool, query_index, axis=0), np.delete(y_pool, query_index)

  # Calculate and report our model's accuracy.
  model_accuracy = learner_us.score(X, y)
  print('Accuracy after query {n}: {acc:0.4f}'.format(n=index + 1, acc=model_accuracy))

  # Save our model's performance for plotting.
  performance_history.append(model_accuracy)

Uncertainity Sampling
Accuracy after query 1: 0.8933
Accuracy after query 2: 0.8947
Accuracy after query 3: 0.8948
Accuracy after query 4: 0.8942
Accuracy after query 5: 0.8959
Accuracy after query 6: 0.8953
Accuracy after query 7: 0.8965
Accuracy after query 8: 0.8943
Accuracy after query 9: 0.8973
Accuracy after query 10: 0.8977
Accuracy after query 11: 0.8969
Accuracy after query 12: 0.8997
Accuracy after query 13: 0.8987
Accuracy after query 14: 0.8984
Accuracy after query 15: 0.8960
Accuracy after query 16: 0.9003
Accuracy after query 17: 0.8968
Accuracy after query 18: 0.8993
Accuracy after query 19: 0.8954
Accuracy after query 20: 0.8985


In [41]:
N_QUERIES = 20
performance_history = [unqueried_score_es]
print("Entropy Sampling")
# Allow our model to query our unlabeled dataset for the most
# informative points according to our query strategy (uncertainty sampling).
for index in range(N_QUERIES):
  query_index, query_instance = learner_es.query(X_pool)

  # Teach our ActiveLearner model the record it has requested.
  X1, y1 = X_pool[query_index].reshape(1, -1), y_pool[query_index].reshape(1, )
  learner_es.teach(X=X1, y=y1)

  # Remove the queried instance from the unlabeled pool.
  X_pool, y_pool = np.delete(X_pool, query_index, axis=0), np.delete(y_pool, query_index)

  # Calculate and report our model's accuracy.
  model_accuracy = learner_es.score(X, y)
  print('Accuracy after query {n}: {acc:0.4f}'.format(n=index + 1, acc=model_accuracy))

  # Save our model's performance for plotting.
  performance_history.append(model_accuracy)

Entropy Sampling
Accuracy after query 1: 0.8985
Accuracy after query 2: 0.8977
Accuracy after query 3: 0.8981
Accuracy after query 4: 0.9001
Accuracy after query 5: 0.9001
Accuracy after query 6: 0.9021
Accuracy after query 7: 0.9031
Accuracy after query 8: 0.8995
Accuracy after query 9: 0.8996
Accuracy after query 10: 0.8990
Accuracy after query 11: 0.8999
Accuracy after query 12: 0.8979
Accuracy after query 13: 0.8977
Accuracy after query 14: 0.8991
Accuracy after query 15: 0.8991
Accuracy after query 16: 0.8986
Accuracy after query 17: 0.9023
Accuracy after query 18: 0.9019
Accuracy after query 19: 0.8987
Accuracy after query 20: 0.9016
