# Определения состояния глаз человека: открыты или закрыты

**Данные**: временной ряд для нейроинтерфейса.

**Задача**: по измерениям электроэнцефалограммы определить состояние глаз человека.

В этом ноутбуке предложено сравнение качества классификации состояния глаз человека на реальном и синтетическом наборе данных.


*Синтетический набор данных производится путем генерации временного ряда с помощью модели условной генеративно-состязательной сети  (Conditional GAN).*

## Загрузка библиотек

In [15]:
from sklearn.metrics import confusion_matrix
from tensorflow import keras
import tensorflow as tf

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install tsgm

In [3]:
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
from sklearn.model_selection import train_test_split
import tsgm

## Чтение данных

In [5]:
raw_data = loadarff('EEG Eye State.arff')
df = pd.DataFrame(raw_data[0])

In [7]:
df.head()

Unnamed: 0,AF3,F7,F3,FC5,T7,P7,O1,O2,P8,T8,FC6,F4,F8,AF4,eyeDetection
0,4329.23,4009.23,4289.23,4148.21,4350.26,4586.15,4096.92,4641.03,4222.05,4238.46,4211.28,4280.51,4635.9,4393.85,b'0'
1,4324.62,4004.62,4293.85,4148.72,4342.05,4586.67,4097.44,4638.97,4210.77,4226.67,4207.69,4279.49,4632.82,4384.1,b'0'
2,4327.69,4006.67,4295.38,4156.41,4336.92,4583.59,4096.92,4630.26,4207.69,4222.05,4206.67,4282.05,4628.72,4389.23,b'0'
3,4328.72,4011.79,4296.41,4155.9,4343.59,4582.56,4097.44,4630.77,4217.44,4235.38,4210.77,4287.69,4632.31,4396.41,b'0'
4,4326.15,4011.79,4292.31,4151.28,4347.69,4586.67,4095.9,4627.69,4210.77,4244.1,4212.82,4288.21,4632.82,4398.46,b'0'


In [8]:
closed = df["eyeDetection"].unique()[0]
open = df["eyeDetection"].unique()[1]

In [9]:
df.replace({closed: 0, open: 1}, inplace=True)

In [10]:
X = df.drop("eyeDetection", axis = 1)
y = df["eyeDetection"]

In [11]:
X_array = X.values

In [12]:
X_array.shape

(14980, 14)

## Создание синтетических данных

### Предобработка данных

In [13]:
X_array = X_array.reshape((X_array.shape[0], 1, X_array.shape[1]))

In [16]:
scaler = tsgm.utils.TSFeatureWiseScaler((-1, 1))
X_array = scaler.fit_transform(X_array)
y_keras = keras.utils.to_categorical(y, 2)

X_array = X_array.astype(np.float32)
y_keras = y_keras.astype(np.float32)

In [17]:
seq_len = X_array.shape[1]
feature_dim = X_array.shape[2]
latent_dim = 64
output_dim = 2
n_epochs = 200
batch_size = 128

### Обучение модели генерации синтетических данных

In [18]:
dataset = tf.data.Dataset.from_tensor_slices((X_array, y_keras))
dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)

In [19]:
architecture = tsgm.models.architectures.zoo["cgan_base_c4_l1"](
    seq_len=seq_len, feat_dim=feature_dim,
    latent_dim=latent_dim, output_dim=output_dim)
discriminator, generator = architecture.discriminator, architecture.generator

cond_gan = tsgm.models.cgan.ConditionalGAN(
    discriminator=discriminator, generator=generator, latent_dim=latent_dim
)
cond_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
    loss_fn=keras.losses.BinaryCrossentropy(),
)

cond_gan.fit(dataset, epochs=n_epochs)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x7952e663ba00>

In [20]:
X_gen = cond_gan.generate(y_keras)
X_gen = X_gen.numpy()
y_gen = y_keras

In [21]:
type(X_gen)

numpy.ndarray

In [22]:
np.save("generated.npy", X_gen)

## Обучение базового классификатора

In [12]:
X_gen = np.load("generated.npy")

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X_array, y, test_size=0.1, random_state=33)

In [25]:
X_train = X_train.reshape(X_train.shape[0], 14)
X_gen = X_gen.reshape(X_gen.shape[0], 14)
X_test = X_test.reshape(X_test.shape[0], 14)

### на оригинальных данных

In [26]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)

In [27]:
y_pred = classifier.predict(X_test)

In [28]:
from sklearn import metrics
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy score on original data:",accuracy)
precision = metrics.precision_score(y_test, y_pred)
print("Precision score on original data:",precision)
recall = metrics.recall_score(y_test, y_pred)
print("Recall score on original data:",recall)

[[709 109]
 [136 544]]
Accuracy score: 0.8364485981308412
Precision score: 0.8330781010719756
Recall score: 0.8


### на синтетических данных

In [29]:
X_gen_train, X_gen_test, y_gen_train, y_gen_test = train_test_split(X_gen, y, test_size=0.1, random_state=33)

In [30]:
classifier = DecisionTreeClassifier()
classifier.fit(X_gen_train, y_gen_train)

In [31]:
y_gen_pred = classifier.predict(X_gen_test)

In [32]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy = metrics.accuracy_score(y_gen_test, y_gen_pred)
print("Accuracy score on synthetic data:",accuracy)
precision = metrics.precision_score(y_gen_test, y_gen_pred)
print("Precision score on synthetic data:",precision)
recall = metrics.recall_score(y_gen_test, y_gen_pred)
print("Recall score on synthetic data:",recall)

[[709 109]
 [136 544]]
Accuracy score on synthetic data: 0.9753004005340454
Precision score on synthetic data: 0.9762962962962963
Recall score on synthetic data: 0.9691176470588235
