In [1]:
from sklearn.model_selection import GroupShuffleSplit
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import neurokit2 as nk
import matplotlib.pyplot as plt

# Notebook-wide display configuration.
# pandas: show up to 200 rows, never truncate columns, print floats with two decimals.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.2f}'.format)

# matplotlib: wide default figures, then the ggplot style sheet on top.
plt.rcParams.update({"figure.figsize": (20, 6)})
plt.style.use('ggplot')

# Data loading
df = pd.read_csv('output/combined_feature_engineered_tnt_only.csv')

# Discard the columns that are not used for modelling. 'datetime' is dropped
# as-is: parsing it to a datetime dtype first would be wasted work because the
# column is removed right away. Reassigning (instead of inplace=True) keeps the
# statement free of hidden mutation.
df = df.drop(
    columns=[
        'datetime', 'unix_time', 'source', 'response', 'intrusion',
        'intrusion_nothink', 'trialcode', 'session_id',
    ]
)

# 'participant' identifies who produced each row. It is used ONLY to define
# the split groups and is kept out of the feature matrix: as a feature it
# would feed an arbitrary ID into the imputer's distance computation and into
# any downstream model, and IDs seen in the test split never occur in training.
groups = df['participant']
X = df.drop(columns=['intrusion_tnt', 'participant'])  # Features
y = df['intrusion_tnt']  # Labels: 'intrusion_tnt' column

# Single 80/20 split in which all rows of a participant land on the same side.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(gss.split(X, y, groups=groups))

# Sanity check: no participant may appear in both splits.
assert set(groups.iloc[train_idx]).isdisjoint(groups.iloc[test_idx])

# Create the training and test sets
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

from sklearn.impute import KNNImputer

# Fit a 5-nearest-neighbour imputer on the training split only, so that no
# statistics from the test split influence the filled-in values.
knn_imputer = KNNImputer(n_neighbors=5).fit(X_train)

# Fill missing values in both splits with the imputer fitted above.
# NOTE(review): transform() returns arrays by default, so X_train / X_test
# presumably lose their column labels from here on — confirm downstream usage.
X_train, X_test = knn_imputer.transform(X_train), knn_imputer.transform(X_test)

### Short literature review on (conditional) recurrent GANs
- ([Nikolaidis, 2019-05-22](zotero://select/items/1_72NRYTCY)). Inspired by Esteban et al.; no source code available. Focuses on the application of synthetic data.
- ([Esteban, 2017-12-03](zotero://select/items/2_DHLZEJRV)). The original paper; source code: https://github.com/ratschlab/RGAN.
- [Conditional GAN using TorchGAN](https://torchgan.readthedocs.io/en/latest/modules/models.html?highlight=conditional#conditional-gan)

Training GANs appears to be very hard; see [10 lessons I learned training GANs for a year](https://towardsdatascience.com/10-lessons-i-learned-training-generative-adversarial-networks-gans-for-a-year-c9071159628).

Perhaps variational autoencoders are a better idea?

In [3]:
from timeVAE_abudesai.vae_conv_model import VariationalAutoencoderConv

# Instantiate the convolutional VAE.
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'tensorflow'", so tensorflow must be
# installed in the kernel environment before this cell can run.
# NOTE(review): seq_len and feat_dim are hard-coded to 2 and are not derived
# from X_train — presumably placeholders; confirm against the real data shape.
vae = VariationalAutoencoderConv(
    seq_len=2,
    feat_dim=2,
    latent_dim=2,
    hidden_layer_sizes=[100, 200],
)

ModuleNotFoundError: No module named 'tensorflow'