In [None]:
import random
from collections import deque

import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit

# Notebook display settings: truncate long frames to 50 rows, but never hide columns.
pd.options.display.max_rows = 50
pd.options.display.max_columns = None

In [None]:
def consecutive_difference(df, diff_columns=None, group_col='question_no'):
    """Replace feature columns with their row-to-row difference inside each group.

    The frame is modified IN PLACE and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the grouping column and the columns to difference.
    diff_columns : sequence of column labels, optional
        Columns to difference. Defaults to ``df.columns[1:28]`` (the slice the
        original implementation hard-coded).
    group_col : label, default 'question_no'
        Column whose values delimit the groups; differences never cross a
        group boundary.

    Notes
    -----
    The first row of every group has no predecessor, so its differences are
    NaN and the row is removed by the final ``dropna``. ``dropna`` also removes
    any row that already contained a NaN in *any* column.
    """
    if diff_columns is None:
        # Take the columns from the frame we were given, not from a notebook global.
        diff_columns = df.columns[1:28]
    diff_columns = list(diff_columns)

    df[diff_columns] = df.groupby([group_col])[diff_columns].diff()
    df.dropna(inplace=True)


def group_split(X, y, group, train_size = 0.8, random_state = None):
    """Split features and labels so that no group appears on both sides.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows (indexed positionally via ``.iloc``).
    y : pandas.Series
        Labels aligned row-for-row with ``X``.
    group : array-like
        Group id of every row; all rows sharing an id land in the same split.
    train_size : float, default 0.8
        Passed to ``GroupShuffleSplit``; applies to groups, so the row-level
        ratio can differ when groups have unequal sizes.
    random_state : int or None, default None
        Seed forwarded to ``GroupShuffleSplit``. Pass an int to get the same
        split on every run; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    # Only one split is ever consumed, so ask for exactly one.
    splitter = GroupShuffleSplit(
        n_splits = 1,
        train_size = train_size,
        random_state = random_state,
    )
    train_idx, test_idx = next(splitter.split(X, y, groups = group))
    return (X.iloc[train_idx], X.iloc[test_idx], y.iloc[train_idx], y.iloc[test_idx])

# Import datasets

In [None]:
# Load the per-video CSV files (VID1..VID3) and stack them into one frame.
# Frames are collected in a list and concatenated once: growing a DataFrame
# inside the loop is quadratic and concatenating onto an empty frame triggers
# a pandas deprecation warning.
candidates = []

for i in range(1, 4):
    # Forward slashes resolve on Windows as well as on Linux/macOS.
    candidate = pd.read_csv(f'./data/VID{i}_data.csv')
    # Make question ids unique across videos: <video>.<question/100>.
    # NOTE(review): ids collide if a video has 100 or more questions - confirm.
    candidate['question_no'] = i + 0.01*candidate['question_no']
    candidates.append(candidate)

# Each file keeps its own row index, exactly as before.
lie_data = pd.concat(candidates)

print(lie_data.shape)
lie_data

In [None]:
# Shuffle every row, then work on a NaN-free copy of the shuffled data.
lie_data = lie_data.sample(frac=1)
X = lie_data.copy().dropna()

# Keep the group ids and the target before they are removed from the features.
questio_no = X['question_no']
y = X['TRUTH']
X = X.drop(columns=['frame', 'question_no', 'TRUTH'])

# Group-aware split: all rows of one question end up on the same side.
X_train, X_valid, y_train, y_valid = group_split(X, y, questio_no)
X_train

In [None]:
# Report the size and class balance of the full data set and of both splits.
# Every section counts labels from the label vector that matches the printed
# feature shape; the DATABASE section previously counted rows of the unfiltered
# `lie_data`, which includes NaN rows that are absent from X.
for title, features, labels in (
    ("\n\tDATABASE", X, y),
    ("\n\tTRAIN", X_train, y_train),
    ("\n\tTEST", X_valid, y_valid),
):
    print(title)
    print(features.shape)
    print("TRUE :", int((labels == 1).sum()))
    print("FALSE:", int((labels == 0).sum()))

In [None]:
# Number of feature columns fed to the network's first layer.
input_shape = len(X.columns)
input_shape

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Binary classifier: two hidden blocks (Dense -> BatchNorm -> Dropout) and a
# single-unit output.
model = keras.Sequential([
    # Explicit Input object instead of the deprecated `input_shape=` layer kwarg.
    keras.Input(shape=(input_shape,)),

    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    # Sigmoid keeps the output in (0, 1), which binary cross-entropy expects.
    # The previous ReLU output was unbounded and produced zero gradient
    # whenever the unit's pre-activation was negative.
    layers.Dense(1, activation='sigmoid')
])

In [None]:
# Stop-early callback: give up after 5 epochs without an improvement of at
# least 0.001 in the monitored quantity (Keras' default), and roll the model
# back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
)

# Adam with the standard 1e-3 step size; cross-entropy loss and plain accuracy
# for the two-class target.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    loss="binary_crossentropy",
    metrics=['binary_accuracy'],
    optimizer=optimizer,
)

In [None]:
# Train on the training split and evaluate on the held-out split every epoch.
# `early_stopping` is not passed to this call, so all 256 epochs always run;
# add `callbacks=[early_stopping]` to enable it.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=256,
    validation_data=(X_valid, y_valid),
)

# One row per epoch, one column per logged metric.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot(title="Cross-entropy")
history_df[['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy", ylim=[-0.1, 1.1])

In [None]:
# Print the layer-by-layer architecture of `model`.
model.summary()

In [None]:
# Build sliding windows of SEQ_LEN consecutive rows, each paired with the
# target (last column) of the window's final row.
# NOTE(review): `SEQ_LEN` and `df` are not defined anywhere in the visible
# notebook - define them before running this cell. Windows are taken over the
# raw row order of `df`; presumably they should not span different questions -
# confirm.
sequential_data = []  # list of [window, target] pairs
prev_days = deque(maxlen=SEQ_LEN)  # rolling window; the deque drops the oldest row once it holds SEQ_LEN rows

for row in df.values:
    prev_days.append(list(row[:-1]))  # every column except the target
    if len(prev_days) == SEQ_LEN:  # emit only once the window is full
        sequential_data.append([np.array(prev_days), row[-1]])

random.shuffle(sequential_data)  # decorrelate neighbouring (overlapping) windows

# PAST

In [None]:
import matplotlib.pyplot as plt

# Re-draw the training/validation loss curves and write the figure to disk.
loss_curves = history_df[['loss', 'val_loss']]
loss_curves.plot(title="Cross-entropy")
plt.savefig('b.png')