# Lie detector
#### CNN model

> Mayur Sharma\
> Rohan deep Kujur\
> Khushi Tulsian\
> Atharva Karve

In [None]:
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit

# Notebook display limits: cap printed rows at 50, never truncate columns.
pd.options.display.max_rows = 50
pd.options.display.max_columns = None

# Import and Combine datasets

In [None]:
# Segments viewer: for one recording, print a running number and the row
# count of every `question_no` group.
N = 3
# Forward slashes are accepted on Windows as well as POSIX; the previous
# backslash-separated path only resolved on Windows.
candidate = pd.read_csv(f'./data/VID{N}_data.csv')
for i, (question, segment) in enumerate(candidate.groupby("question_no"), start=1):
    print(i, len(segment))

In [None]:
# Load every recording, make its question ids globally unique and stack the
# recordings into a single frame.
candidates = []

for i in (1, 2, 3, 5):
    # Forward slashes work on Windows and POSIX alike.
    candidate = pd.read_csv(f'./data/VID{i}_data.csv')
    print(candidate.shape)
    # Encode the recording number in the question id (video i, question q
    # -> i + q/100) so that questions from different videos never share an id.
    candidate['question_no'] = i + 0.01*candidate['question_no']

    candidates.append(candidate)

# Concatenate once instead of growing the frame inside the loop, and rebuild
# the index so row labels are unique across recordings (index-aligned
# assignments on the combined frame need unique labels).
lie_data = pd.concat(candidates, ignore_index=True)

print(lie_data.shape)
# NOTE(review): labels follow the convention used by the split report further
# down (TRUTH == 1 is "TRUE"); the earlier version printed them swapped.
print("T:", lie_data[lie_data["TRUTH"] == 1].shape[0])
print("F:", lie_data[lie_data["TRUTH"] == 0].shape[0])
lie_data

# Cleaning the dataset

In [None]:
def consecutive_difference(df):
    """Replace feature values by their row-to-row change within each question.

    Operates in place on ``df`` and returns None:

    1. rows are sorted by ``frame``;
    2. the columns at positions 1..27 are replaced by the difference to the
       previous row of the same ``question_no`` group;
    3. every row containing a NaN is dropped, which includes the first row
       of each question because it has no predecessor to diff against.
    """
    # Take the column names from the frame being processed rather than from
    # the notebook-global `lie_data`, so the function works on any frame.
    diff_columns = list(df.columns[1:28])

    df.sort_values(['frame'], inplace=True)
    grouped = df.groupby(['question_no'])
    # Assignment aligns on the index, so each diff lands on its own row.
    df[diff_columns] = grouped[diff_columns].diff()
    df.dropna(inplace=True)


def group_split(X, y, group, train_size = 0.8):
    """Split features and labels into train/test parts without splitting a group.

    A ``GroupShuffleSplit`` with the requested ``train_size`` is created and
    only the first split it generates is used. Rows are selected by position.

    Returns the tuple ``(X_train, X_test, y_train, y_test)``.
    """
    shuffler = GroupShuffleSplit(train_size=train_size)
    train_idx, test_idx = next(shuffler.split(X, y, groups=group))

    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    return (X_train, X_test, y_train, y_test)

In [None]:
# Shuffle all rows (sampling 100% of the frame without replacement).
lie_data = lie_data.sample(frac=1)

# Work on the rows that have no missing values; `lie_data` itself is kept.
X = lie_data.dropna()

# Grouping key for the split and the target column, taken before the
# non-feature columns are removed from X.
question_no = X['question_no']
y = X['TRUTH']
X = X.drop(columns=['frame', 'question_no', 'TRUTH'])

# X_train

In [None]:
# Hold out whole questions for validation, then report size and class
# balance of the full dataset and of both partitions.
X_train, X_valid, y_train, y_valid = group_split(X, y, question_no)

# The DATABASE counts are taken from `y` (the NaN-filtered labels that match
# `X`), so they add up to the shape printed above them; counting on
# `lie_data` would include rows that were dropped before the split.
for title, features, labels in (
    ("DATABASE", X, y),
    ("TRAIN", X_train, y_train),
    ("TEST", X_valid, y_valid),
):
    print(f"\n\t{title}")
    print(features.shape)
    # Vectorised count instead of the builtin sum() over a Series.
    print("TRUE :", int((labels == 1).sum()))
    print("FALSE:", int((labels == 0).sum()))
# X_train
# X_valid

In [None]:
# Number of feature columns in X; used as the width of the model input.
input_shape = len(X.columns)
input_shape

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import layers, activations, losses, metrics

In [None]:
# Fully connected binary classifier: two hidden Dense blocks (each followed
# by batch normalisation and 30% dropout) and a single-unit output.
model = keras.Sequential([
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first Dense layer, which current Keras versions warn against.
    keras.Input(shape=(input_shape,)),

    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(32, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    # No activation: the output is a raw logit.
    layers.Dense(1, activation=None)
])

In [None]:
# NOTE(review): 0.1 is far above the Adam learning rate Keras uses by
# default -- confirm this value is intended.
adam = tf.keras.optimizers.Adam(learning_rate=0.1)

# The loss is configured with from_logits=True, i.e. it expects the model to
# output raw scores rather than probabilities.
model.compile(
    optimizer=adam,
    loss=losses.BinaryCrossentropy(from_logits=True),
    metrics=[metrics.BinaryAccuracy()],
)

# Early-stopping callback: patience of 40 epochs, minimum improvement of
# 0.001, and the best weights are restored when training stops.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=40,
    restore_best_weights=True,
)

In [None]:
# Train for 128 epochs in mini-batches of 16, evaluating on the validation
# partition after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=128,
    validation_data=(X_valid, y_valid),
    # Early stopping is currently disabled; pass the callback to enable it:
    # callbacks=[early_stopping],
)

# One row per epoch, one column per logged metric.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot(title="Cross-entropy")
history_df[['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy", ylim=[-0.1, 1.1])

In [None]:
# Print the model's summary table.
model.summary()

In [None]:
import matplotlib.pyplot as plt

# Re-draw both training curves and write each one to a PNG file.
# Every entry: (history columns, plot keyword arguments, output file name).
for curve_columns, plot_kwargs, out_file in (
    (['loss', 'val_loss'],
     {'title': "Cross-entropy"},
     "cross-entropy.png"),
    (['binary_accuracy', 'val_binary_accuracy'],
     {'title': "Accuracy", 'ylim': [-0.1, 1.1]},
     "accuracy.png"),
):
    history_df[curve_columns].plot(**plot_kwargs)
    # savefig writes the current figure, i.e. the one just drawn.
    plt.savefig(out_file)

# PAST