In [133]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
plt.rc("font", size=14)

In [134]:
data = pd.read_csv("gestures.csv")

data.head()

Unnamed: 0,id,OtherGestures,Smile,Laugh,Scowl,otherEyebrowMovement,Frown,Raise,OtherEyeMovements,Close-R,...,forwardHead,downRHead,singleHand,bothHands,otherHandM,complexHandM,sidewaysHand,downHands,upHands,class
0,trial_lie_001.mp4,1,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,deceptive
1,trial_lie_002.mp4,1,0,0,0,0,1,0,1,0,...,0,0,0,1,0,1,0,0,0,deceptive
2,trial_lie_003.mp4,1,0,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,0,deceptive
3,trial_lie_004.mp4,1,0,0,0,1,0,0,1,0,...,0,1,0,0,1,0,0,0,0,deceptive
4,trial_lie_005.mp4,1,0,0,0,0,1,0,1,0,...,0,0,1,0,0,0,0,0,0,deceptive


In [135]:
print(data.info())
print(data.shape)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 41 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   id                    121 non-null    object
 1   OtherGestures         121 non-null    int64 
 2   Smile                 121 non-null    int64 
 3   Laugh                 121 non-null    int64 
 4   Scowl                 121 non-null    int64 
 5   otherEyebrowMovement  121 non-null    int64 
 6   Frown                 121 non-null    int64 
 7   Raise                 121 non-null    int64 
 8   OtherEyeMovements     121 non-null    int64 
 9   Close-R               121 non-null    int64 
 10  X-Open                121 non-null    int64 
 11  Close-BE              121 non-null    int64 
 12  gazeInterlocutor      121 non-null    int64 
 13  gazeDown              121 non-null    int64 
 14  gazeUp                121 non-null    int64 
 15  otherGaze             121 non-null    in

In [136]:
data['class'].value_counts()

deceptive    61
truthful     60
Name: class, dtype: int64

In [137]:
data.groupby('class').mean()

Unnamed: 0_level_0,OtherGestures,Smile,Laugh,Scowl,otherEyebrowMovement,Frown,Raise,OtherEyeMovements,Close-R,X-Open,...,waggle,forwardHead,downRHead,singleHand,bothHands,otherHandM,complexHandM,sidewaysHand,downHands,upHands
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
deceptive,0.688525,0.098361,0.016393,0.196721,0.311475,0.295082,0.393443,0.622951,0.213115,0.098361,...,0.016393,0.098361,0.42623,0.180328,0.442623,0.377049,0.47541,0.04918,0.032787,0.04918
truthful,0.616667,0.15,0.0,0.233333,0.266667,0.116667,0.616667,0.283333,0.583333,0.116667,...,0.0,0.083333,0.316667,0.25,0.25,0.5,0.166667,0.033333,0.033333,0.266667


In [138]:
X = data.drop(["id", "class"], 1)
y = (data["class"] == "truthful").astype(np.int)

X_train1 = X.iloc[0:50]
X_train2 = X.iloc[-50:]
X_train = pd.concat([X_train1, X_train2])

y_train1 = y.iloc[0:50]
y_train2 = y.iloc[-50:]
y_train = pd.concat([y_train1, y_train2])

X_test = X.iloc[50: 70]
y_test = y.iloc[50: 70]

In [139]:
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

In [140]:
# Find accuracy
count = 0
for i in range(len(y_test)):
    if y_test.iloc[i] == y_pred[i]:
        count += 1

print("Logistic Regression precision:", count/len(y_test))

Logistic Regression precision: 0.65


In [141]:
# Filter bad predictors

means = data.drop("id", 1).groupby('class').mean()
deceptive_means = means.iloc[0]
truthful_means = means.iloc[1]
col_is_interesting = (deceptive_means - truthful_means).abs() > 0.2

# Select interesting columns
interesting_cols = []
for col in col_is_interesting.keys():
    if(col_is_interesting[col]):
        interesting_cols.append(col)
        
X_train_filtered = X_train.filter(items = interesting_cols)
X_test_filtered = X_test.filter(items = interesting_cols)

print(X_train_filtered.shape, X_train_filtered.shape)

(100, 7) (100, 7)


In [142]:
log_reg_filtered = LogisticRegression()
log_reg_filtered.fit(X_train_filtered, y_train)
y_pred_filtered = log_reg_filtered.predict(X_test_filtered)

In [143]:
# Find accuracy for filtered model
count = 0
for i in range(len(y_test)):
    if y_test.iloc[i] == y_pred_filtered[i]:
        count += 1

print("Logistic Regression precision:", count/len(y_test))

Logistic Regression precision: 0.7


In [144]:
# Cross Validations Scores
from sklearn.model_selection import cross_val_score

scores = cross_val_score(log_reg, X, y, cv=6, scoring='accuracy')
print("Unfiltered Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

X_filtered = X.filter(items = interesting_cols)

scores_filtered = cross_val_score(log_reg_filtered, X_filtered, y, cv=6, scoring='accuracy')
print("Filtered Accuracy: %0.2f (+/- %0.2f)" % (scores_filtered.mean(), scores_filtered.std() * 2))

Unfiltered Accuracy: 0.74 (+/- 0.23)
Filtered Accuracy: 0.78 (+/- 0.21)
