# Using SHAP to explain the Personality Prediction

In [1]:
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
mpl.rcParams['figure.dpi'] = 120

# Show all columns
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [2]:
df = pd.read_csv('data_treated/ml_pers_traits.csv', index_col=None)

df.drop(columns=['Unnamed: 0'], inplace=True)

print(df.shape)
df.head(2)

(137, 63)


Unnamed: 0,Gender,Age,Education_lev,fluidIQ,freq_calls,freq_Tools,freq_Finance,freq_Games,freq_Entertainment,freq_Productivity,freq_Personalization,freq_News...Magazines,freq_Unknown,freq_Photography,freq_Shopping,freq_Communication,freq_Books...Reference,freq_Travel...Local,freq_Music...Audio,freq_Medical,freq_Education,freq_Business,freq_Lifestyle,freq_Transportation,freq_Weather,freq_Sports,freq_Browser,freq_Health...Fitness,freq_Media...Video,freq_Social,freq_Comics,dur_calls,dur_Tools,dur_Finance,dur_Games,dur_Entertainment,dur_Productivity,dur_Personalization,dur_News...Magazines,dur_Unknown,dur_Photography,dur_Shopping,dur_Communication,dur_Books...Reference,dur_Travel...Local,dur_Music...Audio,dur_Medical,dur_Education,dur_Business,dur_Lifestyle,dur_Transportation,dur_Weather,dur_Sports,dur_Browser,dur_Health...Fitness,dur_Media...Video,dur_Social,dur_Comics,Emotional Stability,Extraversion,Openness,Conscientiousness,Agreeableness
0,1,23,4,0.7251,120,414,0,40,51,553,0,8,31,260,52,1930,3,128,113,0,11,1,7,153,0,184,231,176,1,8,0,80.508333,19.989234,0.0,474.149741,66.142812,27.299807,0.0,63.482948,5.594866,22.306456,62.541808,60.222363,102.0,66.64078,19.273367,0.0,35.316076,4.0,60.666667,55.469027,0.0,124.447371,72.502856,125.382997,140.0,0.0,0.0,0,0,0,0,0
1,1,21,4,0.9921,98,239,3,0,21,88,0,4,42,113,23,3430,5,19,4,0,0,7,0,34,0,0,187,0,1,211,0,176.214286,21.201236,77.0,0.0,292.695366,29.823646,0.0,0.0,2.149066,16.753147,90.80438,42.197411,52.0,53.702947,0.0,0.0,0.0,24.285714,0.0,74.587645,0.0,0.0,95.82911,0.0,0.0,82.581954,0.0,1,0,1,1,0


In [3]:
# Summarise the frame: index range plus each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137 entries, 0 to 136
Data columns (total 63 columns):
Gender                    137 non-null int64
Age                       137 non-null int64
Education_lev             137 non-null int64
fluidIQ                   137 non-null float64
freq_calls                137 non-null int64
freq_Tools                137 non-null int64
freq_Finance              137 non-null int64
freq_Games                137 non-null int64
freq_Entertainment        137 non-null int64
freq_Productivity         137 non-null int64
freq_Personalization      137 non-null int64
freq_News...Magazines     137 non-null int64
freq_Unknown              137 non-null int64
freq_Photography          137 non-null int64
freq_Shopping             137 non-null int64
freq_Communication        137 non-null int64
freq_Books...Reference    137 non-null int64
freq_Travel...Local       137 non-null int64
freq_Music...Audio        137 non-null int64
freq_Medical              137 non-null

## Machine Learning

In [5]:
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score

#### Split data into training and test sets

In [6]:
# Seed NumPy's global RNG so any later step that draws from it is repeatable.
# The split below is made reproducible by its own explicit random_state.
np.random.seed(1)

# The five personality-trait targets, named once so that the feature matrix and
# the target frame are both derived from the same list.
TRAIT_COLUMNS = ['Emotional Stability', 'Extraversion', 'Openness', 'Conscientiousness', 'Agreeableness']

# Features are every column that is not a target. Selecting by name rather than
# by position (previously `iloc[:, :-5]`) keeps X free of target columns even if
# the column order of the input file changes.
X = df.drop(columns=TRAIT_COLUMNS)
Y_all_traits = df[TRAIT_COLUMNS]

# Hold out 25% of the rows as the test set; all five traits are split together
# so the rows of X and Y stay aligned.
X_train, X_test, y_train_all_traits, y_test_all_traits = train_test_split(
    X, Y_all_traits, test_size=0.25, random_state=74
)

In [7]:
# Create the output directory if needed: to_csv does not create missing parent
# directories, so on a fresh checkout the writes below would otherwise fail.
os.makedirs('data_splitted', exist_ok=True)

# Persist the feature splits. The index is written too, so each saved row keeps
# the row label it had in the full frame.
X_train.to_csv('data_splitted/ml_X_train.csv')
X_test.to_csv('data_splitted/ml_X_test.csv')

# Persist the matching target splits (all five traits per row).
y_train_all_traits.to_csv('data_splitted/ml_y_all_traits_train.csv')
y_test_all_traits.to_csv('data_splitted/ml_y_all_traits_test.csv')

# Sanity check: feature and target splits must have matching row counts.
print(X_train.shape)
print(X_test.shape)
# All five traits targets
print(y_train_all_traits.shape)
print(y_test_all_traits.shape)

(102, 58)
(35, 58)
(102, 5)
(35, 5)


In [9]:
# Preview the first five rows of the held-out test features.
X_test.head(5)

Unnamed: 0,Gender,Age,Education_lev,fluidIQ,freq_calls,freq_Tools,freq_Finance,freq_Games,freq_Entertainment,freq_Productivity,freq_Personalization,freq_News...Magazines,freq_Unknown,freq_Photography,freq_Shopping,freq_Communication,freq_Books...Reference,freq_Travel...Local,freq_Music...Audio,freq_Medical,freq_Education,freq_Business,freq_Lifestyle,freq_Transportation,freq_Weather,freq_Sports,freq_Browser,freq_Health...Fitness,freq_Media...Video,freq_Social,freq_Comics,dur_calls,dur_Tools,dur_Finance,dur_Games,dur_Entertainment,dur_Productivity,dur_Personalization,dur_News...Magazines,dur_Unknown,dur_Photography,dur_Shopping,dur_Communication,dur_Books...Reference,dur_Travel...Local,dur_Music...Audio,dur_Medical,dur_Education,dur_Business,dur_Lifestyle,dur_Transportation,dur_Weather,dur_Sports,dur_Browser,dur_Health...Fitness,dur_Media...Video,dur_Social,dur_Comics
82,1,21,4,0.7566,28,293,6,199,17,281,0,0,19,385,80,4621,143,66,39,0,33,22,0,105,13,0,727,0,9,1659,0,12.071429,13.04421,37.0,188.938817,276.625,19.024673,0.0,0.0,3.147828,24.344433,50.906274,32.099305,193.623821,51.18379,15.122307,0.0,11.257627,97.419155,0.0,36.78402,6.537799,0.0,97.616783,0.0,1352.464837,134.498118,0.0
120,2,19,4,0.9547,627,1137,359,519,98,256,0,21,763,507,111,6092,43,47,87,35,4,112,10,210,0,0,659,9,17,2080,0,113.188198,3.25892,22.178779,118.901203,89.539916,12.346919,0.0,2.0,2.118835,6.469082,45.3206,19.674302,38.923613,53.46144,10.285218,3.660747,5.970033,24.28884,70.8,37.667244,0.0,0.0,59.398911,12.616721,39.4,101.118181,0.0
52,1,18,4,1.4896,76,1725,8,635,95,1178,26,30,1292,51,39,862,153,162,223,0,356,39,0,27,0,0,755,0,123,334,1,117.947368,2.066492,37.333345,8.428931,9.538364,13.421554,2.872503,51.720024,3.534347,19.108916,54.637637,8.591626,31.962175,29.082392,21.493582,0.0,109.773121,22.023497,0.0,33.150525,0.0,0.0,32.347281,0.0,4.169088,37.442936,397.0
16,2,20,4,0.9763,134,111,7,0,19,374,0,11,25,93,16,1326,69,73,37,0,0,10,4,56,0,0,311,52,4,736,0,35.865672,26.819805,49.74674,0.0,322.704987,22.136856,0.0,18.235017,3.56835,10.120037,72.088419,28.587304,34.047771,52.895711,22.324325,0.0,0.0,16.6,0.0,60.044083,0.0,0.0,72.82007,58.990064,46.206808,67.879375,0.0
72,2,28,5,0.656,32,122,0,5,0,140,0,144,15,80,0,963,2,24,67,0,60,14,0,38,0,0,117,0,1,0,0,258.0,32.608597,0.0,234.75,0.0,20.695836,0.0,15.636136,5.497845,40.722961,0.0,33.757956,0.0,89.256142,33.239869,0.0,244.86227,298.910349,0.0,55.089653,0.0,0.0,171.928596,0.0,12.0,0.0,0.0
