In [46]:
import pandas as pd
df = pd.read_csv('train.csv')
df

Unnamed: 0,employee_name,user_id,O,C,E,A,N,insider
0,Calvin Edan Love,CEL0561,40,39,36,19,40,1
1,Christine Reagan Deleon,CRD0624,26,22,17,39,32,1
2,Jade Felicia Caldwell,JFC0557,22,16,23,40,33,1
3,Aquila Stewart Dejesus,ASD0577,40,48,36,14,37,1
4,Micah Abdul Rojas,MAR0955,36,44,23,44,25,0
...,...,...,...,...,...,...,...,...
265,Holly Xerxes Lara,HXL0968,45,21,35,40,34,0
266,Kirby Ruby Lyons,KRL0501,18,27,23,28,26,0
267,Kareem Porter Collins,KPC0073,44,30,11,16,23,0
268,Riley Ginger Guy,RGG0064,36,19,20,42,34,0


In [68]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X = df[['O', 'C', 'E', 'A', 'N']]
Y = df['insider']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0) # 80%のデータを学習データに、20%を検証データにする

# ロジスティック回帰で学習
lr = LogisticRegression()
lr.fit(X_train, Y_train)
print("coefficient = ", lr.coef_)
print("intercept = ", lr.intercept_)

Y_pred = lr.predict(X_test)
print(type(Y_pred))

coefficient =  [[-0.03670863 -0.01998146  0.01744572  0.01780376 -0.00202065]]
intercept =  [2.08857843]
<class 'numpy.ndarray'>


In [48]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# モデルの評価
print('confusion matrix = \n', confusion_matrix(y_true=Y_test, y_pred=Y_pred))
print('accuracy = ', accuracy_score(y_true=Y_test, y_pred=Y_pred))
print('precision = ', precision_score(y_true=Y_test, y_pred=Y_pred))
print('recall = ', recall_score(y_true=Y_test, y_pred=Y_pred))
print('f1 score = ', f1_score(y_true=Y_test, y_pred=Y_pred))

confusion matrix = 
 [[ 0 16]
 [ 0 38]]
accuracy =  0.7037037037037037
precision =  0.7037037037037037
recall =  1.0
f1 score =  0.8260869565217391


In [49]:
import pandas as pd
import  numpy as np
test_df = pd.read_csv('test.csv')
test_df
X_test = test_df[['O', 'C', 'E', 'A', 'N']]
Y_prob = lr.predict_proba(X_test)

Y_prob = np.delete(Y_prob, [1], 1)
test_df['insider_prob'] = np.ravel(Y_prob)
test_df

Unnamed: 0,employee_name,user_id,O,C,E,A,N,authority,insider_prob
0,Nishio-kun,MCN0973,44,30,11,16,23,Strong,0.424521
1,Yagawa-kun,NSC0622,38,35,25,33,35,Strong,0.279439
2,Kathleen Audrey Vargas,KAV0428,37,35,31,18,31,Strong,0.303718
3,Taiyaki-kun,AJM0772,44,30,36,20,25,Strong,0.308411
4,Nicole Maris Valentine,NMV0507,39,30,21,11,36,Strong,0.366571
5,Tsujinaga-kun,CMW0297,50,43,25,36,33,Strong,0.40027
6,Christian James Rutledge,CJR0414,44,17,26,48,24,Normal,0.198868
7,Keefe Darius Duran,KDD0511,43,26,24,40,26,Normal,0.255612
8,Shimizu-san,ABH0821,15,35,37,26,30,Normal,0.131659
9,Kashiwazaki-sensei,BAG0190,41,10,14,39,31,Normal,0.221033


In [50]:
test_df['c_a'] = test_df['authority'].apply(lambda x : 10.0 if x == "Strong" else (7.0 if x == "Normal" else 5.0))
test_df['v_r'] = test_df['authority'].apply(lambda x : 10.0 if x == "Strong" else (7.0 if x == "Normal" else 5.0))
test_df['v_w'] = test_df['v_r'].apply(lambda x : x-0.2*x)

In [70]:
params = {'c_r': 2.5, 'c_rw': 8.0, 'c_w': 1.5, 'c_ww': 7.0}

def calc_best_response(row, params):
    c_r = params["c_r"]
    c_rw = params["c_rw"]
    c_w = params["c_w"]
    c_ww = params["c_ww"]
    c_a = row["c_a"]
    v_r = row["v_r"]
    v_w = row["v_w"]
    p_i = row["insider_prob"]

    actions = []
    actions.append((p_i*(-c_r+v_r)+(1-p_i)*(-c_r-c_rw), "Revoke")) # Revoke
    actions.append((p_i*(-c_w+v_w)+(1-p_i)*(-c_w-c_ww), "Warn")) # Warn
    actions.append((p_i*(-c_a), "Keep")) # Keep
    return max(actions)[1]

test_df["best_response"] = test_df.apply(calc_best_response, params=params, axis=1)
test_df

Unnamed: 0,employee_name,user_id,O,C,E,A,N,authority,insider_prob,c_a,v_r,v_w,best_response
0,Nishio-kun,MCN0973,44,30,11,16,23,Strong,0.424521,10.0,10.0,8.0,Warn
1,Yagawa-kun,NSC0622,38,35,25,33,35,Strong,0.279439,10.0,10.0,8.0,Keep
2,Kathleen Audrey Vargas,KAV0428,37,35,31,18,31,Strong,0.303718,10.0,10.0,8.0,Keep
3,Taiyaki-kun,AJM0772,44,30,36,20,25,Strong,0.308411,10.0,10.0,8.0,Keep
4,Nicole Maris Valentine,NMV0507,39,30,21,11,36,Strong,0.366571,10.0,10.0,8.0,Warn
5,Tsujinaga-kun,CMW0297,50,43,25,36,33,Strong,0.40027,10.0,10.0,8.0,Warn
6,Christian James Rutledge,CJR0414,44,17,26,48,24,Normal,0.198868,7.0,7.0,5.6,Keep
7,Keefe Darius Duran,KDD0511,43,26,24,40,26,Normal,0.255612,7.0,7.0,5.6,Keep
8,Shimizu-san,ABH0821,15,35,37,26,30,Normal,0.131659,7.0,7.0,5.6,Keep
9,Kashiwazaki-sensei,BAG0190,41,10,14,39,31,Normal,0.221033,7.0,7.0,5.6,Keep
