In [1]:
def get_clf_eval(y_test, pred):
    """Print the confusion matrix, accuracy, precision and recall for a set of predictions.

    The printed labels are Korean: 오차행렬 = confusion matrix, 정확도 = accuracy,
    정밀도 = precision, 재현율 = recall.

    Args:
        y_test: True class labels.
        pred: Predicted class labels, aligned with ``y_test``.

    Returns:
        None. The report is written to stdout.
    """
    from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    print('오차행렬')
    print(confusion)
    # Every metric uses `.4f` (four decimals). The previous `:4f` was a field
    # *width* of 4, which left precision/recall at the default six decimals.
    print(f'정확도:{accuracy:.4f}, 정밀도:{precision:.4f}, 재현율:{recall:.4f}')

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [3]:
def fillna(df):
    """Fill the missing values of the ``Age``, ``Cabin``, ``Embarked`` and ``Fare`` columns.

    ``Age`` is filled with the column mean, ``Cabin`` and ``Embarked`` with the
    placeholder ``'N'``, and ``Fare`` with the number ``0``.

    Args:
        df: DataFrame containing the four columns above. It is modified in place.

    Returns:
        The same DataFrame object, so calls can be chained.
    """
    # Assign the filled Series back to the column instead of calling
    # `df[col].fillna(..., inplace=True)`: the chained in-place form acts on an
    # intermediate object and does not update `df` under pandas copy-on-write.
    df['Age'] = df['Age'].fillna(df['Age'].mean())
    df['Cabin'] = df['Cabin'].fillna('N')
    df['Embarked'] = df['Embarked'].fillna('N')
    # Numeric 0, not the string '0', so Fare remains a numeric column.
    df['Fare'] = df['Fare'].fillna(0)
    return df

def drop_features(df):
    """Drop the ``PassengerId``, ``Name`` and ``Ticket`` columns from ``df`` in place.

    Returns the same DataFrame object that was passed in.
    """
    unused_columns = ['PassengerId', 'Name', 'Ticket']
    df.drop(unused_columns, axis=1, inplace=True)
    return df


def format_features(df):
    """Encode the ``Sex``, ``Cabin`` and ``Embarked`` columns of ``df`` in place.

    ``Cabin`` is first reduced to its first character. Each of the three
    columns is then replaced by the result of a freshly created
    ``LabelEncoder().fit_transform`` on that column, and the encoder's
    ``classes_`` are printed so the mapping can be inspected.

    Returns the same DataFrame object that was passed in.
    """
    from sklearn.preprocessing import LabelEncoder
    # Keep only the leading character of each cabin value.
    df['Cabin'] = df['Cabin'].str.slice(stop=1)
    for column in ('Sex', 'Cabin', 'Embarked'):
        encoder = LabelEncoder()
        encoded = encoder.fit_transform(df[column])
        df[column] = encoded
        print(encoder.classes_)
    return df

def transform_features(df):
    """Run the whole preprocessing pipeline on ``df``.

    Applies ``fillna``, then ``drop_features``, then ``format_features``, and
    returns the result of the last step.
    """
    return format_features(drop_features(fillna(df)))

In [4]:
# Load the raw data, preprocess every column except the `Survived` target,
# and keep the target separately as `y`.
df = pd.read_csv('titanic.csv')
X = transform_features(df.drop(columns=['Survived']))
y = df['Survived']

['female' 'male']
['A' 'B' 'C' 'D' 'E' 'F' 'G' 'N' 'T']
['C' 'N' 'Q' 'S']


In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=11,
)

In [6]:
# Fit the classifier on the training split, predict the held-out rows and
# print the evaluation report.
lr_clf = LogisticRegression(solver='liblinear').fit(X_train, y_train)
pred = lr_clf.predict(X_test)
get_clf_eval(y_test, pred)

오차행렬
[[108  10]
 [ 14  47]]
정확도:0.8659, 정밀도:0.824561, 재현율0.770492


In [7]:
# Display the predicted class label for every row of the test split.
pred

array([1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1], dtype=int64)

In [8]:
# Display the predicted probabilities for the test split: one row per sample,
# one column per class.
lr_clf.predict_proba(X_test)

array([[0.44935225, 0.55064775],
       [0.86335511, 0.13664489],
       [0.86429643, 0.13570357],
       [0.84968519, 0.15031481],
       [0.82343409, 0.17656591],
       [0.84231224, 0.15768776],
       [0.87095489, 0.12904511],
       [0.27228603, 0.72771397],
       [0.78185128, 0.21814872],
       [0.33185998, 0.66814002],
       [0.86178763, 0.13821237],
       [0.87058097, 0.12941903],
       [0.8642595 , 0.1357405 ],
       [0.87065944, 0.12934056],
       [0.56033544, 0.43966456],
       [0.85003022, 0.14996978],
       [0.88954172, 0.11045828],
       [0.74250732, 0.25749268],
       [0.71120224, 0.28879776],
       [0.23776278, 0.76223722],
       [0.75684107, 0.24315893],
       [0.62428169, 0.37571831],
       [0.84655246, 0.15344754],
       [0.82711256, 0.17288744],
       [0.86825628, 0.13174372],
       [0.77003828, 0.22996172],
       [0.82946349, 0.17053651],
       [0.90336131, 0.09663869],
       [0.73372049, 0.26627951],
       [0.68847387, 0.31152613],
       [0.

In [9]:
# Keep the test-split probabilities for the threshold experiments that follow.
pred_proba = lr_clf.predict_proba(X_test)

In [10]:
# Show the class probabilities next to the predicted label (last column).
np.column_stack([pred_proba, pred])

array([[0.44935225, 0.55064775, 1.        ],
       [0.86335511, 0.13664489, 0.        ],
       [0.86429643, 0.13570357, 0.        ],
       [0.84968519, 0.15031481, 0.        ],
       [0.82343409, 0.17656591, 0.        ],
       [0.84231224, 0.15768776, 0.        ],
       [0.87095489, 0.12904511, 0.        ],
       [0.27228603, 0.72771397, 1.        ],
       [0.78185128, 0.21814872, 0.        ],
       [0.33185998, 0.66814002, 1.        ],
       [0.86178763, 0.13821237, 0.        ],
       [0.87058097, 0.12941903, 0.        ],
       [0.8642595 , 0.1357405 , 0.        ],
       [0.87065944, 0.12934056, 0.        ],
       [0.56033544, 0.43966456, 0.        ],
       [0.85003022, 0.14996978, 0.        ],
       [0.88954172, 0.11045828, 0.        ],
       [0.74250732, 0.25749268, 0.        ],
       [0.71120224, 0.28879776, 0.        ],
       [0.23776278, 0.76223722, 1.        ],
       [0.75684107, 0.24315893, 0.        ],
       [0.62428169, 0.37571831, 0.        ],
       [0.

In [11]:
from sklearn.preprocessing import Binarizer

In [12]:
X=[[1,-1,2],
   [2,0,0],
   [0,1.1,1.2]]
X

[[1, -1, 2], [2, 0, 0], [0, 1.1, 1.2]]

In [13]:
Binarizer = Binarizer(threshold=1.1)

In [14]:
Binarizer.fit_transform(X)

array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.]])

In [15]:
# Decision threshold applied to the positive-class probability in the cells below.
custom_threshold = 0.5

In [16]:
# Last probability column, kept two-dimensional as an (n, 1) column vector.
pred_proba[:, -1:]

array([[0.55064775],
       [0.13664489],
       [0.13570357],
       [0.15031481],
       [0.17656591],
       [0.15768776],
       [0.12904511],
       [0.72771397],
       [0.21814872],
       [0.66814002],
       [0.13821237],
       [0.12941903],
       [0.1357405 ],
       [0.12934056],
       [0.43966456],
       [0.14996978],
       [0.11045828],
       [0.25749268],
       [0.28879776],
       [0.76223722],
       [0.24315893],
       [0.37571831],
       [0.15344754],
       [0.17288744],
       [0.13174372],
       [0.22996172],
       [0.17053651],
       [0.09663869],
       [0.26627951],
       [0.31152613],
       [0.92353131],
       [0.7746788 ],
       [0.12838061],
       [0.75924582],
       [0.37288269],
       [0.22996172],
       [0.09445724],
       [0.59397426],
       [0.06956416],
       [0.1234948 ],
       [0.30202578],
       [0.10335405],
       [0.78006621],
       [0.68434287],
       [0.62057772],
       [0.62067109],
       [0.92838719],
       [0.442

In [23]:
# Last probability column as an (n, 1) column vector, the input passed to
# Binarizer in the following cells.
pred_proba_1 = pred_proba[:, -1:]

In [24]:
# Turn probabilities into 0/1 labels: values above `custom_threshold` become 1.
# Requires `Binarizer` to be the imported class (not rebound to an instance).
custom_predict = (
    Binarizer(threshold=custom_threshold)
    .fit(pred_proba_1)
    .transform(pred_proba_1)
)

TypeError: 'Binarizer' object is not callable

In [19]:
# Report printed earlier for `pred`, kept here for comparison with the
# thresholded predictions:
# Confusion matrix
# [[108  10]
#  [ 14  47]]
# accuracy:0.8659, precision:0.824561, recall0.770492
get_clf_eval(y_test,custom_predict)

NameError: name 'custom_predict' is not defined

In [None]:
# Re-evaluate with a lower decision threshold of 0.4.
custom_threshold = 0.4
custom_predict = (
    Binarizer(threshold=custom_threshold)
    .fit(pred_proba_1)
    .transform(pred_proba_1)
)
get_clf_eval(y_test, custom_predict)

In [None]:
# Re-evaluate with a higher decision threshold of 0.6.
custom_threshold = 0.6
custom_predict = (
    Binarizer(threshold=custom_threshold)
    .fit(pred_proba_1)
    .transform(pred_proba_1)
)
get_clf_eval(y_test, custom_predict)