# ロジスティック回帰

In [1]:
import pandas as pd
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [2]:
# データ取得
df = sns.load_dataset('titanic')
df.dropna(inplace=True)
df.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,182.0,182.0,182.0,182.0,182.0,182.0
mean,0.675824,1.192308,35.623187,0.467033,0.478022,78.919735
std,0.469357,0.516411,15.671615,0.645007,0.755869,76.490774
min,0.0,1.0,0.92,0.0,0.0,0.0
25%,0.0,1.0,24.0,0.0,0.0,29.7
50%,1.0,1.0,36.0,0.0,0.0,57.0
75%,1.0,1.0,47.75,1.0,1.0,90.0
max,1.0,3.0,80.0,3.0,4.0,512.3292


In [3]:
# 説明変数、目的変数を取得
X = df.loc[:, (df.columns!='survived') & (df.columns!='alive')]
X = pd.get_dummies(X, drop_first=True)
y = df['survived'] #y.unique()で1と0しかないことを確認すれば安心

In [4]:
# hold-out
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [5]:
# 学習：データが少なくパラメータが収束していないため警告表示される
model = LogisticRegression()
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [6]:
# 予測
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

In [7]:
# 評価
log_loss(y_test, y_pred_proba)

0.4111542544960858

In [8]:
# 係数
model.coef_

array([[-0.29604056, -0.0212999 ,  0.62278654, -0.37418118,  0.00478741,
        -0.88711361,  0.17505452,  0.38816208, -0.40447653, -0.02287815,
        -0.45303506, -0.40467948, -0.88711361,  1.21192042, -0.11122431,
        -1.20741802, -0.1415293 ,  0.6111008 , -0.13069551, -0.55151836,
        -0.40447653, -0.02287815]])

In [9]:
# 切片
model.intercept_

array([2.14281591])

In [10]:
# 分類名
model.classes_

array([0, 1])