### LOGISTIC REGRESSION

In [1]:
# Load libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings("ignore")

In [2]:
# Restore the dataset from the pickle file named 'data' (relative path).
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
data = pd.read_pickle(filepath_or_buffer='data')

In [3]:
# Name the label column and the predictor subset up front.
# NOTE(review): the "DT" in features_DT_list presumably refers to a decision-tree
# based selection done elsewhere - confirm.
target = 'diagnosis'
features_DT_list = [
    'texture_mean',
    'area_worst',
    'smoothness_worst',
    'area_mean',
    'concavity_mean',
]

# Labels are the target column; predictors are the selected columns of the
# frame that remains once the target has been dropped.
y = data[target]
X = data.drop(columns=[target])[features_DT_list]

In [4]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=100,
)

# Build the classifier and fit it on the training portion only.
# fit() returns the estimator itself, so lr_model is the fitted model.
lr_model = LogisticRegression(
    solver='newton-cg',
    multi_class='ovr',
    C=10,
    random_state=0,
).fit(X_train, y_train)

In [5]:
# Predicted class label for every row of the held-out set.
y_pred = lr_model.predict(X=X_test)

In [6]:
# Report overall accuracy, then the per-class precision / recall / F1 table.
print('Accuracy:', accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    'Classification Report:',
    classification_report(y_true=y_test, y_pred=y_pred),
    sep='\n',
)

Accuracy: 0.9649122807017544
Classification Report:
              precision    recall  f1-score   support

           B       0.97      0.97      0.97       102
           M       0.96      0.96      0.96        69

    accuracy                           0.96       171
   macro avg       0.96      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



In [7]:
# Encode the string labels as integers before computing the ROC AUC.
#
# The encoder is fitted ONCE, on the true labels, and that same mapping is
# reused for the predictions. Refitting on y_pred would build an independent
# mapping, which silently mislabels the predictions whenever they do not
# contain every class (e.g. a prediction vector holding only the second class
# would be encoded as all zeros, i.e. as the first class).
le = LabelEncoder()
y_test = le.fit_transform(y_test)
y_pred = le.transform(y_pred)

# NOTE(review): y_pred holds hard class predictions, so this is the AUC of a
# single operating point rather than of the full ROC curve. For a
# threshold-independent AUC, pass continuous scores instead, e.g.
# lr_model.predict_proba(X_test)[:, 1] or lr_model.decision_function(X_test).
print('ROC_AUC Score', roc_auc_score(y_test, y_pred))

ROC_AUC Score 0.9635549872122762


In [8]:
# Logistic Regression attributes

In [9]:
# Class labels known to the fitted model; the columns of predict_proba /
# predict_log_proba follow this order.
lr_model.classes_

array(['B', 'M'], dtype=object)

In [10]:
# Fitted coefficients of the decision function: a single row for this binary
# model, one column per input feature (same order as features_DT_list).
# Positive values push the prediction towards the second class in classes_.
lr_model.coef_

array([[ 0.30351628,  0.0278593 ,  3.81442389, -0.02193761,  8.29138305]])

In [11]:
# Bias term added to the decision function (fit_intercept is left at its default).
lr_model.intercept_

array([-16.22625165])

In [12]:
# Number of solver iterations actually run; compare against max_iter to see
# whether the solver stopped before hitting the iteration cap.
lr_model.n_iter_

array([67])

In [13]:
# Logistic Regression methods

In [14]:
# Signed confidence score for each test row: positive values fall on the side
# of the second class in classes_, negative values on the side of the first.
lr_model.decision_function(X=X_test)

array([ 8.03477155, -0.90820965,  9.31492411, -8.23170108, -4.51521445,
       -6.23422346,  3.40070415, 16.94382884,  0.59826033, -3.07264851,
       -6.86671444, 23.09243458, 59.37066552, 20.64859843, -4.95729601,
        3.03848902,  4.48050063, -2.75455119, -5.714324  , -4.98294785,
       -0.11682399, -6.93273251,  0.35723586, 37.29315207,  6.17666583,
        6.90367391, -4.0031691 , -5.87687208, -3.94685489, -5.60534961,
       -3.08802997, -0.60845027, -5.53946625, 11.47209387, 12.6901279 ,
       -5.55474657, -4.17864169, -3.4719373 , -8.83263999, 26.57229466,
        6.46703978, -6.49302828, -6.56494348,  5.95600212, -2.79141501,
       12.13062113, -3.60054023, -4.09631786, -6.9463151 , -7.32521918,
        1.31819817, -8.89850712,  0.17854957,  4.79459819, -3.94778817,
       -0.29605636, -4.73694432,  0.10683138, -3.65659371, -6.00768315,
       15.68345841, 11.86887826, -4.29510354, -3.00148273, 19.66612917,
       -5.70624231,  6.77710018, 38.45167725, -5.19624207,  9.06

In [15]:
# Hyperparameters of the estimator: the values passed at construction plus the
# defaults for everything left unspecified.
lr_model.get_params(deep=True)

{'C': 10,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'ovr',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 0,
 'solver': 'newton-cg',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [16]:
# Predicted class label for each row of the held-out set.
lr_model.predict(X=X_test)

array(['M', 'B', 'M', 'B', 'B', 'B', 'M', 'M', 'M', 'B', 'B', 'M', 'M',
       'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'M', 'M',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B',
       'M', 'M', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B',
       'M', 'M', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'B', 'M',
       'B', 'M', 'M', 'B', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'M',
       'M', 'B', 'B', 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'M', 'B', 'B',
       'B', 'B', 'M', 'B', 'M', 'M', 'B', 'M', 'M', 'M', 'M', 'B', 'B',
       'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'M',
       'B', 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'B',
       'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'M', 'B', 'M',
       'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B',
       'M', 'B'], dtype=object)

In [17]:
# Natural log of the class probabilities, one column per entry of classes_.
# A probability that underflows to 0 shows up here as -inf.
lr_model.predict_log_proba(X=X_test)

array([[-8.03509550e+00, -3.23946063e-04],
       [-3.38787790e-01, -1.24699744e+00],
       [-9.31501418e+00, -9.00658811e-05],
       [-2.66047922e-04, -8.23196713e+00],
       [-1.08818359e-02, -4.52609629e+00],
       [-1.95923099e-03, -6.23618269e+00],
       [-3.43350988e+00, -3.28057374e-02],
       [-1.69438289e+01, -4.37913791e-08],
       [-1.03636507e+00, -4.38104738e-01],
       [-4.52585740e-02, -3.11790709e+00],
       [-1.04135226e-03, -6.86775579e+00],
       [-2.30924343e+01, -9.35584943e-11],
       [           -inf,  0.00000000e+00],
       [-2.06485985e+01, -1.07752562e-09],
       [-7.00730777e-03, -4.96430332e+00],
       [-3.08528407e+00, -4.67950532e-02],
       [-4.49176469e+00, -1.12640624e-02],
       [-6.16947072e-02, -2.81624590e+00],
       [-3.29295177e-03, -5.71761695e+00],
       [-6.83044797e-03, -4.98977830e+00],
       [-6.36440198e-01, -7.53264185e-01],
       [-9.74856793e-04, -6.93370737e+00],
       [-8.87633185e-01, -5.30397323e-01],
       [   

In [18]:
# Estimated probability of each class per test row; columns follow classes_
# and each row sums to 1.
lr_model.predict_proba(X=X_test)

array([[3.23893598e-04, 9.99676106e-01],
       [7.12633661e-01, 2.87366339e-01],
       [9.00618253e-05, 9.99909938e-01],
       [9.99733987e-01, 2.66012535e-04],
       [9.89177157e-01, 1.08228429e-02],
       [9.98042687e-01, 1.95731295e-03],
       [3.22734656e-02, 9.67726534e-01],
       [4.37913782e-08, 9.99999956e-01],
       [3.54741804e-01, 6.45258196e-01],
       [9.55750318e-01, 4.42496823e-02],
       [9.98959190e-01, 1.04081024e-03],
       [9.35584943e-11, 1.00000000e+00],
       [0.00000000e+00, 1.00000000e+00],
       [1.07752562e-09, 9.99999999e-01],
       [9.93017186e-01, 6.98281383e-03],
       [4.57170452e-02, 9.54282955e-01],
       [1.12008603e-02, 9.88799140e-01],
       [9.40169870e-01, 5.98301300e-02],
       [9.96712464e-01, 3.28753595e-03],
       [9.93192827e-01, 6.80717348e-03],
       [5.29172825e-01, 4.70827175e-01],
       [9.99025618e-01, 9.74381775e-04],
       [4.11628850e-01, 5.88371150e-01],
       [0.00000000e+00, 1.00000000e+00],
       [2.073036