In [186]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
# Load the Wisconsin (diagnostic) breast-cancer dataset; as_frame=True asks the
# loader to hand back pandas objects instead of raw NumPy arrays.
bc = load_breast_cancer(as_frame=True)
# Combined table: the feature columns plus a trailing `target` column
# (569 rows x 31 columns, as confirmed by the shape check in the next cell).
bc_df: pd.DataFrame = bc.frame

In [187]:
# Sanity-check the dimensions of the combined frame as (rows, columns).
bc_df.shape

(569, 31)

In [188]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column,
# including the 0/1 `target` column.
bc_df.describe()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0
mean,14.127292,19.289649,91.969033,654.889104,0.09636,0.104341,0.088799,0.048919,0.181162,0.062798,...,25.677223,107.261213,880.583128,0.132369,0.254265,0.272188,0.114606,0.290076,0.083946,0.627417
std,3.524049,4.301036,24.298981,351.914129,0.014064,0.052813,0.07972,0.038803,0.027414,0.00706,...,6.146258,33.602542,569.356993,0.022832,0.157336,0.208624,0.065732,0.061867,0.018061,0.483918
min,6.981,9.71,43.79,143.5,0.05263,0.01938,0.0,0.0,0.106,0.04996,...,12.02,50.41,185.2,0.07117,0.02729,0.0,0.0,0.1565,0.05504,0.0
25%,11.7,16.17,75.17,420.3,0.08637,0.06492,0.02956,0.02031,0.1619,0.0577,...,21.08,84.11,515.3,0.1166,0.1472,0.1145,0.06493,0.2504,0.07146,0.0
50%,13.37,18.84,86.24,551.1,0.09587,0.09263,0.06154,0.0335,0.1792,0.06154,...,25.41,97.66,686.5,0.1313,0.2119,0.2267,0.09993,0.2822,0.08004,1.0
75%,15.78,21.8,104.1,782.7,0.1053,0.1304,0.1307,0.074,0.1957,0.06612,...,29.72,125.4,1084.0,0.146,0.3391,0.3829,0.1614,0.3179,0.09208,1.0
max,28.11,39.28,188.5,2501.0,0.1634,0.3454,0.4268,0.2012,0.304,0.09744,...,49.54,251.2,4254.0,0.2226,1.058,1.252,0.291,0.6638,0.2075,1.0


In [189]:
# Print the description text shipped with the dataset (number of instances,
# attribute list, how the features were derived).
print(bc.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [190]:
# Class labels (0/1), one per sample. Because the dataset was loaded with
# as_frame=True, `bc.target` is a one-dimensional pandas Series rather than a
# DataFrame, so the annotation is Series to match the actual object.
y_bc: pd.Series = bc.target

In [191]:
# Feature matrix: the predictive attributes only; the class labels are kept
# separately (see `bc.target`).
x_bc: pd.DataFrame = bc.data

In [192]:
# Keep every row but only the first ten columns of the feature matrix, i.e. the
# "mean ..." measurements (mean radius through mean fractal dimension).
x_bc10 = x_bc.iloc[:, :10]
x_bc10

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883
...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016


In [193]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
(
    x_bc_train,
    x_bc_test,
    y_bc_train,
    y_bc_test,
) = train_test_split(
    x_bc10,
    y_bc,
    test_size=0.3,
    random_state=0,
)

In [194]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier created with the library's default settings.
# NOTE(review): the features are passed in unscaled and the solver runs with its
# default iteration limit, so it may stop before converging — TODO confirm by
# checking for a ConvergenceWarning on fit; if present, standardize the inputs
# or raise max_iter.
model_bc = LogisticRegression()

In [195]:
# Fit the classifier on the training split only; the test split is reserved for
# the evaluation cells below.
model_bc.fit(x_bc_train, y_bc_train)

In [196]:
# Show the fitted parameters: the weight vector (one coefficient per input
# feature) first, then the intercept on its own line.
print(model_bc.coef_, model_bc.intercept_, sep="\n")

[[ 4.43803517 -0.15747975 -0.49120368 -0.02299483 -0.1977349  -0.88107831
  -1.17436636 -0.51610975 -0.32416124 -0.06441917]]
[0.71206521]


In [197]:
# Predicted labels for the held-out samples, followed by the true labels in the
# same order, so the two arrays can be compared position by position.
print(model_bc.predict(x_bc_test), np.array(y_bc_test), sep="\n")

[1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 0 1 0 0 0 1 0 1 1 0 1 1 0 1 0 1 0 1 0 1 1 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 1
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 0 0 1 0 1 1 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0
 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1 0 1 0 1 0 1 1 1]
[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 1 0 1 0 1 0 1 0 1
 0 1 0 0 1 0 1 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 1
 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0
 1 1 1 1 1 1 1 0 1 0 1 0 0 1 1 0 1 0 0 0 1 1 1]


In [198]:
# Per-sample class probabilities on the test split: one row per sample and one
# column per class, so each row sums to 1.
# NOTE(review): this prints the whole array; slicing (e.g. the first few rows)
# would keep the notebook output short.
print(model_bc.predict_proba(x_bc_test))

[[4.57042606e-01 5.42957394e-01]
 [1.79395138e-01 8.20604862e-01]
 [8.66171843e-02 9.13382816e-01]
 [1.65251193e-01 8.34748807e-01]
 [5.37373549e-02 9.46262645e-01]
 [4.12045708e-02 9.58795429e-01]
 [2.02153766e-01 7.97846234e-01]
 [3.44362161e-02 9.65563784e-01]
 [3.93259776e-02 9.60674022e-01]
 [2.17382152e-02 9.78261785e-01]
 [5.21246619e-01 4.78753381e-01]
 [5.20485625e-01 4.79514375e-01]
 [3.52139768e-02 9.64786023e-01]
 [7.88984581e-01 2.11015419e-01]
 [2.92045774e-01 7.07954226e-01]
 [5.90735218e-01 4.09264782e-01]
 [9.94202404e-02 9.00579760e-01]
 [9.99915622e-01 8.43783506e-05]
 [9.99057991e-01 9.42008764e-04]
 [9.99965744e-01 3.42556665e-05]
 [1.82782202e-01 8.17217798e-01]
 [9.33806436e-01 6.61935642e-02]
 [7.28143828e-02 9.27185617e-01]
 [4.02062913e-02 9.59793709e-01]
 [9.99818823e-01 1.81177362e-04]
 [1.47654093e-02 9.85234591e-01]
 [8.44804001e-03 9.91551960e-01]
 [8.55281169e-01 1.44718831e-01]
 [6.50238395e-02 9.34976161e-01]
 [9.94539733e-01 5.46026657e-03]
 [2.281561

In [199]:
# Accuracy on the held-out test split first, then on the training split, each
# on its own line.
print(
    model_bc.score(x_bc_test, y_bc_test),
    model_bc.score(x_bc_train, y_bc_train),
    sep="\n",
)

0.9181286549707602
0.907035175879397
