In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import linear_model

%matplotlib inline

In [2]:
cancer1 = load_breast_cancer()
cancer1.data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [3]:
cancer1.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [4]:
df = pd.DataFrame(cancer1.data, columns = cancer1.feature_names)
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [5]:
tgt = pd.DataFrame(cancer1.target, columns = ["TGT"])
tgt.head()

Unnamed: 0,TGT
0,0
1,0
2,0
3,0
4,0


In [6]:
(X_train, X_test, Y_train, Y_test) = train_test_split(df, tgt, test_size = 0.3)

In [7]:
print("X_train shape", X_train.shape)
print("Y_train shape", Y_train.shape)
print("X_test shape", X_test.shape)
print("Y_test shape", Y_test.shape)

X_train shape (398, 30)
Y_train shape (398, 1)
X_test shape (171, 30)
Y_test shape (171, 1)


In [8]:
minmax = MinMaxScaler()
train_minmax = minmax.fit_transform(X_train)
test_minmax = minmax.fit_transform(X_test)

In [9]:
print("train_minmax shape", train_minmax.shape)
print("test_minmax shape", test_minmax.shape)

train_minmax shape (398, 30)
test_minmax shape (171, 30)


In [10]:
# print(train_minmax)
# print(test_minmax)
print()




In [11]:
model = LogisticRegression()

In [12]:
model.fit(train_minmax, Y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [13]:
pred = model.predict(test_minmax)

In [14]:
model.score(train_minmax, Y_train)

0.9673366834170855

In [15]:
model.score(test_minmax, Y_test)

0.9239766081871345

In [16]:
model.coef_

array([[-0.73965141, -0.81090666, -0.81068895, -1.01825827,  0.24596091,
        -0.51128919, -1.77318117, -2.33147101,  0.21585921,  1.16467524,
        -1.25217702,  0.36000552, -0.94256437, -0.85321961,  0.86819961,
         0.79160078,  0.43539633,  0.23315993,  0.39718584,  0.85891802,
        -1.59861357, -1.21726872, -1.49114273, -1.4256404 , -0.80926104,
        -0.68330474, -1.19326595, -2.26940335, -0.71751846, -0.24359455]])

In [17]:
model.intercept_

array([5.01127524])