In [1]:
# Import dependencies

%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import os
import joblib

In [2]:
# Load the prepared dataset from the working directory and preview it.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# saved index -- consider index_col=0 if it is not meant to be a feature.

final_data = pd.read_csv(filepath_or_buffer='final_data.csv')
final_data.head(n=5)

Unnamed: 0,diagnosis,texture_mean,area_mean,smoothness_mean,compactness_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,texture_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,smoothness_worst,symmetry_worst,fractal_dimension_worst
0,M,10.38,1001.0,0.1184,0.2776,0.1471,0.2419,0.07871,0.9053,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,0.1622,0.4601,0.1189
1,M,17.77,1326.0,0.08474,0.07864,0.07017,0.1812,0.05667,0.7339,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,0.1238,0.275,0.08902
2,M,21.25,1203.0,0.1096,0.1599,0.1279,0.2069,0.05999,0.7869,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,0.1444,0.3613,0.08758
3,M,20.38,386.1,0.1425,0.2839,0.1052,0.2597,0.09744,1.156,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,0.2098,0.6638,0.173
4,M,14.34,1297.0,0.1003,0.1328,0.1043,0.1809,0.05883,0.7813,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,0.1374,0.2364,0.07678


In [3]:
# Select the three predictor columns (X) and the diagnosis label (y)

feature_columns = ["concave points_mean", "area_mean", "area_se"]
X = final_data.loc[:, feature_columns]
y = final_data.loc[:, "diagnosis"]
print(X.shape, y.shape)

(569, 3) (569,)


In [4]:
# Hold out 40% of the rows for testing. The split is stratified on y and
# uses a fixed random_state so it is repeatable across runs.

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.40,
    stratify=y,
    random_state=1,
)

In [5]:
# Develop logistic regression

from sklearn.linear_model import LogisticRegression
# Instantiate with library defaults; no hyperparameters are overridden here.
classifier = LogisticRegression()
# Bare last expression so the notebook displays the estimator's configuration.
classifier

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [6]:
# Fit the logistic regression on the training split only.
classifier.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [7]:
# Score the fitted model on both splits so the two numbers can be compared.
train_accuracy = classifier.score(X_train, y_train)
test_accuracy = classifier.score(X_test, y_test)
print(f"Training Data Score: {train_accuracy}")
print(f"Testing Data Score: {test_accuracy}")

Training Data Score: 0.9032258064516129
Testing Data Score: 0.8903508771929824


In [8]:
# Display the learned coefficients of the fitted model.
classifier.coef_

array([[0.2608048 , 0.01015042, 0.06990316]])

In [9]:
# Predict on the held-out split and preview the first few results next to
# the true labels. The preview size is defined once so the printed label
# always matches the slice (previously the text said 20 but 15 were shown).
predictions = classifier.predict(X_test)
n_preview = 15
print(f"First {n_preview} Predictions:   {predictions[:n_preview]}")
print(f"First {n_preview} Actual labels: {y_test[:n_preview].tolist()}")

First 20 Predictions:   ['B' 'B' 'M' 'M' 'M' 'M' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B' 'B']
First 20 Actual labels: ['B', 'B', 'M', 'M', 'M', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B']


In [10]:
# Classification report and confusion matrix for the test split

from sklearn.metrics import confusion_matrix, classification_report, precision_score

print(classification_report(y_test, predictions, digits=3))

cfm = confusion_matrix(y_test, predictions)

# Rows are actual classes and columns are predicted classes, in the same
# B-then-M order as the printed report, so the flattened 2x2 matrix reads
# TN, FP, FN, TP with M treated as the positive class.
true_negative, false_positive, false_negative, true_positive = cfm.ravel()

print('Confusion Matrix: \n', cfm, '\n')

for cell_name, cell_count in (
    ('True Negative:', true_negative),
    ('False Positive:', false_positive),
    ('False Negative:', false_negative),
    ('True Positive:', true_positive),
):
    print(cell_name, cell_count)

# Share of test rows that were classified correctly, as a percentage.
correct_total = true_negative + true_positive
accuracy_pct = round(correct_total / len(predictions) * 100, 1)
print('Correct Predictions', accuracy_pct, '%')

              precision    recall  f1-score   support

           B      0.883     0.951     0.916       143
           M      0.905     0.788     0.843        85

    accuracy                          0.890       228
   macro avg      0.894     0.870     0.879       228
weighted avg      0.891     0.890     0.889       228

Confusion Matrix: 
 [[136   7]
 [ 18  67]] 

True Negative: 136
False Positive: 7
False Negative: 18
True Positive: 67
Correct Predictions 89.0 %


In [11]:
# Persist the fitted classifier to a file in the working directory

filename = 'finalized_model.sav'
joblib.dump(value=classifier, filename=filename)

['finalized_model.sav']

In [12]:
# Load the model from disk
    
# Reload the persisted model and score *it* on the test split, so this cell
# actually verifies the save/load round trip. (Scoring the in-memory
# `classifier` here would leave `loaded_model` unused and untested.)
# NOTE: joblib.load unpickles the file -- only load model files you trust.
loaded_model = joblib.load(filename)
result = loaded_model.score(X_test, y_test)
print(result)

0.8903508771929824


In [13]:
# Hand-entered feature values for the first hypothetical sample.
new_concave_point_value, new_area_value, new_area_sevalue = 0.017, 570, 14

In [14]:
# Build a single-row, list-of-lists input from the three values above.
feature_row = [new_concave_point_value, new_area_value, new_area_sevalue]
new_breastcancer_data = [feature_row]
new_breastcancer_data

[[0.017, 570, 14]]

In [15]:
# Predict the class for the new sample. The values are wrapped in a DataFrame
# carrying the training column names so the input matches what the model was
# fitted on: this pins the feature order explicitly and avoids the
# "X does not have valid feature names" warning in recent scikit-learn.
new_sample = pd.DataFrame(new_breastcancer_data, columns=X.columns)
predicted_class = classifier.predict(new_sample)
print(predicted_class)

['B']


In [16]:
# Hand-entered feature values for the second hypothetical sample
# (reuses the same variable names as the first sample).
new_concave_point_value, new_area_value, new_area_sevalue = 0.05, 648, 38.7

In [17]:
# Build a single-row, list-of-lists input for the second sample.
feature_row = [new_concave_point_value, new_area_value, new_area_sevalue]
new_breastcancer_data1 = [feature_row]
new_breastcancer_data1

[[0.05, 648, 38.7]]

In [18]:
# Predict the class for the second sample. As with any input to a model
# fitted on a DataFrame, the values are wrapped in a DataFrame carrying the
# training column names: this pins the feature order explicitly and avoids
# the "X does not have valid feature names" warning in recent scikit-learn.
new_sample1 = pd.DataFrame(new_breastcancer_data1, columns=X.columns)
predicted_class = classifier.predict(new_sample1)
print(predicted_class)

['M']
