In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
#from sklearn import svm

In [None]:
# Load the dataset from "breast_cancer.csv", resolved relative to the
# notebook's working directory.
breast_cancer = pd.read_csv("breast_cancer.csv")

In [None]:
# Preview the first rows to check that the file parsed as expected.
breast_cancer.head()

In [None]:
# (rows, columns) of the loaded frame.
breast_cancer.shape

In [None]:
# Per-column dtypes and non-null counts.
breast_cancer.info()

In [None]:
# Number of missing values in each column.
breast_cancer.isnull().sum()

In [None]:
# Remove the "Unnamed: 32" column.
# Reassigning (instead of dropping in place) together with errors="ignore"
# keeps this cell safe to re-run: a second execution no longer raises
# KeyError once the column is already gone. `axis=1` was redundant because
# `columns=` already names the axis.
# NOTE(review): presumably an empty artifact column from the CSV export —
# confirm against the isnull() summary.
breast_cancer = breast_cancer.drop(columns="Unnamed: 32", errors="ignore")

In [None]:
# Preview the frame again now that "Unnamed: 32" has been removed.
breast_cancer.head()

In [None]:
# Summary statistics for the frame's columns.
breast_cancer.describe()

In [None]:
# Number of rows per diagnosis label (class balance check).
breast_cancer["diagnosis"].value_counts()

In [None]:
# Encode the "diagnosis" labels as integers in a new "target" column, then
# remove the original column.
#
# The guard makes the cell re-runnable: the original dropped "diagnosis" in
# place, so executing the cell a second time raised KeyError on the lookup.
if "diagnosis" in breast_cancer.columns:
    label_encode = LabelEncoder()

    # fit_transform learns the distinct labels and returns their integer codes.
    labels = label_encode.fit_transform(breast_cancer["diagnosis"])

    breast_cancer["target"] = labels

    # Reassign rather than mutate in place so the step has explicit lineage.
    breast_cancer = breast_cancer.drop(columns="diagnosis")
elif "target" not in breast_cancer.columns:
    # Neither the raw nor the encoded column exists: fail loudly instead of
    # silently leaving the frame without a target.
    raise KeyError("expected a 'diagnosis' column to encode")

In [None]:
# Preview the frame with the encoded "target" column in place of "diagnosis".
breast_cancer.head()

In [None]:
# Number of rows per encoded target value.
breast_cancer["target"].value_counts()

0 ---> Benign

1 ---> Malignant

In [None]:
# Mean of every column within each target class.
breast_cancer.groupby("target").mean()

In [None]:
# Switch on seaborn's default styling, then draw one bar per target class
# showing how many rows fall into it.
sns.set()
sns.countplot(data=breast_cancer, x="target")

In [None]:
# List every column label, one per line.
for column_name in breast_cancer.columns:
    print(column_name)

In [None]:
# Draw one distribution plot per column of the frame.
for column_name in breast_cancer.columns:
    sns.displot(x=column_name, data=breast_cancer)
    

In [None]:
# Histogram of radius_mean with a KDE overlay, normalised to a density.
# sns.distplot is deprecated in seaborn; histplot(kde=True, stat="density")
# is the documented replacement for its default histogram-plus-KDE output.
sns.histplot(x=breast_cancer.radius_mean, kde=True, stat="density")

In [None]:
# Positional selection: all rows of the first and of the second column.
first_column, second_column = breast_cancer.iloc[:, 0], breast_cancer.iloc[:, 1]

In [None]:
# Print the values of the first column.
print(first_column)

In [None]:
# Print the values of the second column.
print(second_column)

In [None]:
# Scatter the second column against the first.
# NOTE(review): the first column looks like a record identifier (the sample
# inputs later in the notebook start with 842302) — confirm this plot is
# meaningful.
plt.scatter(first_column, second_column)

In [None]:
# One box plot per column, each drawn on a fresh figure.
for column_name in breast_cancer.columns:
    plt.figure()
    breast_cancer.boxplot(column=[column_name])

In [None]:
# Pairwise correlation between the frame's columns (pandas' default method).
correlation_matrix = breast_cancer.corr()

In [None]:
# Large canvas so the annotated cells of the correlation heat map stay legible.
plt.figure(figsize=(25, 25))
sns.heatmap(
    correlation_matrix,
    annot=True,    # write the coefficient inside each cell...
    fmt='.2f',     # ...rounded to two decimals
    cmap='Blues',
    cbar=True,
)
#plt.savefig('Correlation Heat map')

In [None]:
# Target vector: the encoded label column.
y = breast_cancer["target"]

# Feature matrix: every remaining column.
# NOTE(review): this keeps the frame's first column, which looks like a record
# identifier (the sample inputs later in the notebook start with 842302) —
# confirm it is meant to be used as a model feature.
x = breast_cancer.drop(columns="target")

In [None]:
# Print the feature frame.
print(x)

In [None]:
# Print the target series.
print(y)

In [None]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is repeatable across runs.
# NOTE(review): the split is not stratified on y — confirm that is acceptable
# for the class balance of this dataset.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 2)

In [None]:
# Shapes of the full, training, and test feature sets.
print(x.shape, x_train.shape, x_test.shape)

In [None]:
# Logistic-regression classifier with scikit-learn's default settings.
# NOTE(review): no feature scaling is applied in the visible cells — confirm
# the solver converges with the default iteration limit.
logistic_model = LogisticRegression()

In [None]:
# Fit the classifier on the training split.
logistic_model.fit(x_train, y_train)

In [None]:
# Predict on the rows the model was trained on and score those predictions
# against the known training labels.
x_train_prediction = logistic_model.predict(x_train)
training_data_accuracy = accuracy_score(
    y_true=y_train,
    y_pred=x_train_prediction,
)

In [None]:
# Report the accuracy measured on the training split.
print('Accuracy on training data = ', training_data_accuracy)

In [None]:
# Predict on the held-out rows and score those predictions against the
# held-out labels.
x_test_prediction = logistic_model.predict(x_test)
test_data_accuracy = accuracy_score(
    y_true=y_test,
    y_pred=x_test_prediction,
)

In [None]:
# Report the accuracy measured on the held-out test split.
print('Accuracy on test data = ', test_data_accuracy)

In [None]:
# One hand-entered sample: 31 values, in the same column order as the
# feature frame `x`.
input_data = (
    842302, 17.99, 10.38, 122.8, 1001, 0.1184, 0.2776, 0.3001, 0.1471, 0.2419,
    0.07871, 1.095, 0.9053, 8.589, 153.4, 0.006399, 0.04904, 0.05373, 0.01587,
    0.03003, 0.006193, 25.38, 17.33, 184.6, 2019, 0.1622, 0.6656, 0.7119,
    0.2654, 0.4601, 0.1189,
)

input_data_as_numpy_array = np.asarray(input_data)

# predict() expects a 2-D array; add a leading axis so the sample is a single row.
input_data_reshaped = input_data_as_numpy_array[np.newaxis, :]

prediction = logistic_model.predict(input_data_reshaped)
print(prediction)

# Class 0 is reported as benign, anything else as malignant.
print(
    "The Breast cancer is Benign."
    if prediction[0] == 0
    else "The Breast Cancer is Malignant."
)

In [None]:
import pickle

In [None]:
# Persist the trained model to disk.
# The context manager closes the file handle as soon as the dump finishes, so
# the bytes are flushed before any later cell reads the file back. The
# original passed a bare open() and never closed it.
filename = "breast_cancer_model.sav"
with open(filename, "wb") as model_file:
    pickle.dump(logistic_model, model_file)

In [None]:
# Load the saved model back from disk.
# The file is opened in a context manager so the handle is closed after
# reading (the original left it open).
# NOTE: pickle.load can execute arbitrary code — only unpickle files you
# created yourself or otherwise trust.
with open("breast_cancer_model.sav", "rb") as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Sanity-check the model that was reloaded from disk on one hand-entered
# sample (31 values, in the same column order as the feature frame `x`).
input_data = (842302,17.99,10.38,122.8,1001,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189)

input_data_as_numpy_array = np.asarray(input_data)

# predict() expects a 2-D array: reshape the sample into a single row.
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# Use the reloaded model here. The original called logistic_model.predict,
# so the pickled copy was never actually exercised.
prediction = loaded_model.predict(input_data_reshaped)
print(prediction)

# Class 0 is reported as benign, anything else as malignant.
if (prediction[0] == 0):
  print("The Breast cancer is Benign.")

else:
  print("The Breast Cancer is Malignant.")