In [None]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Load the breast cancer dataset through sklearn.datasets
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [None]:
# Summarise the loaded dataset instead of printing every array in full:
# list the fields it exposes, then the class names behind the target values.
print(breast_cancer_dataset.keys())
print(breast_cancer_dataset.target_names)

In [None]:
# Build a DataFrame from the feature matrix, using the feature names as column labels
data_frame = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [None]:
# Preview the first five rows of the DataFrame
data_frame.head(n=5)

In [None]:
# Attach the dataset's target values to the DataFrame as a 'label' column
data_frame['label'] = breast_cancer_dataset.target

In [None]:
# Preview the last five rows of the DataFrame
data_frame.tail(n=5)


In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple
data_frame.shape

In [None]:
# Print a concise summary of the DataFrame
data_frame.info()


In [None]:
# Count the missing/null entries in each column
missing_per_column = data_frame.isnull().sum()
missing_per_column

In [None]:
# Descriptive statistics for the DataFrame's columns
data_frame.describe()

In [None]:
# Number of samples in each target class
label_counts = data_frame['label'].value_counts()
label_counts

In [None]:
# Mean of every feature, computed separately for each target class
data_frame.groupby(by='label').mean()

In [None]:
# Split the DataFrame into the feature columns (X) and the target column (Y)
Y = data_frame['label']
X = data_frame.drop(columns='label')

In [None]:
# Preview the feature table with the notebook's rich display rather than a plain-text dump
X.head()

In [None]:
# Preview the target values rather than printing the whole Series
Y.head()

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Compare the shape of the full feature set with the training and testing portions
print(X.shape, X_train.shape, X_test.shape)

In [None]:
# Model Training ---> Logistic Regression
# The features are not scaled before fitting, and with the default iteration cap
# (max_iter=100) the solver typically stops early and emits a ConvergenceWarning.
# Raise the cap so the optimiser has room to converge.
model = LogisticRegression(max_iter=10000)

In [None]:
# Fit the logistic regression model on the training split
model.fit(X=X_train, y=Y_train)

In [None]:
# Model Evaluation ---> accuracy on the data the model was trained on
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_prediction,
)

In [None]:
# Report the accuracy measured on the training split
print('Accuracy on training data :', training_data_accuracy)

In [None]:
# Accuracy on the held-out test split
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_prediction,
)

In [None]:
# Report the accuracy measured on the test split
print('Accuracy on test data :', test_data_accuracy)

In [None]:
# Building a predictive system
# Feature values for a single sample (assumed to follow the column order of the training features)
input_data = (20.29,14.34,135.1,1297,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575,0.1374,0.205,0.4,0.1625,0.2364,0.07678)

# Change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape the numpy array to a single row, as we are predicting for one datapoint
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so give the sample the same column names;
# predicting on a bare array makes scikit-learn warn about missing feature names.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X_train.columns)

prediction = model.predict(input_data_frame)

print(prediction)

# A predicted label of 0 is reported as malignant, anything else as benign
if prediction[0] == 0:
    print('The breast cancer is Malignant')
else:
    print('The breast cancer is Benign')