## Breast Cancer Coimbra Dataset (UCI) using Logistic Regression with Scikit-Learn

#### Part 1: Data Preprocessing

In [34]:
# Import the libraries
import numpy as np                 # multidimensional arrays and numeric helpers
import pandas as pd               # loading the dataset from CSV
import matplotlib.pyplot as plt   # plotting the results

In [35]:
# Load the CSV and separate it into a predictor matrix and a target vector.
dataset = pd.read_csv('Breast cancer.csv')
X = dataset.iloc[:, :-1].to_numpy()   # every column except the last -> predictor attributes
y = dataset.iloc[:, -1].to_numpy()    # last column -> target attribute

In [36]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split identical from one run to the next.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0,
)

In [37]:
# Feature scaling: the scaler is fitted on the training rows only, and that
# same fitted transformation is then applied to both the train and test splits.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

#### Part 2: Building the Logistic Regression Model

In [38]:
# Import the logistic-regression classifier from scikit-learn
from sklearn.linear_model import LogisticRegression

In [39]:
# Initialise the classifier; random_state is pinned to 0 so the seed is the same on every run
LG = LogisticRegression(random_state=0)

In [40]:
# Fit the model on the scaled training features and their labels
LG.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

#### Part 3: Making Predictions and Visualizing the Results

In [41]:
# Predict the class label of every test-set sample
y_pred = LG.predict(X_test)
# Bare expression so the notebook displays the predicted labels
y_pred

array([1, 2, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2,
       1, 2], dtype=int64)

In [42]:
# Confusion matrix of the test-set predictions
# (rows: true classes, columns: predicted classes)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[ 5,  6],
       [ 3, 10]], dtype=int64)

In [43]:
# Accuracy: fraction of test-set samples whose predicted label matches the true one
from sklearn.metrics import accuracy_score
ac = accuracy_score(y_true=y_test, y_pred=y_pred)
ac

0.625

In [None]:
# Visualising the Test set results
#
# LG was fitted on every predictor column of X_train, so it can only predict
# on inputs with that same number of columns. A decision-region plot is
# two-dimensional, so two features are drawn on the axes and every other
# feature is held at 0.0 -- the training mean after StandardScaler -- which
# gives a 2-D slice through the full decision function. When the data has
# exactly two predictors this is the ordinary decision-region plot.
from matplotlib.colors import ListedColormap

X_set, y_set = X_test, y_test
feat_x, feat_y = 0, 1               # indices of the features shown on the x / y axes
class_colors = ('red', 'green')     # one colour per class, in np.unique(y_set) order

# Dense grid over the two plotted features, padded by one unit on each side.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, feat_x].min() - 1, stop=X_set[:, feat_x].max() + 1, step=0.01),
    np.arange(start=X_set[:, feat_y].min() - 1, stop=X_set[:, feat_y].max() + 1, step=0.01))

# Full-width model input: one row per grid point, plotted features filled
# from the grid, all remaining features left at 0.0.
grid = np.zeros((X1.size, X_set.shape[1]))
grid[:, feat_x] = X1.ravel()
grid[:, feat_y] = X2.ravel()
regions = LG.predict(grid).reshape(X1.shape)

plt.contourf(X1, X2, regions, alpha=0.75, cmap=ListedColormap(class_colors))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())

# Overlay the real test points, coloured by their true class. `color=` is used
# instead of `c=` because a single RGB(A) value passed as `c` is ambiguous to
# matplotlib and triggers a warning.
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, feat_x], X_set[y_set == j, feat_y],
                color=class_colors[i], label=j)

plt.title('Logistic Regression (Test set)')
# Label the axes with the actual column names of the plotted features.
plt.xlabel('{} (standardised)'.format(dataset.columns[feat_x]))
plt.ylabel('{} (standardised)'.format(dataset.columns[feat_y]))
plt.legend()
plt.show()