Import libraries and load data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load scikit-learn's bundled breast cancer dataset; the result is a
# dict-like container (its keys are inspected in the next cell).
cancer = load_breast_cancer()

In [None]:
# List the fields available in the dataset container
cancer.keys()

We can pull information and arrays out of this dictionary to build a DataFrame and understand the features

The description of the features is as follows

In [None]:
# Print the free-text description stored under the 'DESCR' key
print(cancer['DESCR'])

Show the feature names

In [None]:
# Names of the feature columns, used below as the DataFrame column labels
cancer['feature_names']

Set up the DataFrame



In [None]:
# Build the feature table: the raw data array with one named column per feature.
feature_matrix = cancer['data']
feature_labels = cancer['feature_names']
df = pd.DataFrame(data=feature_matrix, columns=feature_labels)

# Column dtypes and non-null counts
df.info()

In [None]:
# Summary statistics for every column of the feature table
df.describe()

In [None]:
# Total number of missing values across the whole table:
# per-column null counts first, then summed over the columns.
df.isnull().sum().sum()


In [None]:
# Raw label array that is attached to the DataFrame as the 'Cancer' column below
cancer['target']

Adding the target data to the DataFrame

In [None]:
# Attach the labels as a new 'Cancer' column. The array is assigned directly:
# wrapping it in a DataFrame is unnecessary and would route the assignment
# through index alignment, which silently yields NaN labels if `df` ever
# carries a non-default index.
df['Cancer'] = cancer['target']
df.head()

Exploratory Data Analysis
Check the relative counts of malignant (0) vs benign (1)
cases of cancer (scikit-learn encodes this dataset's target as 0 = malignant, 1 = benign)

In [None]:
# Use the white-grid theme, then draw one bar per class label showing
# how many samples carry that label.
sns.set_style('whitegrid')
sns.countplot(data=df, x='Cancer', palette='RdBu_r')

Draw boxplots of all the mean features (first 10 columns) for '0' and '1' CANCER OUTCOME


In [None]:
# One boxplot per "mean" feature (the first 10 columns), split by class label.
mean_features = list(df.columns[:10])
for feature in mean_features:
    # Open a fresh figure *before* drawing so every feature gets its own
    # figure and no empty trailing figure is left behind. Iterating over the
    # list itself also covers the 10th feature, which an index range of
    # len - 1 would skip.
    plt.figure()
    sns.boxplot(x='Cancer', y=feature, data=df, palette='winter')

Not all the features separate the two classes equally clearly.
For example, from the following two plots it is clear that a smaller
mean area is generally associated with label 1 (benign in this dataset's encoding),
while nothing concrete can be said from the plot of mean smoothness

In [None]:
# Two side-by-side panels sharing the y axis (the 'Cancer' label column).
fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(12, 6))
left_ax, right_ax = axes

# Left panel: label against mean area
left_ax.scatter(df['mean area'], df['Cancer'])
left_ax.set_title("Cancer cases as a function of mean area", fontsize=15)

# Right panel: label against mean smoothness
right_ax.scatter(df['mean smoothness'], df['Cancer'])
right_ax.set_title("Cancer cases as a function of mean smoothness", fontsize=15)

Training and prediction
Train Test Split

In [None]:
# Feature matrix: every column except the 'Cancer' label.
df_feat = df.drop(columns=['Cancer'])
df_feat.head()

In [None]:
# Label vector: the 'Cancer' column on its own, as a Series.
df_target = df.loc[:, 'Cancer']
df_target.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_feat, df_target, test_size=0.30, random_state=101
)

# Preview the training features (only a cell's last expression is displayed,
# so a single call is enough).
X_train.head()

In [None]:
from sklearn.svm import SVC

# Baseline model: a support vector classifier with all-default
# hyperparameters, fitted on the training split.
model = SVC()
model.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predicted labels for the held-out test rows
predictions = model.predict(X=X_test)


In [None]:
# Confusion matrix of true vs. predicted labels for the baseline model
print(confusion_matrix(y_test,predictions))

In [None]:
# Text report of per-class metrics for the baseline model's test predictions
print(classification_report(y_test,predictions))

In [None]:
# Fraction of test rows whose predicted label differs from the true label
error_rate = (predictions != y_test).mean()
print("Misclassification error rate:", round(error_rate, 3))

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for the grid search: each C value is paired with each gamma
# value, always with the RBF kernel.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

In [None]:
# Grid search over param_grid with a fresh SVC; refit=True retrains the best
# parameter combination on the full training data once the search finishes.
grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=1)

In [None]:
# Run the grid search with the `grid` object built in the preceding cell
# (which uses the `param_grid` defined above). The imports and definitions
# are not repeated here, since earlier cells already provide them.
grid.fit(X_train, y_train)

# Because the search was created with refit=True, predict() uses the
# estimator retrained with the best parameter combination found.
grid_predictions = grid.predict(X_test)

# Confusion matrix of true vs. predicted labels on the held-out test set
print(confusion_matrix(y_test, grid_predictions))

In [None]:
# Text report of per-class metrics for the grid-search model's test predictions
print(classification_report(y_test,grid_predictions))