In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('data.csv')

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [None]:
df.drop(['Unnamed: 32','id'], inplace = True, axis = 1 )

In [None]:
df['diagnosis'] = df['diagnosis'].map({'M':1,'B':0})

In [None]:
def scaling(X,Y, scale,over_sampling = False):
  scale.fit(X)
  scaled_data = scale.transform(X)
  osr = RandomOverSampler()
  if over_sampling:
    x_scaled, y_osr = osr.fit_resample(scaled_data, y)
  else:
    x_scaled = scaled_data
    y_osr = y
  return   x_scaled, y_osr

In [None]:
X = df.drop('diagnosis', axis = 1)
y = df['diagnosis']

In [None]:
grid = {
    'C':[0.1,1,10,100],
    'gamma':[1,0.1,0.01,0.001],
    'kernel':['linear','poly','sigmoid','rbf','laplacian'],
    'degree':[1,2,3,4,5]
}

Without Scaling

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
svc_m = SVC(random_state=101)
cross_val = cross_val_score(svc_m, X_train, y_train, cv = 5)
print("The traing accuracy", np.mean(cross_val))
svc_m.fit(X_train, y_train)
print('The test accuracy',svc_m.fit(X_train, y_train).score(X_test, y_test))


The traing accuracy 0.8943354430379747
The test accuracy 0.935672514619883


With scaling

## Defauld parameters

In [None]:
for i in [StandardScaler(), MinMaxScaler()]:
  print(f"The Scaling is:{i}")
  X_scaled,Y_scaled = scaling(X,y, i,True)
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y_scaled, test_size=0.3, random_state=42)
  svc_m = SVC(random_state=101)
  cross_val = cross_val_score(svc_m, X_train, y_train, cv = 5)
  print("The traing accuracy", np.mean(cross_val))
  svc_m.fit(X_train, y_train)
  print('The test accuracy',svc_m.fit(X_train, y_train).score(X_test, y_test))
  print("*******************************************************************")



The Scaling is:StandardScaler()
The traing accuracy 0.969959595959596
The test accuracy 0.9674418604651163
*******************************************************************
The Scaling is:MinMaxScaler()
The traing accuracy 0.9779191919191919
The test accuracy 0.9720930232558139
*******************************************************************


In [None]:
for i in [StandardScaler(), MinMaxScaler()]:
  print(f"The Scaling is:{i}")
  X_scaled, Y_scaled= scaling(X,y, i,True)
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y_scaled, test_size=0.3, random_state=42)
  svc_m = SVC(random_state=101)
  grid_search = GridSearchCV(svc_m, grid, cv = 5)
  grid_search.fit(X_train, y_train)
  print("the best parameters:",grid_search.best_params_)
  print("the best traing score:",grid_search.best_score_)
  print('The test accuracy',grid_search.score(X_test, y_test))
  print("*******************************************************************")



The Scaling is:StandardScaler()
the best parameters: {'C': 1, 'degree': 1, 'gamma': 1, 'kernel': 'linear'}
the best traing score: 0.9839595959595959
The test accuracy 0.9720930232558139
*******************************************************************
The Scaling is:MinMaxScaler()
the best parameters: {'C': 1, 'degree': 1, 'gamma': 1, 'kernel': 'rbf'}
the best traing score: 0.9779595959595959
The test accuracy 0.9767441860465116
*******************************************************************


In [None]:
The Scaling is:StandardScaler()
the best parameters: {'C': 1, 'degree': 1, 'gamma': 1, 'kernel': 'linear'}
the best traing score: 0.9748101265822784
The test accuracy 0.9766081871345029
*******************************************************************
The Scaling is:MinMaxScaler()
the best parameters: {'C': 1, 'degree': 2, 'gamma': 1, 'kernel': 'poly'}
the best traing score: 0.977373417721519
The test accuracy 0.9883040935672515
****************************************