In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the raw CSV inside the Kaggle input mount.
data_path = '/kaggle/input/breast-cancer-wisconsin-data/data.csv'

# Load the table and display it as the cell output.
df = pd.read_csv(data_path)
df

### **Check Missing Values**

In [None]:
# Show each column's dtype next to its number of missing entries, so the cell
# actually answers "is anything missing?" rather than listing dtypes only.
pd.DataFrame({'dtype': df.dtypes, 'missing': df.isna().sum()})

### **Check Class Balance**

In [None]:
# Bar chart of class frequencies. The column is passed by keyword because
# recent seaborn releases no longer accept a positional data vector here.
sns.countplot(x='diagnosis', data=df);

In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [None]:
# Columns excluded from the feature matrix: the row identifier, the target,
# and 'Unnamed: 32' (presumably a CSV artefact column -- TODO confirm).
non_feature_cols = ['id', 'diagnosis', 'Unnamed: 32']

X = df.drop(columns=non_feature_cols)
y = df['diagnosis']

# Hold out 20% of the rows, stratified on the target; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# 2-D PCA projection used for visual inspection only.
# PCA picks directions of maximum variance, so features measured on larger
# numeric scales would dominate the components if left unscaled. Standardise
# first; the scaler is fitted on the training split only to avoid leaking
# test-set statistics.
viz_scaler = StandardScaler()
pca = PCA(n_components=2, whiten=True)
X_train_pca = pca.fit_transform(viz_scaler.fit_transform(X_train))
X_test_pca = pca.transform(viz_scaler.transform(X_test))

In [None]:
# Sanity check of the projected training data: expect (n_training_rows, 2),
# since the PCA above keeps two components.
X_train_pca.shape

In [None]:
# Scatter of the two principal components, coloured by diagnosis.
# x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y with a
# TypeError.
ax = sns.scatterplot(x=X_train_pca[:, 0], y=X_train_pca[:, 1], hue=y_train)
ax.set(xlabel='Principal component 1', ylabel='Principal component 2');

### **Train Data**

In [None]:
# List the feature names; these are the columns handed to the preprocessing
# step of the pipeline below.
X_train.columns

In [None]:
# All feature columns are routed through a single standardisation branch.
numerical_pipeline = Pipeline(steps=[('scaling', StandardScaler())])

preprocessor = ColumnTransformer(
    transformers=[('numeric', numerical_pipeline, X_train.columns)]
)

# Full model: standardise -> PCA -> SVC.
# The PCA settings are left at their defaults here and tuned by the search;
# the SVC solver is capped at 500 iterations.
pipeline = Pipeline(
    steps=[
        ('pre', preprocessor),
        ('pca', PCA()),
        ('algo', SVC(max_iter=500)),
    ]
)

In [None]:
# Show every tunable parameter of the pipeline; nested names use the
# `<step>__<param>` form needed for the search space (e.g. `pca__whiten`).
pipeline.get_params()

In [None]:
# Search space for the hyper-parameter search, keyed by `<step>__<param>`.
# Every entry is a finite list of candidate values.
parameter = dict(
    pca__n_components=[19],
    pca__whiten=[True, False],
    algo__C=[0.1, 1, 10, 100],
    algo__gamma=[1, 0.1, 0.01, 0.001],
)

In [None]:
# Every entry of `parameter` is a finite list, so the search space is a grid.
# Asking RandomizedSearchCV for more iterations than there are grid points only
# makes it warn and fall back to the grid size, so cap n_iter explicitly.
# NOTE: this size computation assumes list-valued entries; revisit it if a
# continuous distribution is ever added to `parameter`.
n_candidates = 1
for candidate_values in parameter.values():
    n_candidates *= len(candidate_values)

model = RandomizedSearchCV(
    pipeline,
    parameter,
    cv=3,
    n_iter=min(50, n_candidates),
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# Best hyper-parameters found by the search, then the score on the training
# and held-out test splits. Separate statements avoid building a throwaway
# `(None, None)` tuple that the notebook would echo as the cell result.
print(model.best_params_)
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))