In [5]:
import tensorflow as tf
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Download the dataset registered under id 17 in the UCI ML repository.
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Unpack the fetched bundle: X holds the feature columns, y the target column(s).
# (The original cell notes that both are pandas dataframes.)
dataset_payload = breast_cancer_wisconsin_diagnostic.data
X, y = dataset_payload.features, dataset_payload.targets

In [6]:
import pandas as pd

# Data preprocessing: join features and target into a single frame so rows stay
# aligned through every later step.
data = pd.concat([X, y], axis=1)

# Encode the label: Malignant ('M') -> 1, every other value (Benign 'B') -> 0.
data['Diagnosis'] = (data['Diagnosis'] == 'M').astype(int)

# Remove highly correlated features.
correlation_mat = data.corr()
# Correlation threshold, applied to the absolute correlation coefficient.
threshold = 0.95

# Only feature-vs-feature correlations are relevant; the label is excluded so
# it can never be picked for removal.
feature_corr = correlation_mat.drop(index='Diagnosis', columns='Diagnosis').abs()

# Walk the features in column order and drop a feature only when it is highly
# correlated with a feature that is being KEPT. The previous implementation
# visited every pair in both orders ((a, b) and (b, a)) and therefore removed
# both members of each correlated pair, leaving no representative behind.
# Comparing off-diagonal cells by position (instead of filtering on `!= 1`)
# also handles perfectly correlated features correctly.
highly_correlated = []  # (kept feature, dropped feature) pairs, for inspection
remove = set()          # which ones to remove
kept = []
for feature in feature_corr.columns:
    partner = next(
        (k for k in kept if feature_corr.loc[k, feature] > threshold),
        None,
    )
    if partner is None:
        kept.append(feature)
    else:
        highly_correlated.append((partner, feature))
        remove.add(feature)

# Drop the redundant features from the dataframe (sorted for a stable order).
data = data.drop(columns=sorted(remove))


In [10]:
import ydf
from sklearn.model_selection import KFold
import numpy as np
# Random-search space over forest size and tree depth.
# NOTE(review): `tuner` is configured here but never handed to the learner
# below, so every fold trains with the fixed num_trees=10 / max_depth=5.
# Confirm whether `tuner=tuner` was meant to be passed to RandomForestLearner.
tuner = ydf.RandomSearchTuner(num_trials=50)
tuner.choice("num_trees", [3, 4, 5, 6, 7, 8, 9, 10])
tuner.choice("max_depth", [3, 4, 5, 6, 7, 8, 9, 10])

# 9-fold cross-validation; shuffling with a fixed seed keeps the folds reproducible.
kf = KFold(n_splits=9, shuffle=True, random_state=50)

mod = []              # one trained model per fold
# Held-out accuracy of each fold's model. Previously named `eval`, which
# shadowed the builtin eval() for the rest of the kernel session.
fold_accuracies = []
for traI, testI in kf.split(data):
    train = data.iloc[traI]
    test = data.iloc[testI]
    model = ydf.RandomForestLearner(label='Diagnosis', num_trees=10, max_depth=5).train(train)
    mod.append(model)
    fold_accuracies.append(model.evaluate(test).accuracy)

# Keep the model with the highest accuracy on its own held-out fold
# (np.argmax returns the first fold on ties) and persist it to ./model.
# NOTE(review): that accuracy is an optimistic estimate for the saved model,
# because the same fold was used to select it.
index = np.argmax(fold_accuracies)
mo = mod[index]
mo.save("model")

Train model on 505 examples
Model trained in 0:00:00.021569
Train model on 505 examples
Model trained in 0:00:00.017100
Train model on 506 examples
Model trained in 0:00:00.019014
Train model on 506 examples
Model trained in 0:00:00.024439
Train model on 506 examples
Model trained in 0:00:00.018525
Train model on 506 examples
Model trained in 0:00:00.024339
Train model on 506 examples
Model trained in 0:00:00.024110
Train model on 506 examples
Model trained in 0:00:00.017930
Train model on 506 examples
Model trained in 0:00:00.032402
