# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import pathlib
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

# Load Data

In [None]:
# The dataset directory is looked up under "input" in the parent of the
# current working directory.
path = pathlib.Path.cwd().parent.joinpath("input", "breast-cancer-wisconsin-data")

# Read the raw CSV into the working frame.
df = pd.read_csv(f"{path}/data.csv")

# Data preprocessing

In [None]:
# Keep every column except "id" and "Unnamed: 32".
keys = [key for key in df.columns if key not in {"id", "Unnamed: 32"}]

# Treat zeros as missing values and drop every row that contains one.
# This runs before the label is encoded, so rows that will be encoded as 0
# are not discarded by this step.
df = df[keys].replace(0, np.nan).dropna()

# Encode the diagnosis column: "M" -> 1, "B" -> 0.
# An explicit mapping followed by an integer cast yields a numeric label
# column directly, instead of relying on replace() implicitly downcasting an
# object column. Any label other than "M"/"B" becomes NaN and makes the cast
# raise, rather than leaving a mixed-type target column behind.
df["diagnosis"] = df["diagnosis"].map({"M": 1, "B": 0}).astype("int64")

# Build Datasets

In [None]:
# Split dataset: a seeded random 95% sample of the rows is used for modeling,
# the rows that were not sampled are kept aside as unseen data.
data = df.sample(frac=0.95, random_state=786)
data_unseen = df.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
# Report the size of the modeling sample itself, not of the full frame.
print(f"Data for Modeling: {data.shape}")
print(f"Unseen Data For Predictions: {data_unseen.shape}")

# Build train and test datasets.
# Columns are selected by name so the split does not depend on the label
# happening to be the first entry of `keys`.
target = "diagnosis"
features = [key for key in keys if key != target]
x_train, y_train = data[features], data[target]
x_test, y_test = data_unseen[features], data_unseen[target]

# Training

In [None]:
# Fit three tree ensembles on the training split, each with 1000 estimators.
# All three estimators are randomized; seeding them (with the same value used
# for the train/unseen split) makes the reported accuracies reproducible from
# one run to the next.
RANDOM_STATE = 786

rf = RandomForestClassifier(n_estimators=1000, random_state=RANDOM_STATE)
rf.fit(x_train, y_train)

gb = GradientBoostingClassifier(n_estimators=1000, random_state=RANDOM_STATE)
gb.fit(x_train, y_train)

ab = AdaBoostClassifier(n_estimators=1000, random_state=RANDOM_STATE)
ab.fit(x_train, y_train)

# Predictions

In [None]:
# Print, for each fitted model, its accuracy on the training rows and on the
# held-out rows, as a percentage with three significant digits.
fitted_models = (
    ("Random Forest", rf),
    ("Gradient boosting", gb),
    ("Adaptative Boosting", ab),
)

for label, model in fitted_models:
    print(f"\nPredictions of {label} algorithm:")
    print(f"\tTraining Accuracy : {100 * model.score(x_train, y_train):.3g}%")
    print(f"\tTesting Accuracy : {100 * model.score(x_test, y_test):.3g}%")