# Cancer

In [None]:
# Ask IPython to render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

## Import libs

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

## Import dataset

In [None]:
# Read the CSV (path is relative to the notebook's working directory) into a DataFrame.
cancer = pd.read_csv(filepath_or_buffer="../input/cancer.csv")

### Display some data

In [None]:
# Show the first ten rows.
cancer.head(n=10)

In [None]:
# Show the last ten rows.
cancer.tail(n=10)

In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes).
cancer.info()

In [None]:
# Descriptive statistics for the columns of the DataFrame.
cancer.describe()

In [None]:
# Count how many rows carry each diagnosis label.
diagnosis_labels = cancer["diagnosis"]
diagnosis_labels.value_counts()

### Visualize some features

In [None]:
# Joint kernel-density plot of mean texture (x) against mean radius (y).
# The variables are passed by keyword: recent seaborn releases make `x`/`y`
# keyword-only, so the former positional call raises a TypeError there.
sns.jointplot(data=cancer, x="texture_mean", y="radius_mean", kind="kde");

In [None]:
# Distribution of the mean radius for each diagnosis label.
sns.violinplot(data=cancer, x="diagnosis", y="radius_mean")

In [None]:
# One KDE curve of the mean radius per diagnosis label, overlaid on a single facet.
# aspect=3 stretches the plot horizontally.
fig = sns.FacetGrid(cancer, hue="diagnosis", aspect=3)
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
fig.map(sns.kdeplot, "radius_mean", fill=True)
fig.add_legend()

In [None]:
# Clustered heatmap of the pairwise feature correlations.
# - clustermap creates its own figure, so the size goes through its `figsize`
#   argument; a preceding plt.figure(...) call only left an empty extra figure.
# - Only numeric/bool columns are correlated: the "diagnosis" column still holds
#   string labels at this point, and DataFrame.corr raises on non-numeric
#   columns in recent pandas releases (older releases dropped them silently).
numeric_features = cancer.select_dtypes(include=["number", "bool"])
sns.clustermap(numeric_features.corr(), figsize=(12, 12))

## Machine learning

In [None]:
# Encode the diagnosis label as an integer target ("B" -> 0, "M" -> 1),
# append it as `final_class`, and drop the original text column.
label_to_code = {"B": 0, "M": 1}
cancer = (
    cancer
    .assign(final_class=cancer["diagnosis"].map(label_to_code))
    .drop(columns="diagnosis")
)
cancer.head()

In [None]:
# Random 80/20 split of the rows into training and test sets.
# A fixed random_state makes the split, and therefore every score reported
# further down, reproducible across kernel restarts.
data_train = cancer.sample(frac=0.8, random_state=42)
# The test set is every row whose index label was not drawn for training.
data_test = cancer.drop(data_train.index)

# Separate the features (X) from the target column (y) in both subsets.
X_train = data_train.drop(["final_class"], axis=1)
y_train = data_train["final_class"]
X_test = data_test.drop(["final_class"], axis=1)
y_test = data_test["final_class"]

### Logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression with default hyper-parameters (fit returns the estimator itself).
lr = LogisticRegression()
lr = lr.fit(X_train, y_train)

# Predict on the held-out rows, then print true labels followed by predictions.
y_lr = lr.predict(X_test)
print(np.asarray(y_test))
print(y_lr)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Fraction of test rows the logistic regression classified correctly.
lr_score = accuracy_score(y_true=y_test, y_pred=y_lr)
print(lr_score)

In [None]:
# Confusion matrix of the logistic regression (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_lr)
print(cm)

### Decision Tree Classifier

In [None]:
from sklearn import tree

# Shallow decision tree: at most 5 levels, at least 10 training samples per leaf.
dtc = tree.DecisionTreeClassifier(max_depth=5, min_samples_leaf=10)
dtc = dtc.fit(X_train, y_train)

# Score the tree on the held-out rows.
y_dtc = dtc.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_dtc))
cm = confusion_matrix(y_true=y_test, y_pred=y_dtc)
print(cm)

### Random Forest

In [None]:
from sklearn import ensemble

# Random forest with default hyper-parameters; the seed makes the fitted
# forest (and its feature importances) reproducible between runs.
rf = ensemble.RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Predict with the forest itself. The previous version called dtc.predict
# here, so the reported metrics were those of the decision tree.
y_rf = rf.predict(X_test)
print(accuracy_score(y_test, y_rf))
cm = confusion_matrix(y_test, y_rf)
print(cm)

In [None]:
# Horizontal bar chart of the random forest's feature importances, sorted ascending.
importances = rf.feature_importances_
indices = np.argsort(importances)
plt.figure(figsize=(8,5))
plt.barh(range(len(indices)), importances[indices], color='r', align='center')
# Label the bars with the columns the forest was trained on. `cancer.columns`
# also contains the target column and only lined up because that column
# happens to be last.
plt.yticks(range(len(indices)), X_train.columns[indices])
plt.title('Importance des caracteristiques')

### MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with three hidden layers (40, 20 and 10 units);
# the seed fixes the weight initialisation so the scores are reproducible.
mlp = MLPClassifier(hidden_layer_sizes=(40, 20, 10), random_state=42)
mlp.fit(X_train, y_train)

# Predict with the network itself. The previous version called dtc.predict
# here, so the reported metrics were those of the decision tree.
y_mlp = mlp.predict(X_test)
print(accuracy_score(y_test, y_mlp))
cm = confusion_matrix(y_test, y_mlp)
print(cm)

# classification_report expects (y_true, y_pred); with the arguments swapped,
# precision and recall are exchanged and support counts the predictions.
print(classification_report(y_test, y_mlp))