In [None]:
!pip install plotly.express


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

In [None]:
# Load the CSV from the Kaggle input directory into a DataFrame.
dataset = pd.read_csv('/kaggle/input/breast-cancer-wisconsin-data/data.csv')

In [None]:
# Display the freshly loaded frame (the cell's last expression is rendered).
dataset

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) per column.
dataset.describe()


In [None]:
# Column names, dtypes and non-null counts.
dataset.info()


In [None]:
# Number of missing values per column. The raw boolean mask is as large as the
# frame itself and hard to read; the per-column totals answer the question directly.
dataset.isnull().sum()

In [None]:
# Plot the null mask as a heatmap so missing cells are visible at a glance;
# row tick labels and the colour bar are turned off.
sns.heatmap(dataset.isnull(),yticklabels=False,cbar=False,cmap='Pastel1')

In [None]:
# Remove the 'Unnamed: 32' column (presumably an empty artefact of the CSV —
# confirm against the null counts). Rebinding the result with errors='ignore'
# keeps the cell re-runnable: the original in-place drop raised KeyError the
# second time it was executed.
dataset = dataset.drop(columns='Unnamed: 32', errors='ignore')

In [None]:
# (rows, columns) of the frame at this point in the notebook.
dataset.shape

In [None]:
# Switch seaborn to the 'darkgrid' style (applies to later plots as well),
# then plot how many rows fall into each diagnosis class.
sns.set_style('darkgrid')
sns.countplot(x='diagnosis',data=dataset)

In [None]:
# Exact number of rows per diagnosis class.
dataset['diagnosis'].value_counts()

In [None]:
#encoding categorical data values
# M = 1 , B = 0
from sklearn.preprocessing import LabelEncoder

labelencoder = LabelEncoder()
# Replace the whole column instead of writing through `.iloc[:, 1] = ...`.
# On current pandas the positional setter writes into the existing object
# column, so the encoded values would stay `object` dtype and scikit-learn
# would later reject them as an unknown label type. Assigning by column label
# swaps in the integer array returned by the encoder.
dataset[dataset.columns[1]] = labelencoder.fit_transform(dataset.iloc[:, 1])
dataset.iloc[:, 1]


In [None]:
# Display the frame again to inspect the result of the encoding step.
dataset

In [None]:
# Summary statistics of the diagnosis column.
dataset.diagnosis.describe()

In [None]:
# Histogram of the diagnosis column with a box plot in the margin.
# - The title previously ended after "Distribution of "; it now names the column.
# - nbins=0 is plotly's "choose automatically" default, so it had no effect;
#   2 is requested because the encoding yields two classes (M = 1, B = 0).
fig = px.histogram(dataset,
                   x='diagnosis',
                   marginal='box',
                   nbins=2,
                   title='Distribution of diagnosis')
fig.update_layout(bargap=0.1)
fig.show()

In [None]:
# Summary statistics of the radius_mean column.
dataset.radius_mean.describe()

In [None]:
# Histogram of radius_mean (at most 21 bins) with a marginal box plot.
# update_layout returns the figure it was called on, so the layout tweak is
# chained straight onto the constructor call.
fig = px.histogram(
    dataset,
    x='radius_mean',
    nbins=21,
    marginal='box',
    title='Distribution of radius_mean',
).update_layout(bargap=0.1)
fig.show()

In [None]:
# Pairwise plots of the columns at positions 1-5, coloured by diagnosis.
sns.pairplot(dataset.iloc[:,1:6], hue = 'diagnosis')

In [None]:
# Pairwise correlation matrix of the columns at positions 1-11.
dataset.iloc[:,1:12].corr()

In [None]:
# Same correlation matrix as a heatmap on a 14x10 figure, with the coefficient
# written into every cell (annot=True).
plt.figure(figsize=(14,10))
sns.heatmap(dataset.iloc[:,1:12].corr(), annot=True)

In [None]:
# Feature matrix: every column from position 2 onwards. The earlier slice
# `2:31` stopped at position 30, which silently left out the column at
# position 31 (the last feature once 'Unnamed: 32' has been dropped —
# confirm with dataset.shape).
X = dataset.iloc[:, 2:].values
# Target vector: the encoded diagnosis at position 1, forced to an integer
# dtype so scikit-learn always sees a proper class-label array.
Y = dataset.iloc[:, 1].values.astype(int)

In [None]:
# Hold out 30% of the rows for testing; random_state is fixed so the split is
# the same on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 0)

In [None]:
#Scaling the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn the per-feature mean and standard deviation from the training split only.
X_train = scaler.fit_transform(X_train)
# Apply those same training statistics to the test split. Calling fit_transform
# here would re-fit the scaler on the test data, leaking test information and
# putting train and test on different scales.
X_test = scaler.transform(X_test)

In [None]:
def _fit_timed(name, estimator, X_train, Y_train):
    """Fit ``estimator`` on the training data, print the elapsed time, return it."""
    import time

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # short durations; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    estimator.fit(X_train, Y_train)
    elapsed = time.perf_counter() - start
    print(f"{name} Training time: {elapsed}s")
    return estimator


def Models(X_train,Y_train):
    """Train four classifiers on the same data and report time and training accuracy.

    Parameters
    ----------
    X_train : feature matrix passed unchanged to every estimator's ``fit``/``score``.
    Y_train : class labels aligned with ``X_train``.

    Returns
    -------
    tuple
        ``(logistic_regression, decision_tree, random_forest, k_neighbors)`` —
        the fitted estimators, in that order.

    Side effects
    ------------
    Prints one training-time line per model, then the accuracy of each model
    on the training data itself (not on held-out data).
    """
    # Imported here so the function works on its own; the notebook's import
    # cell does not bring these names into scope.
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier

    #Logistic Regression
    log = _fit_timed("LogisticRegression",
                     LogisticRegression(random_state=0),
                     X_train, Y_train)

    #Decision Tree Classifier
    tree = _fit_timed("DecisionTreeClassifier",
                      DecisionTreeClassifier(criterion='entropy', random_state=0),
                      X_train, Y_train)

    #Random Forest
    random_forest = _fit_timed("RandomForestClassifier",
                               RandomForestClassifier(n_estimators=10,
                                                      criterion='entropy',
                                                      random_state=3),
                               X_train, Y_train)

    #KNeighbors Classifier (minkowski metric with p=2)
    knn = _fit_timed("KNeighborsClassifier",
                     KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2),
                     X_train, Y_train)

    #Model Accuracy on Training Data
    print("\n\n\n")
    print('[0]Logistic Regression Training Acc:', log.score(X_train, Y_train))
    print('[1]Decision Tree Training Acc:', tree.score(X_train, Y_train))
    print('[2]Random Forest Training Acc:', random_forest.score(X_train, Y_train))
    print('[3]KNeighbor Acc:', knn.score(X_train, Y_train))

    return log, tree, random_forest, knn

    

In [None]:
# Train all four classifiers. `model` is the tuple returned by Models:
# (logistic regression, decision tree, random forest, k-nearest neighbours).
model= Models(X_train,Y_train)

In [None]:
# Accuracy on the held-out test data, one confusion matrix per model
from sklearn.metrics import confusion_matrix

for i, clf in enumerate(model):
    print('model ', i)
    # labels=[0, 1] pins the matrix to 2x2 even if one class is absent from
    # the predictions.
    cm = confusion_matrix(Y_test, clf.predict(X_test), labels=[0, 1])

    # scikit-learn puts true labels on the rows and predicted labels on the
    # columns, so the flattened order is tn, fp, fn, tp. The earlier index-based
    # names had positives and negatives swapped; the accuracy value itself was
    # unaffected because the formula is symmetric.
    tn, fp, fn, tp = cm.ravel()

    print(cm)
    print('Testing Acc = ', (tp + tn)/(tp +tn +fn + fp))
    print()

In [None]:
# Logistic regression classifier: predicted test labels, a blank line,
# then the true test labels for visual comparison.
pred = model[0].predict(X_test)
print(pred, Y_test, sep='\n\n')

In [None]:
# Decision tree classifier: predicted test labels, a blank line,
# then the true test labels for visual comparison.
pred = model[1].predict(X_test)
print(pred, Y_test, sep='\n\n')

In [None]:
# Random forest classifier: predicted test labels, a blank line,
# then the true test labels for visual comparison.
pred = model[2].predict(X_test)
print(pred, Y_test, sep='\n\n')

In [None]:
# K-nearest-neighbours classifier: predicted test labels, a blank line,
# then the true test labels for visual comparison.
pred = model[3].predict(X_test)
print(pred, Y_test, sep='\n\n')