In [None]:
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [None]:
# Load the dataset from the Kaggle input directory; the "Bankrupt?" column
# is used as the target in the cells below.
data = pd.read_csv("../input/company-bankruptcy-prediction/data.csv")

# Oversampling Method
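The classes are rebalanced with SMOTE before the models are trained. See the imbalanced-learn [Oversampling](https://imbalanced-learn.org/stable/over_sampling.html) guide for background on the available methods.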

In [None]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE

# Fixed seed so the split and the synthetic samples are reproducible on
# Restart Kernel -> Run All.
RANDOM_STATE = 42

y = data["Bankrupt?"]
X = data.drop(columns=["Bankrupt?"])

# Split BEFORE resampling. SMOTE synthesises new minority rows by interpolating
# between existing ones; fitting it on the full dataset lets information from
# rows that end up in the test set leak into training and also makes the test
# set artificially balanced, so every reported score is over-optimistic.
# `stratify=y` keeps the original class ratio in both partitions.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=RANDOM_STATE
)

# Oversample the training partition only; the test partition keeps the
# original class distribution.
over_sample = SMOTE(random_state=RANDOM_STATE)
X_ros, y_ros = over_sample.fit_resample(X_train_raw, y_train_raw)

X_train, y_train = X_ros, y_ros

In [None]:
from sklearn.neighbors import KNeighborsClassifier

def KNC(X_train, X_test, y_train, y_test, n_neighbors=2):
    """Fit a k-nearest-neighbours classifier and display its evaluation.

    Shows a confusion-matrix heatmap, a ROC curve, a precision/recall curve
    and a table of summary scores, then prints the classification report.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test, y_test : held-out features and labels used for every metric.
        Labels are assumed to be binary 0/1, as the heatmap axis labels imply.
    n_neighbors : int, default 2
        Number of neighbours forwarded to ``KNeighborsClassifier``.

    Returns
    -------
    The hard class predictions for ``X_test``.
    """
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(X_train, y_train)

    y_prediction = model.predict(X_test)
    # Threshold-based metrics (ROC, PR, AUC) need a continuous score for the
    # positive class; hard 0/1 predictions collapse the curves to a single
    # operating point. Column 1 is the probability of the larger class label.
    y_score = model.predict_proba(X_test)[:, 1]

    confusionMatrix = confusion_matrix(y_test, y_prediction)
    classificationReport = classification_report(y_test, y_prediction)

    fpr, tpr, _ = roc_curve(y_test, y_score)
    precision, recall, _ = precision_recall_curve(y_test, y_score)

    fig = ff.create_annotated_heatmap(z=confusionMatrix, x=["Predicted 0", "Predicted 1"], y=["True 0", "True 1"], colorscale="ice")
    fig.show()

    fig = px.line(x=fpr, y=tpr, title="ROC Curve", labels=dict(x="False Positive Rate", y="True Positive Rate"))
    fig.show()

    fig = px.line(x=precision, y=recall, title="Precision Recall Curve", labels=dict(x="Precision", y="Recall"))
    fig.show()

    accuracyScore = accuracy_score(y_test, y_prediction)
    rocaucScore = roc_auc_score(y_test, y_score)
    # average=None yields one value per class rather than a single number.
    f1Score = f1_score(y_test, y_prediction, average=None)
    aucScore = auc(fpr, tpr)
    precisionScore = precision_score(y_test, y_prediction, average=None)
    recallScore = recall_score(y_test, y_prediction, average=None)

    fig = go.Figure(data=[go.Table(header=dict(values=['Type', 'Scores']),
                 cells=dict(values=[["Accuracy", "ROC AUC", "AUC", "F1", "Precision", "Recall"], [accuracyScore, rocaucScore, aucScore, f1Score, precisionScore, recallScore]]))
                         ])
    fig.show()

    print(classificationReport)

    return y_prediction

In [None]:
# Train and evaluate the k-nearest-neighbours model with its default
# n_neighbors=2; the returned test-set predictions are kept in y_prediction.
y_prediction = KNC(X_train, X_test, y_train, y_test)

In [None]:
from sklearn.svm import SVC


def SVClassifier(X_train, X_test, y_train, y_test):
    """Fit a default ``SVC`` and display its evaluation.

    Shows a confusion-matrix heatmap, a ROC curve, a precision/recall curve
    and a table of summary scores, then prints the classification report.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test, y_test : held-out features and labels used for every metric.
        Labels are assumed to be binary 0/1, as the heatmap axis labels imply.

    Returns
    -------
    The hard class predictions for ``X_test``.
    """
    svc = SVC()
    svc.fit(X_train, y_train)

    y_prediction = svc.predict(X_test)
    # Threshold-based metrics (ROC, PR, AUC) need a continuous score; hard 0/1
    # predictions collapse the curves to a single operating point. A default
    # SVC exposes no predict_proba, so the signed distance to the separating
    # hyperplane is used as the score instead.
    y_score = svc.decision_function(X_test)

    confusionMatrix = confusion_matrix(y_test, y_prediction)
    classificationReport = classification_report(y_test, y_prediction)

    fpr, tpr, _ = roc_curve(y_test, y_score)
    precision, recall, _ = precision_recall_curve(y_test, y_score)

    fig = ff.create_annotated_heatmap(z=confusionMatrix, x=["Predicted 0", "Predicted 1"], y=["True 0", "True 1"], colorscale="ice")
    fig.show()

    fig = px.line(x=fpr, y=tpr, title="ROC Curve", labels=dict(x="False Positive Rate", y="True Positive Rate"))
    fig.show()

    fig = px.line(x=precision, y=recall, title="Precision Recall Curve", labels=dict(x="Precision", y="Recall"))
    fig.show()

    accuracyScore = accuracy_score(y_test, y_prediction)
    rocaucScore = roc_auc_score(y_test, y_score)
    # average=None yields one value per class rather than a single number.
    f1Score = f1_score(y_test, y_prediction, average=None)
    aucScore = auc(fpr, tpr)
    precisionScore = precision_score(y_test, y_prediction, average=None)
    recallScore = recall_score(y_test, y_prediction, average=None)

    fig = go.Figure(data=[go.Table(header=dict(values=['Type', 'Scores']),
                 cells=dict(values=[["Accuracy", "ROC AUC", "AUC", "F1", "Precision", "Recall"], [accuracyScore, rocaucScore, aucScore, f1Score, precisionScore, recallScore]]))
                         ])
    fig.show()

    print(classificationReport)

    return y_prediction

In [None]:
# Train and evaluate the support-vector classifier on the same split; this
# overwrites y_prediction from the previous model's cell.
y_prediction = SVClassifier(X_train, X_test, y_train, y_test)

In [None]:
from sklearn.linear_model import LogisticRegression

def Logistic(X_train, X_test, y_train, y_test):
    """Fit a default ``LogisticRegression`` and display its evaluation.

    Shows a confusion-matrix heatmap, a ROC curve, a precision/recall curve
    and a table of summary scores, then prints the classification report.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test, y_test : held-out features and labels used for every metric.
        Labels are assumed to be binary 0/1, as the heatmap axis labels imply.

    Returns
    -------
    The hard class predictions for ``X_test``.
    """
    model = LogisticRegression()
    model.fit(X_train, y_train)

    y_prediction = model.predict(X_test)
    # Threshold-based metrics (ROC, PR, AUC) need a continuous score for the
    # positive class; hard 0/1 predictions collapse the curves to a single
    # operating point. Column 1 is the probability of the larger class label.
    y_score = model.predict_proba(X_test)[:, 1]

    confusionMatrix = confusion_matrix(y_test, y_prediction)
    classificationReport = classification_report(y_test, y_prediction)

    fpr, tpr, _ = roc_curve(y_test, y_score)
    precision, recall, _ = precision_recall_curve(y_test, y_score)

    fig = ff.create_annotated_heatmap(z=confusionMatrix, x=["Predicted 0", "Predicted 1"], y=["True 0", "True 1"], colorscale="ice")
    fig.show()

    fig = px.line(x=fpr, y=tpr, title="ROC Curve", labels=dict(x="False Positive Rate", y="True Positive Rate"))
    fig.show()

    fig = px.line(x=precision, y=recall, title="Precision Recall Curve", labels=dict(x="Precision", y="Recall"))
    fig.show()

    accuracyScore = accuracy_score(y_test, y_prediction)
    rocaucScore = roc_auc_score(y_test, y_score)
    # average=None yields one value per class rather than a single number.
    f1Score = f1_score(y_test, y_prediction, average=None)
    aucScore = auc(fpr, tpr)
    precisionScore = precision_score(y_test, y_prediction, average=None)
    recallScore = recall_score(y_test, y_prediction, average=None)

    fig = go.Figure(data=[go.Table(header=dict(values=['Type', 'Scores']),
                 cells=dict(values=[["Accuracy", "ROC AUC", "AUC", "F1", "Precision", "Recall"], [accuracyScore, rocaucScore, aucScore, f1Score, precisionScore, recallScore]]))
                         ])
    fig.show()

    print(classificationReport)

    return y_prediction

In [None]:
# Train and evaluate logistic regression on the same split; this overwrites
# y_prediction from the previous model's cell.
y_prediction = Logistic(X_train, X_test, y_train, y_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier


def RandomForest(X_train, X_test, y_train, y_test):
    """Fit a default ``RandomForestClassifier`` and display its evaluation.

    Shows a confusion-matrix heatmap, a ROC curve, a precision/recall curve
    and a table of summary scores, then prints the classification report.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test, y_test : held-out features and labels used for every metric.
        Labels are assumed to be binary 0/1, as the heatmap axis labels imply.

    Returns
    -------
    The hard class predictions for ``X_test``.
    """
    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    y_prediction = model.predict(X_test)
    # Threshold-based metrics (ROC, PR, AUC) need a continuous score for the
    # positive class; hard 0/1 predictions collapse the curves to a single
    # operating point. Column 1 is the probability of the larger class label.
    y_score = model.predict_proba(X_test)[:, 1]

    confusionMatrix = confusion_matrix(y_test, y_prediction)
    classificationReport = classification_report(y_test, y_prediction)

    fpr, tpr, _ = roc_curve(y_test, y_score)
    precision, recall, _ = precision_recall_curve(y_test, y_score)

    fig = ff.create_annotated_heatmap(z=confusionMatrix, x=["Predicted 0", "Predicted 1"], y=["True 0", "True 1"], colorscale="ice")
    fig.show()

    fig = px.line(x=fpr, y=tpr, title="ROC Curve", labels=dict(x="False Positive Rate", y="True Positive Rate"))
    fig.show()

    fig = px.line(x=precision, y=recall, title="Precision Recall Curve", labels=dict(x="Precision", y="Recall"))
    fig.show()

    accuracyScore = accuracy_score(y_test, y_prediction)
    rocaucScore = roc_auc_score(y_test, y_score)
    # average=None yields one value per class rather than a single number.
    f1Score = f1_score(y_test, y_prediction, average=None)
    aucScore = auc(fpr, tpr)
    precisionScore = precision_score(y_test, y_prediction, average=None)
    recallScore = recall_score(y_test, y_prediction, average=None)

    fig = go.Figure(data=[go.Table(header=dict(values=['Type', 'Scores']),
                 cells=dict(values=[["Accuracy", "ROC AUC", "AUC", "F1", "Precision", "Recall"], [accuracyScore, rocaucScore, aucScore, f1Score, precisionScore, recallScore]]))
                         ])
    fig.show()

    print(classificationReport)

    return y_prediction

In [None]:
%%time
# Train and evaluate the random forest on the same split, timing the whole
# cell; this overwrites y_prediction from the previous model's cell.
y_prediction = RandomForest(X_train, X_test, y_train, y_test)