In [None]:
# Core data / plotting stack used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Show NumPy arrays (e.g. confusion matrices) with at most 4 decimal places.
np.set_printoptions(precision=4)
# Evaluation metrics printed for every classifier.
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score , confusion_matrix
from sklearn.inspection import permutation_importance
import warnings
# NOTE(review): this silences *every* warning for the whole session, which
# would also hide solver/convergence or deprecation warnings raised while
# fitting the models below -- consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
# Location of the bankruptcy CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/company-bankruptcy-prediction/data.csv'

# Load the raw table and preview its first rows (head() defaults to 5 rows).
df = pd.read_csv(DATA_PATH)
df.head()

## Check for missing data

In [None]:
# Print each column's dtype and non-null count; a non-null count equal to the
# number of rows for every column means there are no missing values.
df.info()

No missing data found

## Check for imbalanced data

In [None]:
# Number of rows per class of the 'Bankrupt?' target column.
df['Bankrupt?'].value_counts()

In [None]:
# Visualise the class proportions of the target as a pie chart, with each
# wedge labelled by its class value and annotated with its percentage.
data = df["Bankrupt?"].value_counts()

fig, ax = plt.subplots()
ax.pie(data, labels=data.index, autopct='%1.2f%%')
plt.show()

The dataset is imbalanced.

# Data Preprocessing

In [None]:
# Separate the label from the predictors.
# `drop` returns a new DataFrame, so `df` itself is left unmodified.
y = df['Bankrupt?']
X = df.drop(columns=['Bankrupt?'])

In [None]:
# Display the names of the feature columns held in X.
X.columns

# Split data into training and test sets

In [None]:
# Hold out 20% of the rows as the test set.
#
# stratify=y keeps the bankrupt / non-bankrupt ratio identical in the training
# and test partitions. Because the target is heavily imbalanced, a purely
# random split can leave the test set with noticeably more or fewer positive
# cases than the data as a whole, which makes recall / precision unstable.
#
# NOTE: stratifying changes which rows land in each partition compared with an
# unstratified split using the same seed, so metrics recorded from earlier
# runs will not be reproduced exactly.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
    stratify=y,
)

# Balancing Data

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample on the training partition only: the test set is left untouched,
# so it keeps the original class distribution and contains no synthetic rows.
sm = SMOTE(random_state=1234)
X_smote, y_smote = sm.fit_resample(X_train, y_train)

# Report the shape of the data that was actually resampled. The "before"
# figure must come from X_train (the SMOTE input), not from the full X,
# otherwise the before/after row counts are not comparable.
print(f'''Shape of X before SMOTE: {X_train.shape}
Shape of X after SMOTE: {X_smote.shape}''')

# Class shares (in percent) of the resampled training target.
print('\nBalance of positive and negative classes (%):')
y_smote.value_counts(normalize=True) * 100

# Feature Scaling

In [None]:
# Optional step, currently disabled. The Results section reports metrics both
# with and without feature scaling; uncomment the lines below to reproduce the
# "with scaling" variant.
#
# The scaler is fitted on the resampled training data and the same fitted
# transform is then applied to the test set.
# NOTE(review): when enabled, this overwrites X_smote / X_test in place, so
# re-running the cell without re-running the cells above would scale the data
# a second time.
#from sklearn.preprocessing import StandardScaler
#sc = StandardScaler()

#X_smote = sc.fit_transform(X_smote)
#X_test = sc.transform(X_test)

# Import ML Libraries

In [None]:
# Import ML Libraries
from xgboost import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

# Candidate models, each stored as [estimator, display name]; the evaluation
# loop reads them by position (index 0 = estimator, index 1 = name).
#
# Every estimator that draws random numbers during fitting is given the same
# seed (1234) that the train/test split and SMOTE already use, so a
# Restart & Run All reproduces the same metrics. The estimators left unseeded
# (KNN, SVC, GaussianNB, LogisticRegression) are used with their default
# settings and are not given a seed here.
classifiers = [
    [CatBoostClassifier(verbose=0, random_seed=1234), 'CatBoost Classifier'],
    [XGBClassifier(random_state=1234), 'XGB Classifier'],
    [RandomForestClassifier(random_state=1234), 'Random Forest'],
    [KNeighborsClassifier(), 'K-Nearest Neighbours'],
    [SGDClassifier(random_state=1234), 'SGD Classifier'],
    [SVC(), 'SVC'],
    [LGBMClassifier(random_state=1234), 'LGBM Classifier'],
    [GaussianNB(), 'GaussianNB'],
    [DecisionTreeClassifier(random_state=1234), 'Decision Tree Classifier'],
    [LogisticRegression(), 'Logistic Regression'],
]

# Predicting

In [None]:
# (printed label, scoring function) pairs, reported in this order per model.
metric_scorers = (
    ("Accuracy : ", accuracy_score),
    ("Recall : ", recall_score),
    ("Precision : ", precision_score),
    ("F1 : ", f1_score),
)

# Fit each candidate on the SMOTE-balanced training data, then score it on the
# untouched test set. Scores are printed as percentages.
for model, model_name in classifiers:
    model.fit(X_smote, y_smote)
    y_pred = model.predict(X_test)

    print(model_name)
    print('Confusion Matrix:')
    print(confusion_matrix(y_test, y_pred))
    for label, scorer in metric_scorers:
        print(label, scorer(y_test, y_pred) * 100)
    print("\n\n\n")

# Result

With Feature Scaling

* For Best Accuracy : LGBM Classifier (acc : 96.7008797653959)
* For Best Recall : GaussianNB (recall: 90.0)
* For Best Precision : LGBM Classifier (Precision: 44.89795918367347)
* For Best F1 : LGBM Classifier (F1: 49.43820224719101)

Without Feature Scaling

* For Best Accuracy : XGB Classifier (acc : 96.7008797653959) 
* For Best Recall : GaussianNB (recall: 95.0)
* For Best Precision : XGB Classifier (Precision: 44.89795918367347)
* For Best F1 : XGB Classifier (F1: 49.43820224719101) 