# Preparation

In [None]:
#import libraries

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set (color_codes=True)



In [None]:
# Read the raw CSV from the Kaggle input directory
DATA_PATH = '/kaggle/input/company-bankruptcy-prediction/data.csv'
dataset = pd.read_csv(DATA_PATH)

# Column labels as a NumPy array; later cells reuse them to rebuild frames
col = dataset.columns.to_numpy()

# Checking Data Condition

In [None]:
# Preview the first five rows (5 is the default for head())
dataset.head()

In [None]:
# Show the dtype pandas inferred for every column
dataset.dtypes

In [None]:
# (number of rows, number of columns) of the raw dataset
dataset.shape


In [None]:
# Rows that exactly repeat an earlier row (duplicated() leaves the first
# occurrence unflagged by default).
duplicate_rows_dataset = dataset[dataset.duplicated()]

# Report the row count itself; the frame's shape tuple would also include the
# column count, which is not what the message promises.
print("number of duplicate rows: ", len(duplicate_rows_dataset))

> The dataset contains no duplicate rows.

# Finding The Missing Data

In [None]:
# Finding the missing data

# Number of missing values in each column
missing_per_column = dataset.isnull().sum()

# A per-column Series with this many entries is truncated by pandas when
# printed, so print the overall total and only the columns that actually
# contain missing values (an empty Series means there are none).
print("total missing values:", int(missing_per_column.sum()))
print(missing_per_column[missing_per_column > 0])


> The dataset contains no missing values.

# Exploratory Data Analysis

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every column with MinMaxScaler (default feature range 0..1) so that
# columns measured on different scales can share one chart.
MMS = MinMaxScaler()

data = MMS.fit_transform(dataset.values)
data = pd.DataFrame(data, columns=col)

# Select the rows flagged in "Bankrupt?" before summarising.
# The column is assumed to hold 0/1 labels. Passing such a non-boolean Series
# straight to .loc is a *label* lookup (it returns rows 0 and 1 over and over)
# rather than a filter, so it is converted to a boolean mask first.
is_bankrupt = dataset["Bankrupt?"].astype(bool).values
bankrupt_rows = data.loc[is_bankrupt]

# Per-column mean and standard deviation of the selected rows
data_mean = bankrupt_rows.mean()
data_std = bankrupt_rows.std()


In [None]:
# Draw one bar per column (x is the column's position in `col`) for the mean
# and for the standard deviation computed above.
#
# Both x and y are passed as plain vectors, so no `data=` frame is supplied:
# `data` has one row per company rather than one per column, and recent
# seaborn releases raise when a vector's length differs from the `data` frame.
for summary, title, bar_color in (
    (data_mean, "Data Standardization_Mean", None),
    (data_std, "Data Standardization_Standard Deviation", "black"),
):
    visual = sns.catplot(
        x=np.arange(len(col)),
        y=summary.to_numpy(),
        kind="bar",
        color=bar_color,  # None keeps seaborn's default colouring
        height=10,        # facet height in inches
        aspect=3,         # width = height * aspect, i.e. a 30 x 10 inch figure
    )
    sns.despine()
    plt.title(title)



# Data Separation

In [None]:
# Column 0 is used as the target; every remaining column is a feature.
# The open-ended slice avoids hard-coding the number of columns, so the cell
# keeps working if columns are added to or removed from the dataset.
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

print(X)


In [None]:
# First five rows of the min-max scaled frame (5 is the default for head())
data.head()

# Data Splitting

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the samples for testing; the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Feature Scaling (Standardization)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information leaks into it.
sc = StandardScaler()
sc.fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Display the standardized training features
X_train

# Data Analysis

In [None]:
from sklearn.decomposition import PCA

# Project the standardized features onto the first two principal components.
# random_state is pinned because PCA may select a randomized SVD solver for
# inputs like this one; without a seed the projection (and everything trained
# on it) can differ slightly between runs.
pca = PCA(n_components=2, random_state=0)

# Fit on the training split only and reuse the fitted projection for the test
# split.
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [None]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier trained on the PCA-reduced features
svc_settings = {'kernel': 'rbf', 'gamma': 0.1, 'random_state': 0}
classifier = SVC(**svc_settings)
classifier.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the classifier, using the training split only
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)

# Report mean and spread of the fold accuracies as percentages
mean_accuracy_pct = accuracies.mean() * 100
std_accuracy_pct = accuracies.std() * 100
print("Accuracy: {:.2f} %".format(mean_accuracy_pct))
print("Standard Deviation: {:.2f} %".format(std_accuracy_pct))

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters: a linear kernel over several C values, and an
# RBF kernel over the same C values combined with a range of gamma values.
c_values = [0.25, 0.5, 0.75, 1]
gamma_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
parameters = [
    {'C': c_values, 'kernel': ['linear']},
    {'C': c_values, 'kernel': ['rbf'], 'gamma': gamma_values},
]

# Exhaustive search scored by 10-fold cross-validated accuracy, using all
# available CPU cores (n_jobs=-1).
# NOTE(review): the cells that follow keep predicting with `classifier`, not
# with the tuned `grid_search.best_estimator_`, so the search result is only
# reported here and never applied.
grid_search = GridSearchCV(
    classifier,
    parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

> The accuracy figures above were measured on the PCA-reduced features (2 principal components).

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Predict the held-out split with the fitted classifier
y_pred = classifier.predict(X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Overall share of correct predictions (shown as the cell's output)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    cohen_kappa_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# Metrics computed from the hard class predictions in y_pred
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy : %f' % accuracy)

precision = precision_score(y_test, y_pred)
print('Precision : %f' % precision)

recall = recall_score(y_test, y_pred)
print('Recall : %f' % recall)

f1 = f1_score(y_test, y_pred)
print('F1 : %f' % f1)

kappa = cohen_kappa_score(y_test, y_pred)
print('Cohens kappa : %f' % kappa)

# ROC AUC measures how well the samples are *ranked*, so it needs continuous
# scores. Feeding it the thresholded 0/1 predictions collapses the ROC curve
# to a single operating point; use the SVM's signed distance to the decision
# boundary instead.
y_score = classifier.decision_function(X_test)
auc = roc_auc_score(y_test, y_score)
print('ROC AUC : %f' % auc)

cm = confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2;
# ConfusionMatrixDisplay.from_estimator is its replacement. Fall back to the
# old helper only on releases that predate the new API.
if hasattr(ConfusionMatrixDisplay, "from_estimator"):
    ConfusionMatrixDisplay.from_estimator(classifier, X_test, y_test)
else:
    from sklearn.metrics import plot_confusion_matrix
    plot_confusion_matrix(classifier, X_test, y_test)