In [None]:
# IMPORTING LIBRARIES

# General Libraries

import os
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import norm
from collections import Counter
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings(action="ignore")

# additional libraries

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from miceforest import ImputationKernel
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
import statistics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_curve
import matplotlib.gridspec as gridspec
import missingno as msno
from statsmodels.stats.outliers_influence import variance_inflation_factor


# Machine Learning Libraries

import sklearn
import xgboost as xgb
from sklearn import tree
from sklearn.svm import SVC
from catboost import CatBoostClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from catboost import CatBoostClassifier
from sklearn import svm
import lightgbm as lgb
from lightgbm import LGBMClassifier
import xgboost as xgb

In [None]:
# we first read the dataset with normal companies
final_normal = pd.read_excel('final_normal.xlsx')
print ('The Shape of Dataset: ',final_normal.shape)
final_normal.head()

In [None]:
# then we read the dataset with companies labelled with financial distress
final_fin_distress = pd.read_excel('final_fin_distress.xlsx')
print ('The Shape of Dataset: ',final_fin_distress.shape)
final_fin_distress.head()

In [None]:
final_normal['FINANCIAL_DISTRESS'] = 0
final_fin_distress['FINANCIAL_DISTRESS'] = 1

In [None]:
print ('The Shape of Dataset: ',final_fin_distress.shape)
final_fin_distress.head()

In [None]:
print ('The Shape of Dataset: ',final_normal.shape)
final_normal.head()

In [None]:
# Stack both groups into one frame. Each sheet is read with its own 0-based
# index, so ignore_index is needed to give every row a unique label.
dataset = pd.concat([final_normal, final_fin_distress], ignore_index=True)

In [None]:
print ('The Shape of Dataset: ',dataset.shape)
dataset.head()

In [None]:
# checking for missing data
msno.matrix(dataset)

In [None]:
# we see that some columns contain too many missing rows
dataset.drop(columns=['AssetsNoncurrent', 'LiabilitiesNoncurrent', 'GrossProfit'], inplace=True)

In [None]:
# checking for missing data
msno.matrix(dataset)

In [None]:
# drop non-informational columns
dataset.drop(columns=['ticker', 'CIK', 'NewsNegative', 'NewsPositive', 'NewsNeutral'], inplace=True)

In [None]:
mice_kernel = ImputationKernel(
data = dataset,
save_all_iterations = True,
random_state = 2023
)

In [None]:
mice_kernel.mice(2)
mice_imputation = mice_kernel.complete_data()
mice_imputation.head()

In [None]:
dataset = mice_imputation

In [None]:
# checking for missing data
msno.matrix(dataset)

In [None]:
# Histogram of every column, to compare each one with a normal distribution.
# The style is set before the axes are created so that it applies to this figure.
sns.set_style("darkgrid")
# Size the grid from the column count (3 plots per row) instead of a fixed 10x3,
# which raised IndexError as soon as the frame had more than 30 columns.
hist_grid_cols = 3
hist_grid_rows = max(1, -(-len(dataset.columns) // hist_grid_cols))  # ceiling division
fig, ax = plt.subplots(hist_grid_rows, hist_grid_cols, figsize=(17,12), constrained_layout=True)
ax = ax.flatten()
for num, col in enumerate(dataset.columns):
    sns.histplot(dataset[col], ax=ax[num])
# Hide the grid slots that have no column to show
for unused_axis in ax[len(dataset.columns):]:
    unused_axis.set_visible(False)
plt.show()

In [None]:
dataset.replace([np.inf, -np.inf], 0, inplace=True)

In [None]:
plt.figure(figsize=(12,9))
sns.heatmap(dataset.corr(), annot=True, fmt='.2f')

In [None]:
# Correlation of every column with the FINANCIAL_DISTRESS label,
# listed from the most positive to the most negative.
corr = dataset.corr()
correlation_df = corr[['FINANCIAL_DISTRESS']]
correlation_df.sort_values(by='FINANCIAL_DISTRESS', ascending=False)

In [None]:
# Multicollinearity check: variance inflation factor (VIF) of every feature.
# The feature matrix is built once; re-dropping the label column inside the
# comprehension copied the whole frame once per feature.
vif_features = dataset.drop('FINANCIAL_DISTRESS', axis=1)
vif_data = pd.DataFrame({
    "feature": vif_features.columns,
    "VIF": [variance_inflation_factor(vif_features.values, i)
            for i in range(vif_features.shape[1])],
})

In [None]:
vif_data

In [None]:
# basically we see, that for several features VIF is higher than 8-10, so some of them should be eliminated
dataset.drop('Assets', axis = 1, inplace=True)

In [None]:
# Recompute the VIF table after 'Assets' was dropped.
# The feature matrix is built once instead of once per feature.
vif_features = dataset.drop('FINANCIAL_DISTRESS', axis=1)
vif_data = pd.DataFrame({
    "feature": vif_features.columns,
    "VIF": [variance_inflation_factor(vif_features.values, i)
            for i in range(vif_features.shape[1])],
})

In [None]:
vif_data

In [None]:
# basically we see, that for several features VIF is higher than 8-10, so some of them should be eliminated
dataset.drop(columns=['AssetsCurrent', 'LiabilitiesCurrent'], axis = 1, inplace=True)

In [None]:
# Recompute the VIF table after 'AssetsCurrent' and 'LiabilitiesCurrent' were dropped.
# The feature matrix is built once instead of once per feature.
vif_features = dataset.drop('FINANCIAL_DISTRESS', axis=1)
vif_data = pd.DataFrame({
    "feature": vif_features.columns,
    "VIF": [variance_inflation_factor(vif_features.values, i)
            for i in range(vif_features.shape[1])],
})

In [None]:
vif_data

In [None]:
# basically we see, that for several features VIF is higher than 8-10, so some of them should be eliminated
dataset.drop(columns=['ROE'], axis = 1, inplace=True)

In [None]:
# Recompute the VIF table after 'ROE' was dropped.
# The feature matrix is built once instead of once per feature.
vif_features = dataset.drop('FINANCIAL_DISTRESS', axis=1)
vif_data = pd.DataFrame({
    "feature": vif_features.columns,
    "VIF": [variance_inflation_factor(vif_features.values, i)
            for i in range(vif_features.shape[1])],
})

In [None]:
vif_data

In [None]:
# Outlier detection: one box plot per column, drawn on a natural-log scale.
# Columns that contain an exact 0 are skipped.
# NOTE(review): negative values turn into NaN under np.log and drop out of the
# plot silently — confirm this is acceptable for columns that can be negative.
for feature in dataset:
    if 0 in dataset[feature].unique():
        continue
    # Transform only the column being drawn; copying the whole frame for every
    # column was unnecessary work.
    data = np.log(dataset[feature]).to_frame()
    data.boxplot(column=feature)
    plt.ylabel(feature)
    plt.title(feature)
    plt.show()

In [None]:
dataset[['NewsOverall','polarity_MD&A', 'D/A', 'D/E', 'FINANCIAL_DISTRESS', 'ROA', 'CurrentRatio']].describe()

In [None]:
X = dataset.drop('FINANCIAL_DISTRESS',axis=1)
y = dataset['FINANCIAL_DISTRESS']

In [None]:
# Class balance of the label before oversampling. The labels are passed as `x`
# explicitly: seaborn's first positional parameter is `data`, not `x`.
sns.countplot(x=y)

In [None]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

In [None]:
oversample = SMOTE()
X,y=oversample.fit_resample(X,y)

In [None]:
sns.countplot(y)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

In [None]:
def print_evaluate(true, predicted):
    """Print six classification metrics of ``predicted`` against ``true``.

    Every metric is rounded to 3 decimals and all of them are computed before
    anything is printed. The scorers are reached through the ``metrics`` module
    (``from sklearn import metrics``): apart from ``accuracy_score`` they are
    not imported by name in this notebook, so the bare names raised NameError.

    NOTE(review): roc_auc is computed from whatever ``predicted`` holds; the
    callers in this notebook pass the output of ``model.predict``.
    """
    report = [
        ('accuracy: ', metrics.accuracy_score),
        ('precision: ', metrics.precision_score),
        ('recall: ', metrics.recall_score),
        ('f1:', metrics.f1_score),
        ('balanced accuracy:', metrics.balanced_accuracy_score),
        ('roc_auc:', metrics.roc_auc_score),
    ]
    values = [np.round(scorer(true, predicted), 3) for _, scorer in report]
    for (label, _), value in zip(report, values):
        print(label, value)

def cross_val(model, X, y, cv=5):
    """Return the mean cross-validation score of ``model``, rounded to 3 decimals.

    Parameters
    ----------
    model
        Estimator handed to ``cross_val_score``.
    X, y
        Features and labels to cross-validate on.
    cv
        Passed through to ``cross_val_score``; defaults to 5, the value that
        used to be hard-coded.
    """
    fold_scores = cross_val_score(model, X, y, cv=cv)
    return np.round(fold_scores.mean(), 3)

def evaluate(true, predicted):
    """Return six classification metrics of ``predicted`` against ``true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, balanced_accuracy, roc_auc)``,
        each rounded to 3 decimals.

    The scorers are reached through the ``metrics`` module
    (``from sklearn import metrics``): apart from ``accuracy_score`` they are
    not imported by name in this notebook, so the bare names raised NameError.

    NOTE(review): roc_auc is computed from whatever ``predicted`` holds; the
    callers in this notebook pass the output of ``model.predict``.
    """
    scorers = (
        metrics.accuracy_score,
        metrics.precision_score,
        metrics.recall_score,
        metrics.f1_score,
        metrics.balanced_accuracy_score,
        metrics.roc_auc_score,
    )
    return tuple(np.round(scorer(true, predicted), 3) for scorer in scorers)

In [None]:
log_reg = LogisticRegression()
log_reg.fit(X_train,y_train)

In [None]:
train_pred = log_reg.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(log_reg, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
results_df=pd.DataFrame(data=[['Logistic Regression', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])
results_df

In [None]:
lasso = LogisticRegression(penalty='l1', solver='liblinear')
lasso.fit(X_train,y_train)

In [None]:
train_pred = lasso.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(lasso, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Lasso', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
ridge = LogisticRegression(penalty='l2')
ridge.fit(X_train,y_train)

In [None]:
train_pred = ridge.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(ridge, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Ridge', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
svc = SVC()
svc.fit(X_train,y_train)

In [None]:
train_pred = svc.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(svc, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Support Vector Machines', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
# test another classification algorithms
rfr = RandomForestClassifier()
rfr.fit(X_train,y_train)

In [None]:
train_pred = rfr.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(rfr, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Random Forest', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
# test another classification algorithms
knears = KNeighborsClassifier()
knears.fit(X_train,y_train)

In [None]:
train_pred = knears.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(knears, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['KNearest Neighbours', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
# test another classification algorithms
ab = AdaBoostClassifier()
ab.fit(X_train,y_train)

In [None]:
# predict target and make cross-validation
train_pred = ab.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(ab, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Ada Boost', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)

In [None]:
# predict target and make cross-validation

train_pred = dt.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(dt, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Decision Tree', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
cb = CatBoostClassifier(silent=True)
cb.fit(X_train,y_train)

In [None]:
# predict target and make cross-validation

train_pred = cb.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(cb, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Cat Boost', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
xb = xgb.XGBClassifier().fit(X_train, y_train)

In [None]:
# predict target and make cross-validation

train_pred = xb.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(xb, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['XG Boost', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
lb = lgb.LGBMClassifier().fit(X_train, y_train)

In [None]:
# predict target and make cross-validation

train_pred = lb.predict(X_train)

print('Train set evaluation:\n')
print_evaluate(y_train, train_pred)
print('--------------------------------\n')
cross_score=cross_val(lb, X_train, y_train)
print('Cross_validation score:\n')
cross_score

In [None]:
# DataFrame.append was removed in pandas 2.0; concatenate the new row instead
results_df=pd.concat([results_df, pd.DataFrame(data=[['Light GBM', *evaluate(y_train, train_pred), cross_score]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score'])], ignore_index=True)
results_df

In [None]:
def perform_diffrent_algorythms(models, X_train, y_train):
    """Fit every model on the training data and tabulate its training-set scores.

    Parameters
    ----------
    models : dict
        Maps a display name to an estimator with ``fit`` and ``predict``.
        Each estimator is fitted in place.
    X_train, y_train
        Training features and labels; also handed to ``cross_val``.

    Returns
    -------
    pandas.DataFrame
        One row per model, in dict order, with the model name, the six values
        returned by ``evaluate`` on the training predictions, and the
        ``cross_val`` score.
    """
    columns = ['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc', 'cross_score']
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and re-copied the table per row.
    rows = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        train_pred = model.predict(X_train)
        cross_score = cross_val(model, X_train, y_train)
        rows.append([name, *evaluate(y_train, train_pred), cross_score])

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Display name -> unfitted estimator, all with default settings except where shown.
# The insertion order of this dict determines the row order of the result tables.
# NOTE(review): 'Logistic Regression' and 'Ridge' presumably build the same model
# if LogisticRegression's default penalty is 'l2' — confirm against the sklearn docs.
models = {
    'Logistic Regression' : LogisticRegression(), 
    'Lasso' : LogisticRegression(penalty='l1', solver='liblinear'), 
    'Ridge' : LogisticRegression(penalty='l2'),
    'Support Vector Machines' : SVC(),
    'Random Forest' : RandomForestClassifier(),
    'KNearest Neighbours' : KNeighborsClassifier(),
    'Decision Tree' : DecisionTreeClassifier(),
    'Ada Boost' : AdaBoostClassifier(),
    'Cat Boost' : CatBoostClassifier(silent=True),  # silent=True is passed to suppress CatBoost's training log
    'XG Boost' : xgb.XGBClassifier(),
    'Light GBM' : lgb.LGBMClassifier()
}

In [None]:
# Feature set without the news-sentiment column
dataset_without_news = dataset.drop(columns=['NewsOverall'])
X_without_news = dataset_without_news.drop('FINANCIAL_DISTRESS',axis=1)
y_without_news = dataset_without_news['FINANCIAL_DISTRESS']
# Split before scaling and oversampling: fitting either on the full data leaks
# test information into training and inflates the scores.
X_train_without_news, X_test_without_news, y_train_without_news, y_test_without_news = train_test_split(
    X_without_news, y_without_news, test_size=0.25, random_state=102, stratify=y_without_news)
# Scaler is fitted on the training rows only, then applied to both parts
scaler = MinMaxScaler()
X_train_without_news = scaler.fit_transform(X_train_without_news)
X_test_without_news = scaler.transform(X_test_without_news)
# Only the training set is oversampled; the seed keeps the synthetic rows reproducible
oversample = SMOTE(random_state=102)
X_train_without_news, y_train_without_news = oversample.fit_resample(X_train_without_news, y_train_without_news)

In [None]:
perform_diffrent_algorythms(models, X_train_without_news, y_train_without_news)

In [None]:
# Feature set without the two MD&A text columns
dataset_without_MDandA = dataset.drop(columns=['polarity_MD&A', 'subjectivity_MD&A'])
X_without_MDandA = dataset_without_MDandA.drop('FINANCIAL_DISTRESS',axis=1)
y_without_MDandA = dataset_without_MDandA['FINANCIAL_DISTRESS']
# Split before scaling and oversampling: fitting either on the full data leaks
# test information into training and inflates the scores.
X_train_without_MDandA, X_test_without_MDandA, y_train_without_MDandA, y_test_without_MDandA = train_test_split(
    X_without_MDandA, y_without_MDandA, test_size=0.25, random_state=103, stratify=y_without_MDandA)
# Scaler is fitted on the training rows only, then applied to both parts
scaler = MinMaxScaler()
X_train_without_MDandA = scaler.fit_transform(X_train_without_MDandA)
X_test_without_MDandA = scaler.transform(X_test_without_MDandA)
# Only the training set is oversampled; the seed keeps the synthetic rows reproducible
oversample = SMOTE(random_state=103)
X_train_without_MDandA, y_train_without_MDandA = oversample.fit_resample(X_train_without_MDandA, y_train_without_MDandA)

In [None]:
perform_diffrent_algorythms(models, X_train_without_MDandA, y_train_without_MDandA)

In [None]:
# Feature set with neither the MD&A text columns nor the news column
dataset_only_financials = dataset.drop(columns=['polarity_MD&A', 'subjectivity_MD&A', 'NewsOverall'])
X_only_financials = dataset_only_financials.drop('FINANCIAL_DISTRESS',axis=1)
y_only_financials = dataset_only_financials['FINANCIAL_DISTRESS']
# Split before scaling and oversampling: fitting either on the full data leaks
# test information into training and inflates the scores.
X_train_only_financials, X_test_only_financials, y_train_only_financials, y_test_only_financials = train_test_split(
    X_only_financials, y_only_financials, test_size=0.25, random_state=104, stratify=y_only_financials)
# Scaler is fitted on the training rows only, then applied to both parts
scaler = MinMaxScaler()
X_train_only_financials = scaler.fit_transform(X_train_only_financials)
X_test_only_financials = scaler.transform(X_test_only_financials)
# Only the training set is oversampled; the seed keeps the synthetic rows reproducible
oversample = SMOTE(random_state=104)
X_train_only_financials, y_train_only_financials = oversample.fit_resample(X_train_only_financials, y_train_only_financials)

In [None]:
perform_diffrent_algorythms(models, X_train_only_financials, y_train_only_financials)

In [None]:
def test_diffrent_algorythms(models, X_train, y_train, X_test, y_test):
    testing_df = pd.DataFrame()
    for name, model in models.items():
        
        model.fit(X_train,y_train)
        predictions = model.predict(X_test)
        testing_df = testing_df.append(pd.DataFrame(data=[[name, *evaluate(y_test, predictions)]], columns=['Model', 'accuracy', 'precision', 'recall', 'f1', 'balanced_accuracy', 'roc_auc']))
        
    return testing_df

In [None]:
test_diffrent_algorythms(models, X_train, y_train, X_test, y_test)

In [None]:
test_diffrent_algorythms(models, X_train_only_financials, y_train_only_financials, X_test_only_financials, y_test_only_financials)

In [None]:
test_diffrent_algorythms(models, X_train_without_news, y_train_without_news, X_test_without_news, y_test_without_news)

In [None]:
test_diffrent_algorythms(models, X_train_without_MDandA, y_train_without_MDandA, X_test_without_MDandA, y_test_without_MDandA)

In [None]:
import matplotlib.pyplot as plt

# Test-set accuracy of every model, one series per feature set.
# Each call below refits all models in `models` on the given training data.
all_features = test_diffrent_algorythms(models, X_train, y_train, X_test, y_test)['accuracy']
only_financials = test_diffrent_algorythms(models, X_train_only_financials, y_train_only_financials, X_test_only_financials, y_test_only_financials)['accuracy']
# "with MD&A" is the feature set that drops only the news column ...
with_MDandA = test_diffrent_algorythms(models, X_train_without_news, y_train_without_news, X_test_without_news, y_test_without_news)['accuracy']
# ... and "with news" is the feature set that drops only the MD&A columns
with_news = test_diffrent_algorythms(models, X_train_without_MDandA, y_train_without_MDandA, X_test_without_MDandA, y_test_without_MDandA)['accuracy']

In [None]:
# Short model labels for the x axis; they must stay in the same order as the
# entries of `models`, because the accuracy series follow that order.
x = [
    'LR',
    'Lasso',
    'Ridge',
    'SVM',
    'RF',
    'KNN',
    'DT',
    'AdaBoost',
    'CatBoost',
    'XGB',
    'LightGBM'
]

fig, ax = plt.subplots(figsize=(18,6))

# One line per feature set (legend typos 'feaures' / 'polariry' corrected)
ax.plot(x, all_features, label='All features', marker ='.', markersize=10)
ax.plot(x, only_financials, label='Only financial indicators', marker ='.', markersize=10)
ax.plot(x, with_MDandA, label='With MD&A polarity', marker ='.', markersize=10)
ax.plot(x, with_news, label='With news polarity', marker ='.', markersize=10)
# Axis labels and title so the figure can be read on its own
ax.set_xlabel('Model')
ax.set_ylabel('Test accuracy')
ax.set_title('Test accuracy by model and feature set')
leg = ax.legend()
plt.savefig('filename.png', format='png', transparent=True)
plt.show()

In [None]:
class GridSearch(object):
    """Thin wrapper that runs ``GridSearchCV`` for one model and predicts with the result.

    Parameters
    ----------
    X_train, y_train
        Data the search is fitted on.
    model
        Estimator handed to ``GridSearchCV``.
    hyperparameters
        Parameter grid handed to ``GridSearchCV``.
    cv
        Cross-validation setting handed to ``GridSearchCV``; defaults to 3,
        the value that used to be hard-coded.
    """

    def __init__(self,X_train,y_train,model,hyperparameters,cv=3):

        self.X_train = X_train
        self.y_train = y_train
        self.model = model
        self.hyperparameters = hyperparameters
        self.cv = cv
        # Fitted search object, filled in by GridSearch(); None until then
        self._best_model = None

    def GridSearch(self):
        """Run the exhaustive grid search, print the best score and parameters.

        Returns
        -------
        tuple
            ``(fitted GridSearchCV object, its best_params_)``. The fitted
            object is also remembered for ``BestModelPredict``.
        """
        # Exhaustive grid search (not a randomized one) with cross-validation
        clf = GridSearchCV(self.model,
                                 self.hyperparameters,
                                 cv=self.cv,
                                 verbose=0,
                                 n_jobs=-1,
                                 )
        best_model = clf.fit(self.X_train, self.y_train)
        message = (best_model.best_score_, best_model.best_params_)
        print("Best: %f using %s" % (message))

        self._best_model = best_model
        return best_model,best_model.best_params_

    def BestModelPredict(self,X_test):
        """Predict ``X_test`` with the fitted search object.

        The search runs only if it has not been run on this instance yet;
        previously every call repeated the complete grid search.
        """
        if self._best_model is None:
            self.GridSearch()
        pred = self._best_model.predict(X_test)
        return pred

In [None]:
# CatBoost grid. The duplicated 100 in 'iterations' was removed: it made the
# search fit every depth a second time with an identical configuration.
# NOTE(review): the repeated entry may have been meant as 1000 — confirm.
params = {'depth':[6,7,8,9,10],
          'iterations':[100,250,500]
         }

In [None]:
# silent=True as for the other CatBoost instances in this notebook, so the
# many fits of the grid search do not flood the cell output with training logs
cb = CatBoostClassifier(silent=True)
cb_GridSearch = GridSearch(X_train,y_train,cb,params)

In [None]:
prediction_cb = cb_GridSearch.BestModelPredict(X_test)

In [None]:
print('Test set evaluation:\n')
print_evaluate(y_test, prediction_cb)
print('--------------------------------\n')

In [None]:
# Confusion matrix of the tuned CatBoost predictions on the test set.
# The predictions are passed as the first argument and the true labels second,
# which matches the axis labels set below (rows = prediction, columns = actual).
confusion_matrix = metrics.confusion_matrix(prediction_cb, y_test)
sns.heatmap(confusion_matrix,
            annot=True,
            fmt='g')  # 'g' prints the counts as plain numbers
plt.ylabel('Prediction',fontsize=13)
plt.xlabel('Actual',fontsize=13)
plt.title('Confusion Matrix',fontsize=17)
plt.show()

In [None]:
# ROC curve of the tuned CatBoost model on the test set.
# NOTE(review): prediction_cb comes from BestModelPredict, i.e. from predict() —
# presumably hard class labels rather than scores, which would give a curve with
# a single interior point. Consider feeding predicted probabilities instead.
sns.set(font_scale=1.5)
sns.set_color_codes("muted")
plt.figure(figsize=(10, 8))
fpr, tpr, _ = metrics.roc_curve(y_test, prediction_cb)
lw = 2
plt.plot(fpr, tpr, lw=lw, label='ROC curve')
plt.plot([0, 1], [0, 1])  # diagonal reference line
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC curve')
plt.show()

In [None]:
cb = CatBoostClassifier(depth=8, iterations=500)
cb.fit(X_train,y_train)

In [None]:
feature_scores = pd.Series(cb.feature_importances_, index=dataset.drop('FINANCIAL_DISTRESS',axis=1).columns).sort_values(ascending=False)
feature_scores

In [None]:
f, ax = plt.subplots(figsize=(30, 24))
ax = sns.barplot(x=feature_scores, y=feature_scores.index)
ax.set_title("Visualize feature scores of the features")
ax.set_yticklabels(feature_scores.index)
ax.set_xlabel("Feature importance score")
ax.set_ylabel("Features")
plt.show()