In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

In [2]:
def load_data(filename, index_col=None):
    """Read a CSV file into a DataFrame, optionally indexed by a column.

    The file is parsed with the literal string ``NULL`` treated as a missing
    value and ``,`` used as the decimal mark.

    Parameters
    ----------
    filename
        Passed unchanged to ``pd.read_csv`` as the source to read.
    index_col
        Column label(s) handed to ``DataFrame.set_index``. Any falsy value
        (the default ``None``, but also e.g. an empty string) leaves the
        default index in place.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    frame = pd.read_csv(filename, decimal=",", na_values='NULL')

    # Guard clause: nothing to re-index when no column was requested.
    if not index_col:
        return frame

    return frame.set_index(index_col)

In [3]:
def _extract_importance(model, X, y, method):
    """Return a 1-D array holding one importance score per column of ``X``.

    ``model`` must already be fitted. The score source is chosen as follows:

    1. ``method == "RandomForest"`` and the model exposes
       ``feature_importances_``: use it directly.
    2. The model exposes ``coef_``: use the coefficient magnitude, averaged
       over the rows of ``coef_`` when it is 2-D.
    3. The model exposes ``feature_importances_``: use it directly.
    4. Otherwise: mean permutation importance computed on ``X``/``y``.
    """
    if method == "RandomForest" and hasattr(model, "feature_importances_"):
        return np.asarray(model.feature_importances_)

    if hasattr(model, "coef_"):
        # Coefficients are signed: a strongly negative weight matters as much
        # as a strongly positive one, so rank by magnitude. ``atleast_2d``
        # accepts both 1-D and 2-D ``coef_``; for a single-row ``coef_`` the
        # mean over rows is simply ``abs(coef_[0])``.
        return np.abs(np.atleast_2d(model.coef_)).mean(axis=0)

    if hasattr(model, "feature_importances_"):
        return np.asarray(model.feature_importances_)

    # No built-in importance attribute: fall back to the model-agnostic
    # permutation importance. A fixed seed keeps re-runs reproducible.
    # NOTE(review): this is evaluated on the same data the model was fitted
    # on, so scores reflect what the model relies on, not held-out usefulness.
    return permutation_importance(model, X, y, random_state=0).importances_mean


def calculate_and_plot_importance(model, X, y, method, plot_filename, top_n=30):
    """Fit ``model``, print the feature ranking, and save/show a bar chart.

    Parameters
    ----------
    model
        Estimator with a ``fit(X, y)`` method. It is fitted in place.
    X
        Feature table; must expose ``.columns`` (used for feature names).
    y
        Target values passed to ``model.fit``.
    method
        Label for the model. ``"RandomForest"`` selects
        ``feature_importances_``; for any other value the importance source
        is detected from the fitted model (see ``_extract_importance``).
        The label is also used in the plot title.
    plot_filename
        Destination passed to ``Figure.savefig``.
    top_n
        Number of highest-ranked features to draw. Defaults to 30.

    Returns
    -------
    None
        The full ranking is printed as a list of ``(feature, importance)``
        tuples in descending order; the chart is saved and then shown.
    """
    model.fit(X, y)
    importance = _extract_importance(model, X, y, method)

    # Highest score first.
    sorted_idx = np.argsort(importance)[::-1]
    sorted_importance = [(X.columns[index], importance[index]) for index in sorted_idx]

    print(sorted_importance)

    df_importance = pd.DataFrame(sorted_importance, columns=['Features', 'Importance'])

    # The frame is already sorted, so the first ``top_n`` rows are the top ones.
    df_max_importance = df_importance.head(top_n)

    # Explicit figure/axes so the figure can be saved and released without
    # depending on pyplot's "current figure" state.
    fig, ax = plt.subplots()
    df_max_importance.plot(kind='bar', x='Features', y='Importance', ax=ax, legend=False)
    ax.set_ylabel('Importance')
    ax.set_title(f'{method} feature importance')
    fig.tight_layout()

    # Save before showing: some backends discard the figure once it is shown.
    fig.savefig(plot_filename, facecolor='w')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [None]:
# --- Inputs -----------------------------------------------------------------
input_filename = 'path_to_input_file.csv'
index_col = 'Pat'  # column promoted to the DataFrame index by load_data
target_column = 'target_column_name'  # label column; every other column is a feature

# --- Load and split ----------------------------------------------------------
data = load_data(input_filename, index_col)

X = data.drop(columns=[target_column])
y = data[target_column]

# --- Random forest: ranking taken from the fitted forest ---------------------
rf_model = RandomForestClassifier(random_state=0)
calculate_and_plot_importance(
    model=rf_model,
    X=X,
    y=y,
    method="RandomForest",
    plot_filename='path_to_save_random_forest_plot.png',
)

# --- Logistic regression: ranking taken from the fitted coefficients ---------
lr_model = LogisticRegression()
calculate_and_plot_importance(
    model=lr_model,
    X=X,
    y=y,
    method="LogisticRegression",
    plot_filename='path_to_save_logistic_regression_plot.png',
)