In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, LinearRegression, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor

# Read the Excel workbook into a DataFrame (pandas defaults: first sheet, first row as header).
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(io=file_path)

# Define columns
# Predictor names are generated per family rather than typed out one by one.
# The gaps in the numbering are intentional and preserved: there is no OF35/OF36,
# no F60/F61, and F3 appears only as the seven columns F3_1..F3_7.
data_columns = [
    *(f'OF{n}' for n in range(2, 35)), 'OF37', 'OF38',
    'F1', 'F2',
    *(f'F3_{n}' for n in range(1, 8)),
    *(f'F{n}' for n in range(4, 60)),
    *(f'F{n}' for n in range(62, 69)),
    *(f'S{n}' for n in range(1, 7)),
    'PC', 'FC', 'WRI', 'SVT', 'VCHWC',
    'HWCC', 'MC', 'PP', 'ST', 'SWP', 'DP', 'ADLM', 'ATDO', 'AOD',
]

# Target column(s); only the first entry is modelled in this cell.
results_columns = ['WS']

# Feature matrix (all predictor columns) and target vector for the regression.
X, y = data[data_columns], data[results_columns[0]]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

# Define models
# Every estimator that draws random numbers while fitting is given an explicit
# seed, so the fitted models -- and therefore the permutation importances and
# rankings derived from them -- are reproducible from one run to the next.
# The estimators left unseeded either have no `random_state` parameter or only
# use it for non-default solver/selection options.
models = [
    Ridge(),
    DecisionTreeRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    AdaBoostRegressor(random_state=42),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=200, random_state=42),
    ElasticNet(max_iter=1000),
    SGDRegressor(max_iter=1000, random_state=42),
    SVR(cache_size=1000),
    BayesianRidge(max_iter=1000),
    KernelRidge(),
    LinearRegression(),
    RANSACRegressor(random_state=42),
    TheilSenRegressor(random_state=42),
]
# Accumulators filled by the loop below:
#   all_feature_importances_avg   -> feature name -> list of mean permutation importances, one per model
#   points_per_feature            -> feature name -> running sum of rank points over all models
#   top_bottom_features_per_model -> model class name -> (feature, importance) pairs for its 3 best / 3 worst
feature_names = X.columns
all_feature_importances_avg = dict((name, []) for name in feature_names)
points_per_feature = dict.fromkeys(feature_names, 0)
top_bottom_features_per_model = {
    type(estimator).__name__: {'Top 3': [], 'Bottom 3': []} for estimator in models
}

# Fit every model on the training split and score its features on the held-out split
for model in models:
    print(model)
    model.fit(X_train, y_train)

    # Permutation importance: each column is shuffled 30 times on the test split;
    # importances_mean holds the average importance per feature.
    perm_importance = permutation_importance(model, X_test, y_test, n_repeats=30, random_state=42)
    feature_importances = perm_importance.importances_mean

    # A single ascending sort serves the full ranking, the bottom 3 and (reversed) the top 3
    ascending_order = feature_importances.argsort()
    most_important_indices = ascending_order[-3:][::-1]
    least_important_indices = ascending_order[:3]

    # Rank points: a feature earns its position in the ascending order (0 = least important)
    for rank, position in enumerate(ascending_order):
        points_per_feature[feature_names[position]] += rank

    # Record this model's three most and three least important features
    extremes = top_bottom_features_per_model[type(model).__name__]
    extremes['Top 3'].extend(
        (feature_names[k], feature_importances[k]) for k in most_important_indices
    )
    extremes['Bottom 3'].extend(
        (feature_names[k], feature_importances[k]) for k in least_important_indices
    )

    # Keep every feature's importance for the cross-model average
    for name, importance in zip(feature_names, feature_importances):
        all_feature_importances_avg[name].append(importance)

# Collapse each feature's per-model importances into one mean across all models.
# A feature with no recorded importances keeps its (empty) list, as before.
for col, importances in all_feature_importances_avg.items():
    if len(importances) > 0:
        all_feature_importances_avg[col] = np.mean(importances)

# Materialise names and means once: the ranking loop below indexes into them for
# every feature, and rebuilding both lists per lookup is quadratic in the number of features.
avg_feature_names = list(all_feature_importances_avg.keys())
avg_importances = list(all_feature_importances_avg.values())

# Indices of features ordered from highest to lowest average importance
sorted_indices_avg = np.argsort(avg_importances)[::-1]

# Print the top 3 and bottom 3 important features for each model.
# The loop variable is a class-name string, so it is not called `model`
# (that name refers to an estimator object in the fitting loop).
for model_name, features in top_bottom_features_per_model.items():
    print(f"\nModel: {model_name}")
    print("Top 3 Important Features:")
    for feature, importance in features['Top 3']:
        print(f"'{feature}': {importance}")
    print("Bottom 3 Important Features:")
    for feature, importance in features['Bottom 3']:
        print(f"'{feature}': {importance}")

# Print the average importance of features, most important first
print("\nAverage Importance of Features:")
for idx in sorted_indices_avg:
    print(f"Feature '{avg_feature_names[idx]}': {avg_importances[idx]}")

# Print the accumulated rank points per feature, highest total first
print("\nPoints per Feature:")
sorted_points = sorted(points_per_feature.items(), key=lambda item: item[1], reverse=True)
for feature, points in sorted_points:
    print(f"Feature '{feature}': {points} points")


Ridge()
DecisionTreeRegressor()

Model: Ridge
Top 3 Important Features:
'PP': 5.301731060035581
'AOD': 5.264529534467056
'F34': 2.7345460955828003
'F11': 2.6286312816353075
'OF30': 2.4843239119352045
'F22': 2.1971634456052014
'OF9': 2.0512011421124194
'WRI': 1.6574263679690526
'F31': 1.2229642674889267
'OF6': 1.121907526336645
'OF3': 1.0038072349631724
'ADLM': 0.8910138665833529
'OF14': 0.8900342658967929
'OF2': 0.8382964062621189
'F7': 0.7797023803746167
'F9': 0.7740581417580263
'F57': 0.7553158562737584
'F2': 0.746780429431116
'OF22': 0.6596992105028221
'OF15': 0.6553883248253138
'S1': 0.6217427729420403
'OF11': 0.5591259026969359
'F66': 0.5420485449222876
'F23': 0.3950665886835092
'F24': 0.377096203747351
'OF28': 0.3711377341774096
'OF10': 0.3433169727908231
'F33': 0.33172772661556943
'OF27': 0.3055563188758394
'F4': 0.2574013670132596
'F56': 0.20139584451718898
'F28': 0.19391229506528967
'DP': 0.17704661269547917
'S6': 0.15603740160716673
'F50': 0.13033051878569496
'S5': 0.09363146

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, LinearRegression, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor

# Read the Excel workbook into a DataFrame (pandas defaults: first sheet, first row as header).
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(io=file_path)

# Define columns
# Predictor names are generated per family rather than typed out one by one.
# The gaps in the numbering are intentional and preserved: there is no OF35/OF36,
# no F60/F61, and F3 appears only as the seven columns F3_1..F3_7.
data_columns = [
    *(f'OF{n}' for n in range(2, 35)), 'OF37', 'OF38',
    'F1', 'F2',
    *(f'F3_{n}' for n in range(1, 8)),
    *(f'F{n}' for n in range(4, 60)),
    *(f'F{n}' for n in range(62, 69)),
    *(f'S{n}' for n in range(1, 7)),
    'PC', 'FC', 'WRI', 'SVT', 'VCHWC',
    'HWCC', 'MC', 'PP', 'ST', 'SWP', 'DP', 'ADLM', 'ATDO', 'AOD',
]

# Target column(s); only the first entry is modelled in this cell.
results_columns = ['SR']

# Feature matrix (all predictor columns) and target vector for the regression.
X, y = data[data_columns], data[results_columns[0]]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

# Define models
# Every estimator that draws random numbers while fitting is given an explicit
# seed, so the fitted models -- and therefore the permutation importances and
# rankings derived from them -- are reproducible from one run to the next.
# The estimators left unseeded either have no `random_state` parameter or only
# use it for non-default solver/selection options.
models = [
    Ridge(),
    DecisionTreeRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    AdaBoostRegressor(random_state=42),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=200, random_state=42),
    ElasticNet(max_iter=1000),
    SGDRegressor(max_iter=1000, random_state=42),
    SVR(cache_size=1000),
    BayesianRidge(max_iter=1000),
    KernelRidge(),
    LinearRegression(),
    RANSACRegressor(random_state=42),
    TheilSenRegressor(random_state=42),
]

# Accumulators filled by the loop below:
#   all_feature_importances_avg   -> feature name -> list of mean permutation importances, one per model
#   points_per_feature            -> feature name -> running sum of rank points over all models
#   top_bottom_features_per_model -> model class name -> (feature, importance) pairs for its 3 best / 3 worst
feature_names = X.columns
all_feature_importances_avg = dict((name, []) for name in feature_names)
points_per_feature = dict.fromkeys(feature_names, 0)
top_bottom_features_per_model = {
    type(estimator).__name__: {'Top 3': [], 'Bottom 3': []} for estimator in models
}

# Fit every model on the training split and score its features on the held-out split
for model in models:
    print(model)
    model.fit(X_train, y_train)

    # Permutation importance: each column is shuffled 30 times on the test split;
    # importances_mean holds the average importance per feature.
    perm_importance = permutation_importance(model, X_test, y_test, n_repeats=30, random_state=42)
    feature_importances = perm_importance.importances_mean

    # A single ascending sort serves the full ranking, the bottom 3 and (reversed) the top 3
    ascending_order = feature_importances.argsort()
    most_important_indices = ascending_order[-3:][::-1]
    least_important_indices = ascending_order[:3]

    # Rank points: a feature earns its position in the ascending order (0 = least important)
    for rank, position in enumerate(ascending_order):
        points_per_feature[feature_names[position]] += rank

    # Record this model's three most and three least important features
    extremes = top_bottom_features_per_model[type(model).__name__]
    extremes['Top 3'].extend(
        (feature_names[k], feature_importances[k]) for k in most_important_indices
    )
    extremes['Bottom 3'].extend(
        (feature_names[k], feature_importances[k]) for k in least_important_indices
    )

    # Keep every feature's importance for the cross-model average
    for name, importance in zip(feature_names, feature_importances):
        all_feature_importances_avg[name].append(importance)

# Collapse each feature's per-model importances into one mean across all models.
# A feature with no recorded importances keeps its (empty) list, as before.
for col, importances in all_feature_importances_avg.items():
    if len(importances) > 0:
        all_feature_importances_avg[col] = np.mean(importances)

# Materialise names and means once: the ranking loop below indexes into them for
# every feature, and rebuilding both lists per lookup is quadratic in the number of features.
avg_feature_names = list(all_feature_importances_avg.keys())
avg_importances = list(all_feature_importances_avg.values())

# Indices of features ordered from highest to lowest average importance
sorted_indices_avg = np.argsort(avg_importances)[::-1]

# Print the top 3 and bottom 3 important features for each model.
# The loop variable is a class-name string, so it is not called `model`
# (that name refers to an estimator object in the fitting loop).
for model_name, features in top_bottom_features_per_model.items():
    print(f"\nModel: {model_name}")
    print("Top 3 Important Features:")
    for feature, importance in features['Top 3']:
        print(f"'{feature}': {importance}")
    print("Bottom 3 Important Features:")
    for feature, importance in features['Bottom 3']:
        print(f"'{feature}': {importance}")

# Print the average importance of features, most important first
print("\nAverage Importance of Features:")
for idx in sorted_indices_avg:
    print(f"Feature '{avg_feature_names[idx]}': {avg_importances[idx]}")

# Print the accumulated rank points per feature, highest total first
print("\nPoints per Feature:")
sorted_points = sorted(points_per_feature.items(), key=lambda item: item[1], reverse=True)
for feature, points in sorted_points:
    print(f"Feature '{feature}': {points} points")


Ridge()
DecisionTreeRegressor()
GradientBoostingRegressor()
RandomForestRegressor()
AdaBoostRegressor()
KNeighborsRegressor()
MLPRegressor()




ElasticNet()
SGDRegressor()
SVR(cache_size=1000)
BayesianRidge(max_iter=1000)
KernelRidge()
LinearRegression()
RANSACRegressor()
TheilSenRegressor()

Model: Ridge
Top 3 Important Features:
'OF18': 31.84865521419477
'F11': 9.180577106099038
'F45': 6.141058180179804
Bottom 3 Important Features:
'PP': -6.756279287742533
'OF30': -5.8593884664504765
'F43': -4.7113567493026345

Model: DecisionTreeRegressor
Top 3 Important Features:
'F43': 2.3084431148393496
'F24': 0.6558192378707779
'F28': 0.09272662218040062
Bottom 3 Important Features:
'OF15': -0.005400547706589224
'F9': -0.00492300276798399
'OF25': -0.004138283063172817

Model: GradientBoostingRegressor
Top 3 Important Features:
'F43': 1.8006873837415314
'F24': 0.5232289150533193
'F28': 0.0679870208198227
Bottom 3 Important Features:
'F3_5': -0.00967332892400603
'FC': -0.008647359748617927
'MC': -0.0019331773645229866

Model: RandomForestRegressor
Top 3 Important Features:
'F43': 2.1550153663681346
'F24': 0.4717745571283654
'OF22': 0.0722

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, LinearRegression, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor

# Read the Excel workbook into a DataFrame (pandas defaults: first sheet, first row as header).
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(io=file_path)

# Define columns
# Predictor names are generated per family rather than typed out one by one.
# The gaps in the numbering are intentional and preserved: there is no OF35/OF36,
# no F60/F61, and F3 appears only as the seven columns F3_1..F3_7.
data_columns = [
    *(f'OF{n}' for n in range(2, 35)), 'OF37', 'OF38',
    'F1', 'F2',
    *(f'F3_{n}' for n in range(1, 8)),
    *(f'F{n}' for n in range(4, 60)),
    *(f'F{n}' for n in range(62, 69)),
    *(f'S{n}' for n in range(1, 7)),
    'PC', 'FC', 'WRI', 'SVT', 'VCHWC',
    'HWCC', 'MC', 'PP', 'ST', 'SWP', 'DP', 'ADLM', 'ATDO', 'AOD',
]

# Target column(s); only the first entry is modelled in this cell.
results_columns = ['NR']

# Feature matrix (all predictor columns) and target vector for the regression.
X, y = data[data_columns], data[results_columns[0]]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

# Define models
# Every estimator that draws random numbers while fitting is given an explicit
# seed, so the fitted models -- and therefore the permutation importances and
# rankings derived from them -- are reproducible from one run to the next.
# The estimators left unseeded either have no `random_state` parameter or only
# use it for non-default solver/selection options.
models = [
    Ridge(),
    DecisionTreeRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    AdaBoostRegressor(random_state=42),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=200, random_state=42),
    ElasticNet(max_iter=1000),
    SGDRegressor(max_iter=1000, random_state=42),
    SVR(cache_size=1000),
    BayesianRidge(max_iter=1000),
    KernelRidge(),
    LinearRegression(),
    RANSACRegressor(random_state=42),
    TheilSenRegressor(random_state=42),
]

# Accumulators filled by the loop below:
#   all_feature_importances_avg   -> feature name -> list of mean permutation importances, one per model
#   points_per_feature            -> feature name -> running sum of rank points over all models
#   top_bottom_features_per_model -> model class name -> (feature, importance) pairs for its 3 best / 3 worst
feature_names = X.columns
all_feature_importances_avg = dict((name, []) for name in feature_names)
points_per_feature = dict.fromkeys(feature_names, 0)
top_bottom_features_per_model = {
    type(estimator).__name__: {'Top 3': [], 'Bottom 3': []} for estimator in models
}

# Fit every model on the training split and score its features on the held-out split
for model in models:
    print(model)
    model.fit(X_train, y_train)

    # Permutation importance: each column is shuffled 30 times on the test split;
    # importances_mean holds the average importance per feature.
    perm_importance = permutation_importance(model, X_test, y_test, n_repeats=30, random_state=42)
    feature_importances = perm_importance.importances_mean

    # A single ascending sort serves the full ranking, the bottom 3 and (reversed) the top 3
    ascending_order = feature_importances.argsort()
    most_important_indices = ascending_order[-3:][::-1]
    least_important_indices = ascending_order[:3]

    # Rank points: a feature earns its position in the ascending order (0 = least important)
    for rank, position in enumerate(ascending_order):
        points_per_feature[feature_names[position]] += rank

    # Record this model's three most and three least important features
    extremes = top_bottom_features_per_model[type(model).__name__]
    extremes['Top 3'].extend(
        (feature_names[k], feature_importances[k]) for k in most_important_indices
    )
    extremes['Bottom 3'].extend(
        (feature_names[k], feature_importances[k]) for k in least_important_indices
    )

    # Keep every feature's importance for the cross-model average
    for name, importance in zip(feature_names, feature_importances):
        all_feature_importances_avg[name].append(importance)

# Collapse each feature's per-model importances into one mean across all models.
# A feature with no recorded importances keeps its (empty) list, as before.
for col, importances in all_feature_importances_avg.items():
    if len(importances) > 0:
        all_feature_importances_avg[col] = np.mean(importances)

# Materialise names and means once: the ranking loop below indexes into them for
# every feature, and rebuilding both lists per lookup is quadratic in the number of features.
avg_feature_names = list(all_feature_importances_avg.keys())
avg_importances = list(all_feature_importances_avg.values())

# Indices of features ordered from highest to lowest average importance
sorted_indices_avg = np.argsort(avg_importances)[::-1]

# Print the top 3 and bottom 3 important features for each model.
# The loop variable is a class-name string, so it is not called `model`
# (that name refers to an estimator object in the fitting loop).
for model_name, features in top_bottom_features_per_model.items():
    print(f"\nModel: {model_name}")
    print("Top 3 Important Features:")
    for feature, importance in features['Top 3']:
        print(f"'{feature}': {importance}")
    print("Bottom 3 Important Features:")
    for feature, importance in features['Bottom 3']:
        print(f"'{feature}': {importance}")

# Print the average importance of features, most important first
print("\nAverage Importance of Features:")
for idx in sorted_indices_avg:
    print(f"Feature '{avg_feature_names[idx]}': {avg_importances[idx]}")

# Print the accumulated rank points per feature, highest total first
print("\nPoints per Feature:")
sorted_points = sorted(points_per_feature.items(), key=lambda item: item[1], reverse=True)
for feature, points in sorted_points:
    print(f"Feature '{feature}': {points} points")





Model: Ridge
Top 3 Important Features:
'OF18': 27.632925149809928
'F11': 12.08908898362015
'SVT': 3.6835378235824616
Bottom 3 Important Features:
'DP': -5.885905067752659
'F43': -5.752132243205673
'F12': -3.32027912576065

Model: DecisionTreeRegressor
Top 3 Important Features:
'F43': 2.384832173050569
'F24': 0.08308886005933136
'AOD': 0.029329532842199118
Bottom 3 Important Features:
'F15': -0.0005478347367338913
'ADLM': -0.0005212393377743485
'OF3': -0.0004885318809984625

Model: GradientBoostingRegressor
Top 3 Important Features:
'F43': 2.1710266041393433
'F24': 0.030879712500210956
'OF16': 0.006748229222537857
Bottom 3 Important Features:
'ATDO': -0.0027549857909618504
'ADLM': -0.0023035023021446923
'F46': -0.002076294492993684

Model: RandomForestRegressor
Top 3 Important Features:
'F43': 2.241576605640768
'F24': 0.0367944351697723
'OF16': 0.005999797022119195
Bottom 3 Important Features:
'ATDO': -0.0027427148235897388
'ADLM': -0.001015341086715201
'F57': -0.0008350554025266262

M

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, LinearRegression, BayesianRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor

# Read the Excel workbook into a DataFrame (pandas defaults: first sheet, first row as header).
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(io=file_path)

# Define columns
# Predictor names are generated per family rather than typed out one by one.
# The gaps in the numbering are intentional and preserved: there is no OF35/OF36,
# no F60/F61, and F3 appears only as the seven columns F3_1..F3_7.
data_columns = [
    *(f'OF{n}' for n in range(2, 35)), 'OF37', 'OF38',
    'F1', 'F2',
    *(f'F3_{n}' for n in range(1, 8)),
    *(f'F{n}' for n in range(4, 60)),
    *(f'F{n}' for n in range(62, 69)),
    *(f'S{n}' for n in range(1, 7)),
    'PC', 'FC', 'WRI', 'SVT', 'VCHWC',
    'HWCC', 'MC', 'PP', 'ST', 'SWP', 'DP', 'ADLM', 'ATDO', 'AOD',
]

# Target column(s); only the first entry is modelled in this cell.
results_columns = ['PR']

# Feature matrix (all predictor columns) and target vector for the regression.
X, y = data[data_columns], data[results_columns[0]]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

# Define models
# Every estimator that draws random numbers while fitting is given an explicit
# seed, so the fitted models -- and therefore the permutation importances and
# rankings derived from them -- are reproducible from one run to the next.
# The estimators left unseeded either have no `random_state` parameter or only
# use it for non-default solver/selection options.
models = [
    Ridge(),
    DecisionTreeRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    AdaBoostRegressor(random_state=42),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=200, random_state=42),
    ElasticNet(max_iter=1000),
    SGDRegressor(max_iter=1000, random_state=42),
    SVR(cache_size=1000),
    BayesianRidge(max_iter=1000),
    KernelRidge(),
    LinearRegression(),
    RANSACRegressor(random_state=42),
    TheilSenRegressor(random_state=42),
]

# Accumulators filled by the loop below:
#   all_feature_importances_avg   -> feature name -> list of mean permutation importances, one per model
#   points_per_feature            -> feature name -> running sum of rank points over all models
#   top_bottom_features_per_model -> model class name -> (feature, importance) pairs for its 3 best / 3 worst
feature_names = X.columns
all_feature_importances_avg = dict((name, []) for name in feature_names)
points_per_feature = dict.fromkeys(feature_names, 0)
top_bottom_features_per_model = {
    type(estimator).__name__: {'Top 3': [], 'Bottom 3': []} for estimator in models
}

# Fit every model on the training split and score its features on the held-out split
for model in models:
    print(model)
    model.fit(X_train, y_train)

    # Permutation importance: each column is shuffled 30 times on the test split;
    # importances_mean holds the average importance per feature.
    perm_importance = permutation_importance(model, X_test, y_test, n_repeats=30, random_state=42)
    feature_importances = perm_importance.importances_mean

    # A single ascending sort serves the full ranking, the bottom 3 and (reversed) the top 3
    ascending_order = feature_importances.argsort()
    most_important_indices = ascending_order[-3:][::-1]
    least_important_indices = ascending_order[:3]

    # Rank points: a feature earns its position in the ascending order (0 = least important)
    for rank, position in enumerate(ascending_order):
        points_per_feature[feature_names[position]] += rank

    # Record this model's three most and three least important features
    extremes = top_bottom_features_per_model[type(model).__name__]
    extremes['Top 3'].extend(
        (feature_names[k], feature_importances[k]) for k in most_important_indices
    )
    extremes['Bottom 3'].extend(
        (feature_names[k], feature_importances[k]) for k in least_important_indices
    )

    # Keep every feature's importance for the cross-model average
    for name, importance in zip(feature_names, feature_importances):
        all_feature_importances_avg[name].append(importance)

# Collapse each feature's per-model importances into one mean across all models.
# A feature with no recorded importances keeps its (empty) list, as before.
for col, importances in all_feature_importances_avg.items():
    if len(importances) > 0:
        all_feature_importances_avg[col] = np.mean(importances)

# Materialise names and means once: the ranking loop below indexes into them for
# every feature, and rebuilding both lists per lookup is quadratic in the number of features.
avg_feature_names = list(all_feature_importances_avg.keys())
avg_importances = list(all_feature_importances_avg.values())

# Indices of features ordered from highest to lowest average importance
sorted_indices_avg = np.argsort(avg_importances)[::-1]

# Print the top 3 and bottom 3 important features for each model.
# The loop variable is a class-name string, so it is not called `model`
# (that name refers to an estimator object in the fitting loop).
for model_name, features in top_bottom_features_per_model.items():
    print(f"\nModel: {model_name}")
    print("Top 3 Important Features:")
    for feature, importance in features['Top 3']:
        print(f"'{feature}': {importance}")
    print("Bottom 3 Important Features:")
    for feature, importance in features['Bottom 3']:
        print(f"'{feature}': {importance}")

# Print the average importance of features, most important first
print("\nAverage Importance of Features:")
for idx in sorted_indices_avg:
    print(f"Feature '{avg_feature_names[idx]}': {avg_importances[idx]}")

# Print the accumulated rank points per feature, highest total first
print("\nPoints per Feature:")
sorted_points = sorted(points_per_feature.items(), key=lambda item: item[1], reverse=True)
for feature, points in sorted_points:
    print(f"Feature '{feature}': {points} points")


Ridge()
DecisionTreeRegressor()
GradientBoostingRegressor()
RandomForestRegressor()
AdaBoostRegressor()
KNeighborsRegressor()
MLPRegressor()
ElasticNet()
SGDRegressor()
SVR(cache_size=1000)
BayesianRidge(max_iter=1000)
KernelRidge()
LinearRegression()
RANSACRegressor()
TheilSenRegressor()

Model: Ridge
Top 3 Important Features:
'OF18': 27.03901650712978
'F11': 8.331747548555692
'F45': 4.681793858316238
Bottom 3 Important Features:
'F43': -4.6620416989712945
'AOD': -4.38183030716788
'PP': -3.7665872360119463

Model: DecisionTreeRegressor
Top 3 Important Features:
'F43': 1.4616872951012045
'F28': 0.04358311245038163
'F63': 0.023079567518824954
Bottom 3 Important Features:
'OF11': -0.006905981533117556
'F58': -0.006615457490355295
'OF27': -0.0041424249592182825

Model: GradientBoostingRegressor
Top 3 Important Features:
'F43': 1.4277683410815531
'F63': 0.04367749960892036
'AOD': 0.012049977252824218
Bottom 3 Important Features:
'F35': -0.0013743264370359887
'F3_4': -0.0012164365159749503
